space.c 38.6 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
19
20
21
22
23
24
25
26

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
27
#include <string.h>
28
#include <stdlib.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
29

30
31
/* MPI headers. */
#ifdef WITH_MPI
32
#include <mpi.h>
33
34
#endif

35
36
37
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
38
/* Local headers. */
39
#include "atomic.h"
40
#include "engine.h"
41
#include "error.h"
42
43
#include "kernel.h"
#include "lock.h"
44
#include "minmax.h"
45
#include "runner.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
46

47
48
49
/* Shared sort structure. */
struct parallel_sort space_sort_struct;

Pedro Gonnet's avatar
Pedro Gonnet committed
50
51
/* Split size. */
int space_splitsize = space_splitsize_default;
52
int space_subsize = space_subsize_default;
53
int space_maxsize = space_maxsize_default;
Pedro Gonnet's avatar
Pedro Gonnet committed
54
55
56

/* Map shift vector to sortlist. */
const int sortlistID[27] = {
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
    /* ( -1 , -1 , -1 ) */ 0,
    /* ( -1 , -1 ,  0 ) */ 1,
    /* ( -1 , -1 ,  1 ) */ 2,
    /* ( -1 ,  0 , -1 ) */ 3,
    /* ( -1 ,  0 ,  0 ) */ 4,
    /* ( -1 ,  0 ,  1 ) */ 5,
    /* ( -1 ,  1 , -1 ) */ 6,
    /* ( -1 ,  1 ,  0 ) */ 7,
    /* ( -1 ,  1 ,  1 ) */ 8,
    /* (  0 , -1 , -1 ) */ 9,
    /* (  0 , -1 ,  0 ) */ 10,
    /* (  0 , -1 ,  1 ) */ 11,
    /* (  0 ,  0 , -1 ) */ 12,
    /* (  0 ,  0 ,  0 ) */ 0,
    /* (  0 ,  0 ,  1 ) */ 12,
    /* (  0 ,  1 , -1 ) */ 11,
    /* (  0 ,  1 ,  0 ) */ 10,
    /* (  0 ,  1 ,  1 ) */ 9,
    /* (  1 , -1 , -1 ) */ 8,
    /* (  1 , -1 ,  0 ) */ 7,
    /* (  1 , -1 ,  1 ) */ 6,
    /* (  1 ,  0 , -1 ) */ 5,
    /* (  1 ,  0 ,  0 ) */ 4,
    /* (  1 ,  0 ,  1 ) */ 3,
    /* (  1 ,  1 , -1 ) */ 2,
    /* (  1 ,  1 ,  0 ) */ 1,
    /* (  1 ,  1 ,  1 ) */ 0};

85
86
87
88
89
90
91
92
93
94
95
96
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */

97
98
99
100
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
101
102
103
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
104
105
106
107
108
109
110
111
112
113
114
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
115
  int sid = 0;
116
  for (int k = 0; k < 3; k++)
117
118
119
120
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
121
    struct cell *temp = *ci;
122
123
    *ci = *cj;
    *cj = temp;
124
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
125
126
127
128
129
130
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}
131

132
/**
133
 * @brief Recursively dismantle a cell tree.
134
135
 *
 */
136
137
138
139

void space_rebuild_recycle(struct space *s, struct cell *c) {

  if (c->split)
140
    for (int k = 0; k < 8; k++)
141
142
143
144
145
146
147
      if (c->progeny[k] != NULL) {
        space_rebuild_recycle(s, c->progeny[k]);
        space_recycle(s, c->progeny[k]);
        c->progeny[k] = NULL;
      }
}

148
/**
149
 * @brief Re-build the cell grid.
150
 *
151
152
 * @param s The #space.
 * @param cell_max Maximum cell edge length.
153
 * @param verbose Print messages to stdout or not.
154
 */
155

156
void space_regrid(struct space *s, double cell_max, int verbose) {
157

158
159
  float h_max = s->cell_min / kernel_gamma / space_stretch;
  const size_t nr_parts = s->nr_parts;
160
  struct cell *restrict c;
161
  ticks tic = getticks();
162
163
164
165

  /* Run through the parts and get the current h_max. */
  // tic = getticks();
  if (s->cells != NULL) {
166
    for (int k = 0; k < s->nr_cells; k++) {
167
      if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max;
168
    }
169
  } else {
170
    for (int k = 0; k < nr_parts; k++) {
171
172
173
174
175
176
177
178
179
180
181
182
      if (s->parts[k].h > h_max) h_max = s->parts[k].h;
    }
    s->h_max = h_max;
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
183
      error("Failed to aggregate the rebuild flag across nodes.");
184
185
186
    h_max = buff;
  }
#endif
187
  if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);
188
189

  /* Get the new putative cell dimensions. */
190
  int cdim[3];
191
  for (int k = 0; k < 3; k++)
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
    cdim[k] =
        floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max));

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
        "is switched on.");

/* In MPI-Land, we're not allowed to change the top-level cell size. */
#ifdef WITH_MPI
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2])
    error("Root-level change of cell size not allowed.");
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
  if (s->cells == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
      cdim[2] < s->cdim[2]) {

    /* Free the old cells, if they were allocated. */
    if (s->cells != NULL) {
214
      for (int k = 0; k < s->nr_cells; k++) {
215
216
217
218
219
220
221
222
        space_rebuild_recycle(s, &s->cells[k]);
        if (s->cells[k].sort != NULL) free(s->cells[k].sort);
      }
      free(s->cells);
      s->maxdepth = 0;
    }

    /* Set the new cell dimensions only if smaller. */
223
    for (int k = 0; k < 3; k++) {
224
225
226
227
      s->cdim[k] = cdim[k];
      s->h[k] = s->dim[k] / cdim[k];
      s->ih[k] = 1.0 / s->h[k];
    }
228
    const float dmin = fminf(s->h[0], fminf(s->h[1], s->h[2]));
229
230
231
232
233
234
235

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
    if (posix_memalign((void *)&s->cells, 64,
                       s->nr_cells * sizeof(struct cell)) != 0)
      error("Failed to allocate cells.");
    bzero(s->cells, s->nr_cells * sizeof(struct cell));
236
    for (int k = 0; k < s->nr_cells; k++)
237
238
239
      if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock.");

    /* Set the cell location and sizes. */
240
241
242
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
243
244
245
246
247
248
249
250
251
252
253
254
255
          c = &s->cells[cell_getid(cdim, i, j, k)];
          c->loc[0] = i * s->h[0];
          c->loc[1] = j * s->h[1];
          c->loc[2] = k * s->h[2];
          c->h[0] = s->h[0];
          c->h[1] = s->h[1];
          c->h[2] = s->h[2];
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
          c->super = c;
          lock_init(&c->lock);
Pedro Gonnet's avatar
Pedro Gonnet committed
256
        }
257
258

    /* Be verbose about the change. */
259
260
261
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
262
263
264
    fflush(stdout);

  } /* re-build upper-level cells? */
265
266
  // message( "rebuilding upper-level cells took %.3f %s." ,
  // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
267
268
269
270
271

  /* Otherwise, just clean up the cells. */
  else {

    /* Free the old cells, if they were allocated. */
272
    for (int k = 0; k < s->nr_cells; k++) {
273
274
275
276
277
278
279
280
281
282
283
      space_rebuild_recycle(s, &s->cells[k]);
      s->cells[k].sorts = NULL;
      s->cells[k].nr_tasks = 0;
      s->cells[k].nr_density = 0;
      s->cells[k].nr_force = 0;
      s->cells[k].density = NULL;
      s->cells[k].force = NULL;
      s->cells[k].dx_max = 0.0f;
      s->cells[k].sorted = 0;
      s->cells[k].count = 0;
      s->cells[k].gcount = 0;
Matthieu Schaller's avatar
Matthieu Schaller committed
284
      s->cells[k].init = NULL;
Matthieu Schaller's avatar
Matthieu Schaller committed
285
      s->cells[k].ghost = NULL;
Matthieu Schaller's avatar
Matthieu Schaller committed
286
287
      s->cells[k].drift = NULL;
      s->cells[k].kick = NULL;
288
      s->cells[k].super = &s->cells[k];
289
    }
290
291
    s->maxdepth = 0;
  }
292
293
294
295

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
296
}
297
298
299
300
301
302

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
 * @param cell_max Maximal cell size.
303
 * @param verbose Print messages to stdout or not
304
305
 *
 */
306

307
void space_rebuild(struct space *s, double cell_max, int verbose) {
308

309
  ticks tic = getticks();
310
311
312
313
314

  /* Be verbose about this. */
  // message( "re)building space..." ); fflush(stdout);

  /* Re-grid if necessary, or just re-set the cell data. */
315
  space_regrid(s, cell_max, verbose);
316

317
318
319
320
321
322
  int nr_parts = s->nr_parts;
  int nr_gparts = s->nr_gparts;
  struct cell *restrict cells = s->cells;

  double ih[3], dim[3];
  int cdim[3];
323
324
325
326
327
328
329
330
331
  ih[0] = s->ih[0];
  ih[1] = s->ih[1];
  ih[2] = s->ih[2];
  dim[0] = s->dim[0];
  dim[1] = s->dim[1];
  dim[2] = s->dim[2];
  cdim[0] = s->cdim[0];
  cdim[1] = s->cdim[1];
  cdim[2] = s->cdim[2];
332
333
334
335

  /* Run through the particles and get their cell index. */
  // tic = getticks();
  const size_t ind_size = s->size_parts;
336
337
  int *ind;
  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
338
339
340
341
    error("Failed to allocate temporary particle indices.");
  for (int k = 0; k < nr_parts; k++) {
    struct part *restrict p = &s->parts[k];
    for (int j = 0; j < 3; j++)
342
343
344
345
      if (p->x[j] < 0.0)
        p->x[j] += dim[j];
      else if (p->x[j] >= dim[j])
        p->x[j] -= dim[j];
346
    ind[k] =
347
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
348
    cells[ind[k]].count++;
349
  }
Pedro Gonnet's avatar
Pedro Gonnet committed
350
351
  // message( "getting particle indices took %.3f %s." ,
  // clocks_from_ticks(getticks() - tic), clocks_getunit()):
352

353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
  /* Run through the gravity particles and get their cell index. */
  // tic = getticks();
  const size_t gind_size = s->size_gparts;
  int *gind;
  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
    error("Failed to allocate temporary g-particle indices.");
  for (int k = 0; k < nr_gparts; k++) {
    struct gpart *gp = &s->gparts[k];
    for (int j = 0; j < 3; j++)
      if (gp->x[j] < 0.0)
        gp->x[j] += dim[j];
      else if (gp->x[j] >= dim[j])
        gp->x[j] -= dim[j];
    gind[k] =
        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
    cells[gind[k]].gcount++;
  }
// message( "getting particle indices took %.3f %s." ,
// clocks_from_ticks(getticks() - tic), clocks_getunit());

373
374
#ifdef WITH_MPI
  /* Move non-local parts to the end of the list. */
375
  const int local_nodeID = s->e->nodeID;
376
  for (int k = 0; k < nr_parts; k++)
377
    if (cells[ind[k]].nodeID != local_nodeID) {
378
379
      cells[ind[k]].count -= 1;
      nr_parts -= 1;
380
381
382
      struct part tp = s->parts[k];
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
383
384
385
386
387
388
      if (s->parts[k].gpart != NULL) {
        s->parts[k].gpart->part = &s->parts[k];
      }
      if (s->parts[nr_parts].gpart != NULL) {
        s->parts[nr_parts].gpart->part = &s->parts[nr_parts];
      }
389
390
391
      struct xpart txp = s->xparts[k];
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
392
393
394
      int t = ind[k];
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
395
396
    }

397
398
399
400
401
402
403
404
  /* Move non-local gparts to the end of the list. */
  for (int k = 0; k < nr_gparts; k++)
    if (cells[ind[k]].nodeID != local_nodeID) {
      cells[ind[k]].gcount -= 1;
      nr_gparts -= 1;
      struct gpart tp = s->gparts[k];
      s->gparts[k] = s->gparts[nr_gparts];
      s->gparts[nr_gparts] = tp;
405
406
407
408
409
410
      if (s->gparts[k].id > 0) {
        s->gparts[k].part->gpart = &s->gparts[k];
      }
      if (s->gparts[nr_gparts].id > 0) {
        s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts];
      }
411
412
413
414
415
      int t = ind[k];
      ind[k] = ind[nr_gparts];
      ind[nr_gparts] = t;
    }

416
417
  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
418
419
420
  /* TODO: This function also exchanges gparts, but this is shorted-out
     until they are fully implemented. */
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
421
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
Pedro Gonnet's avatar
Pedro Gonnet committed
422
423
424
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);

425
  /* Add post-processing, i.e. re-linking/creating of gparts here. */
Pedro Gonnet's avatar
Pedro Gonnet committed
426
427

  /* Set the new particle counts. */
428
  s->nr_parts = nr_parts + nr_parts_exchanged;
429
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
430
431

  /* Re-allocate the index array if needed.. */
432
  if (s->nr_parts > ind_size) {
433
434
    int *ind_new;
    if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
435
      error("Failed to allocate temporary particle indices.");
436
    memcpy(ind_new, ind, sizeof(size_t) * nr_parts);
437
438
    free(ind);
    ind = ind_new;
439
440
441
  }

  /* Assign each particle to its cell. */
442
443
  for (int k = nr_parts; k < s->nr_parts; k++) {
    struct part *p = &s->parts[k];
444
    ind[k] =
445
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
446
447
448
449
    cells[ind[k]].count += 1;
    /* if ( cells[ ind[k] ].nodeID != nodeID )
        error( "Received part that does not belong to me (nodeID=%i)." , cells[
       ind[k] ].nodeID ); */
450
  }
451
  nr_parts = s->nr_parts;
452
453
454
#endif

  /* Sort the parts according to their cells. */
455
  space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
456
457

  /* Re-link the gparts. */
458
  for (int k = 0; k < nr_parts; k++)
459
    if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
460

461
  /* Verify space_sort_struct. */
462
  /* for ( k = 1 ; k < nr_parts ; k++ ) {
463
      if ( ind[k-1] > ind[k] ) {
464
465
          error( "Sort failed!" );
          }
466
      else if ( ind[k] != cell_getid( cdim , parts[k].x[0]*ih[0] ,
467
468
469
470
471
     parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) )
          error( "Incorrect indices!" );
      } */

  /* We no longer need the indices as of here. */
472
  free(ind);
473

474
475
476
477
#ifdef WITH_MPI

  /* Re-allocate the index array if needed.. */
  if (s->nr_gparts > gind_size) {
478
479
    int *gind_new;
    if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
480
      error("Failed to allocate temporary g-particle indices.");
481
    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
482
483
484
485
486
    free(gind);
    gind = gind_new;
  }

  /* Assign each particle to its cell. */
487
  for (int k = nr_gparts; k < s->nr_gparts; k++) {
488
489
490
491
492
493
494
495
496
    struct gpart *p = &s->gparts[k];
    gind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
    cells[gind[k]].count += 1;
    /* if ( cells[ ind[k] ].nodeID != nodeID )
        error( "Received part that does not belong to me (nodeID=%i)." , cells[
       ind[k] ].nodeID ); */
  }
  nr_gparts = s->nr_gparts;
497

498
#endif
499
500

  /* Sort the parts according to their cells. */
501
  space_gparts_sort(s->gparts, gind, nr_gparts, 0, s->nr_cells - 1);
502
503

  /* Re-link the parts. */
504
  for (int k = 0; k < nr_gparts; k++)
505
    if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k];
506
507

  /* We no longer need the indices as of here. */
508
  free(gind);
509
510
511

  /* Hook the cells up to the parts. */
  // tic = getticks();
512
513
514
  struct part *finger = s->parts;
  struct xpart *xfinger = s->xparts;
  struct gpart *gfinger = s->gparts;
515
516
  for (int k = 0; k < s->nr_cells; k++) {
    struct cell *restrict c = &cells[k];
517
518
519
520
521
522
523
    c->parts = finger;
    c->xparts = xfinger;
    c->gparts = gfinger;
    finger = &finger[c->count];
    xfinger = &xfinger[c->count];
    gfinger = &gfinger[c->gcount];
  }
524
  // message( "hooking up cells took %.3f %s." ,
Matthieu Schaller's avatar
Matthieu Schaller committed
525
  // clocks_from_ticks(getticks() - tic), clocks_getunit());
526
527
528

  /* At this point, we have the upper-level cells, old or new. Now make
     sure that the parts in each cell are ok. */
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
  space_split(s, cells, verbose);

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Split particles between cells of a hierarchy
 *
 * @param s The #space.
 * @param cells The cell hierarchy
 * @param verbose Are we talkative ?
 */
void space_split(struct space *s, struct cell *cells, int verbose) {

  ticks tic = getticks();

  for (int k = 0; k < s->nr_cells; k++)
548
549
    scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k,
                      0, &cells[k], NULL, 0);
550
  engine_launch(s->e, s->e->nr_threads, 1 << task_type_split_cell, 0);
551

552
553
554
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
555
}
556

557
/**
558
559
 * @brief Sort the particles and condensed particles according to the given
 *indices.
560
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
561
 * @param s The #space.
562
563
564
565
 * @param ind The indices with respect to which the parts are sorted.
 * @param N The number of parts
 * @param min Lowest index.
 * @param max highest index.
566
 * @param verbose Are we talkative ?
567
 */
568

569
void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
570
571
572
573
574
                      int verbose) {

  ticks tic = getticks();

  /*Populate the global parallel_sort structure with the input data */
575
576
577
  space_sort_struct.parts = s->parts;
  space_sort_struct.xparts = s->xparts;
  space_sort_struct.ind = ind;
578
  space_sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
579
580
581
582
583
584
  if ((space_sort_struct.stack = malloc(sizeof(struct qstack) *
                                        space_sort_struct.stack_size)) == NULL)
    error("Failed to allocate sorting stack.");
  for (int i = 0; i < space_sort_struct.stack_size; i++)
    space_sort_struct.stack[i].ready = 0;

585
  /* Add the first interval. */
586
587
588
589
590
591
592
593
594
  space_sort_struct.stack[0].i = 0;
  space_sort_struct.stack[0].j = N - 1;
  space_sort_struct.stack[0].min = min;
  space_sort_struct.stack[0].max = max;
  space_sort_struct.stack[0].ready = 1;
  space_sort_struct.first = 0;
  space_sort_struct.last = 1;
  space_sort_struct.waiting = 1;

595
  /* Launch the sorting tasks. */
596
  engine_launch(s->e, s->e->nr_threads, (1 << task_type_psort), 0);
597
598

  /* Verify space_sort_struct. */
599
  /* for (int i = 1; i < N; i++)
600
    if (ind[i - 1] > ind[i])
601
602
      error("Sorting failed (ind[%i]=%i,ind[%i]=%i), min=%i, max=%i.", i - 1,
  ind[i - 1], i,
603
604
            ind[i], min, max);
  message("Sorting succeeded."); */
605

606
  /* Clean up. */
607
  free(space_sort_struct.stack);
608
609
610
611

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
612
}
613

614
void space_do_parts_sort() {
615

616
  /* Pointers to the sorting data. */
617
  int *ind = space_sort_struct.ind;
618
619
  struct part *parts = space_sort_struct.parts;
  struct xpart *xparts = space_sort_struct.xparts;
620

621
  /* Main loop. */
622
  while (space_sort_struct.waiting) {
623

624
    /* Grab an interval off the queue. */
625
626
    int qid =
        atomic_inc(&space_sort_struct.first) % space_sort_struct.stack_size;
627

628
    /* Wait for the entry to be ready, or for the sorting do be done. */
629
630
    while (!space_sort_struct.stack[qid].ready)
      if (!space_sort_struct.waiting) return;
631

632
    /* Get the stack entry. */
633
634
    ptrdiff_t i = space_sort_struct.stack[qid].i;
    ptrdiff_t j = space_sort_struct.stack[qid].j;
635
636
    int min = space_sort_struct.stack[qid].min;
    int max = space_sort_struct.stack[qid].max;
637
    space_sort_struct.stack[qid].ready = 0;
638

639
640
    /* Loop over sub-intervals. */
    while (1) {
641

642
      /* Bring beer. */
643
      const int pivot = (min + max) / 2;
644
645
      /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
              i, j, min, max, pivot); */
646
647

      /* One pass of QuickSort's partitioning. */
648
649
      ptrdiff_t ii = i;
      ptrdiff_t jj = j;
650
651
652
653
      while (ii < jj) {
        while (ii <= j && ind[ii] <= pivot) ii++;
        while (jj >= i && ind[jj] > pivot) jj--;
        if (ii < jj) {
654
          size_t temp_i = ind[ii];
655
656
          ind[ii] = ind[jj];
          ind[jj] = temp_i;
657
          struct part temp_p = parts[ii];
658
659
          parts[ii] = parts[jj];
          parts[jj] = temp_p;
660
          struct xpart temp_xp = xparts[ii];
661
662
663
664
          xparts[ii] = xparts[jj];
          xparts[jj] = temp_xp;
        }
      }
665

666
      /* Verify space_sort_struct. */
667
668
669
670
671
672
673
674
675
676
677
678
      /* for (int k = i; k <= jj; k++)
        if (ind[k] > pivot) {
          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
                  ind[k], pivot, i, j);
          error("Partition failed (<=pivot).");
        }
      for (int k = jj + 1; k <= j; k++)
        if (ind[k] <= pivot) {
          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i.", k,
                  ind[k], pivot, i, j);
          error("Partition failed (>pivot).");
        } */
679
680
681
682
683
684

      /* Split-off largest interval. */
      if (jj - i > j - jj + 1) {

        /* Recurse on the left? */
        if (jj > i && pivot > min) {
685
686
          qid = atomic_inc(&space_sort_struct.last) %
                space_sort_struct.stack_size;
687
688
          while (space_sort_struct.stack[qid].ready)
            ;
689
690
691
692
693
694
695
          space_sort_struct.stack[qid].i = i;
          space_sort_struct.stack[qid].j = jj;
          space_sort_struct.stack[qid].min = min;
          space_sort_struct.stack[qid].max = pivot;
          if (atomic_inc(&space_sort_struct.waiting) >=
              space_sort_struct.stack_size)
            error("Qstack overflow.");
696
          space_sort_struct.stack[qid].ready = 1;
697
        }
698

699
700
701
702
703
704
705
706
707
708
        /* Recurse on the right? */
        if (jj + 1 < j && pivot + 1 < max) {
          i = jj + 1;
          min = pivot + 1;
        } else
          break;

      } else {

        /* Recurse on the right? */
709
        if (pivot + 1 < max) {
710
711
          qid = atomic_inc(&space_sort_struct.last) %
                space_sort_struct.stack_size;
712
713
          while (space_sort_struct.stack[qid].ready)
            ;
714
715
716
717
718
719
720
          space_sort_struct.stack[qid].i = jj + 1;
          space_sort_struct.stack[qid].j = j;
          space_sort_struct.stack[qid].min = pivot + 1;
          space_sort_struct.stack[qid].max = max;
          if (atomic_inc(&space_sort_struct.waiting) >=
              space_sort_struct.stack_size)
            error("Qstack overflow.");
721
          space_sort_struct.stack[qid].ready = 1;
722
        }
723

724
725
726
727
728
729
730
        /* Recurse on the left? */
        if (jj > i && pivot > min) {
          j = jj;
          max = pivot;
        } else
          break;
      }
731

732
733
    } /* loop over sub-intervals. */

734
    atomic_dec(&space_sort_struct.waiting);
735
736

  } /* main loop. */
737
738
}

739
void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min,
740
                       int max) {
741
742

  struct qstack {
743
744
    volatile size_t i, j;
    volatile int min, max;
745
746
747
748
749
750
751
    volatile int ready;
  };
  struct qstack *qstack;
  int qstack_size = 2 * (max - min) + 10;
  volatile unsigned int first, last, waiting;

  int pivot;
752
  ptrdiff_t i, ii, j, jj, temp_i;
753
  int qid;
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
  struct gpart temp_p;

  /* for ( int k = 0 ; k < N ; k++ )
      if ( ind[k] > max || ind[k] < min )
          error( "ind[%i]=%i is not in [%i,%i]." , k , ind[k] , min , max ); */

  /* Allocate the stack. */
  if ((qstack = malloc(sizeof(struct qstack) * qstack_size)) == NULL)
    error("Failed to allocate qstack.");

  /* Init the interval stack. */
  qstack[0].i = 0;
  qstack[0].j = N - 1;
  qstack[0].min = min;
  qstack[0].max = max;
  qstack[0].ready = 1;
  for (i = 1; i < qstack_size; i++) qstack[i].ready = 0;
  first = 0;
  last = 1;
  waiting = 1;

775
776
  /* Main loop. */
  while (waiting > 0) {
777

778
779
    /* Grab an interval off the queue. */
    qid = (first++) % qstack_size;
780

781
782
783
784
785
786
787
788
789
    /* Get the stack entry. */
    i = qstack[qid].i;
    j = qstack[qid].j;
    min = qstack[qid].min;
    max = qstack[qid].max;
    qstack[qid].ready = 0;

    /* Loop over sub-intervals. */
    while (1) {
790

791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
      /* Bring beer. */
      pivot = (min + max) / 2;

      /* One pass of QuickSort's partitioning. */
      ii = i;
      jj = j;
      while (ii < jj) {
        while (ii <= j && ind[ii] <= pivot) ii++;
        while (jj >= i && ind[jj] > pivot) jj--;
        if (ii < jj) {
          temp_i = ind[ii];
          ind[ii] = ind[jj];
          ind[jj] = temp_i;
          temp_p = gparts[ii];
          gparts[ii] = gparts[jj];
          gparts[jj] = temp_p;
        }
      }
809

810
      /* Verify space_sort_struct. */
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
      /* for ( int k = i ; k <= jj ; k++ )
         if ( ind[k] > pivot ) {
         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
         N=%i." , k , ind[k] , pivot , i , j , N );
         error( "Partition failed (<=pivot)." );
         }
         for ( int k = jj+1 ; k <= j ; k++ )
         if ( ind[k] <= pivot ) {
         message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i,
         N=%i." , k , ind[k] , pivot , i , j , N );
         error( "Partition failed (>pivot)." );
         } */

      /* Split-off largest interval. */
      if (jj - i > j - jj + 1) {

        /* Recurse on the left? */
        if (jj > i && pivot > min) {
          qid = (last++) % qstack_size;
          qstack[qid].i = i;
          qstack[qid].j = jj;
          qstack[qid].min = min;
          qstack[qid].max = pivot;
          qstack[qid].ready = 1;
          if ((waiting++) >= qstack_size) error("Qstack overflow.");
        }
837

838
839
840
841
842
843
        /* Recurse on the right? */
        if (jj + 1 < j && pivot + 1 < max) {
          i = jj + 1;
          min = pivot + 1;
        } else
          break;
844

845
846
847
      } else {

        /* Recurse on the right? */
848
        if (pivot + 1 < max) {
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
          qid = (last++) % qstack_size;
          qstack[qid].i = jj + 1;
          qstack[qid].j = j;
          qstack[qid].min = pivot + 1;
          qstack[qid].max = max;
          qstack[qid].ready = 1;
          if ((waiting++) >= qstack_size) error("Qstack overflow.");
        }

        /* Recurse on the left? */
        if (jj > i && pivot > min) {
          j = jj;
          max = pivot;
        } else
          break;
      }

    } /* loop over sub-intervals. */

    waiting--;

  } /* main loop. */
871

872
  /* Verify space_sort_struct. */
873
874
875
876
877
878
879
880
  /* for ( i = 1 ; i < N ; i++ )
      if ( ind[i-1] > ind[i] )
          error( "Sorting failed (ind[%i]=%i,ind[%i]=%i)." , i-1 , ind[i-1] , i
     , ind[i] ); */

  /* Clean up. */
  free(qstack);
}
881

Pedro Gonnet's avatar
Pedro Gonnet committed
882
/**
883
 * @brief Mapping function to free the sorted indices buffers.
Pedro Gonnet's avatar
Pedro Gonnet committed
884
885
 */

886
void space_map_clearsort(struct cell *c, void *data) {
Pedro Gonnet's avatar
Pedro Gonnet committed
887

888
889
890
891
892
  if (c->sort != NULL) {
    free(c->sort);
    c->sort = NULL;
  }
}
Pedro Gonnet's avatar
Pedro Gonnet committed
893

894
895
896
/**
 * @brief Map a function to all particles in a cell recursively.
 *
897
 * @param c The #cell we are working in.
898
899
900
901
 * @param fun Function pointer to apply on the cells.
 * @param data Data passed to the function fun.
 */

Pedro Gonnet's avatar
Pedro Gonnet committed
902
903
904
905
static void rec_map_parts(struct cell *c,
                          void (*fun)(struct part *p, struct cell *c,
                                      void *data),
                          void *data) {
906
907
908
909
910
911

  int k;

  /* No progeny? */
  if (!c->split)
    for (k = 0; k < c->count; k++) fun(&c->parts[k], c, data);
Pedro Gonnet's avatar
Pedro Gonnet committed
912

913
914
915
916
917
918
  /* Otherwise, recurse. */
  else
    for (k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) rec_map_parts(c->progeny[k], fun, data);
}

Pedro Gonnet's avatar
Pedro Gonnet committed
919
/**
920
 * @brief Map a function to all particles in a space.
Pedro Gonnet's avatar
Pedro Gonnet committed
921
922
 *
 * @param s The #space we are working in.
923
924
 * @param fun Function pointer to apply on the cells.
 * @param data Data passed to the function fun.
Pedro Gonnet's avatar
Pedro Gonnet committed
925
926
 */

927
928
929
void space_map_parts(struct space *s,
                     void (*fun)(struct part *p, struct cell *c, void *data),
                     void *data) {
Pedro Gonnet's avatar
Pedro Gonnet committed
930

931
932
  int cid = 0;

933
  /* Call the recursive function on all higher-level cells. */
Pedro Gonnet's avatar
Pedro Gonnet committed
934
935
  for (cid = 0; cid < s->nr_cells; cid++)
    rec_map_parts(&s->cells[cid], fun, data);
936
}
937

938
939
940
941
942
943
944
945
/**
 * @brief Map a function to all particles in a cell recursively.
 *
 * @param c The #cell we are working in.
 * @param fun Function pointer to apply on the cells.
 */

static void rec_map_parts_xparts(struct cell *c,
946
947
                                 void (*fun)(struct part *p, struct xpart *xp,
                                             struct cell *c)) {
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968

  int k;

  /* No progeny? */
  if (!c->split)
    for (k = 0; k < c->count; k++) fun(&c->parts[k], &c->xparts[k], c);

  /* Otherwise, recurse. */
  else
    for (k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) rec_map_parts_xparts(c->progeny[k], fun);
}

/**
 * @brief Map a function to all particles (#part and #xpart) in a space.
 *
 * @param s The #space we are working in.
 * @param fun Function pointer to apply on the particles in the cells.
 */

void space_map_parts_xparts(struct space *s,
969
970
                            void (*fun)(struct part *p, struct xpart *xp,
                                        struct cell *c)) {
971
972
973
974
975
976
977
978

  int cid = 0;

  /* Call the recursive function on all higher-level cells. */
  for (cid = 0; cid < s->nr_cells; cid++)
    rec_map_parts_xparts(&s->cells[cid], fun);
}

979
980
981
/**
 * @brief Map a function to all particles in a cell recursively.
 *
982
 * @param c The #cell we are working in.
983
984
985
986
 * @param full Map to all cells, including cells with sub-cells.
 * @param fun Function pointer to apply on the cells.
 * @param data Data passed to the function fun.
 */
987

Pedro Gonnet's avatar
Pedro Gonnet committed
988
989
990
static void rec_map_cells_post(struct cell *c, int full,
                               void (*fun)(struct cell *c, void *data),
                               void *data) {
991

992
993
994
995
996
  int k;

  /* Recurse. */
  if (c->split)
    for (k = 0; k < 8; k++)
Pedro Gonnet's avatar
Pedro Gonnet committed
997
998
999
      if (c->progeny[k] != NULL)
        rec_map_cells_post(c->progeny[k], full, fun, data);

1000
1001
  /* No progeny? */
  if (full || !c->split) fun(c, data);
1002
}
Pedro Gonnet's avatar
Pedro Gonnet committed
1003
1004

/**
1005
 * @brief Map a function to all particles in a aspace.
Pedro Gonnet's avatar
Pedro Gonnet committed
1006
1007
 *
 * @param s The #space we are working in.
1008
 * @param full Map to all cells, including cells with sub-cells.
1009
1010
 * @param fun Function pointer to apply on the cells.
 * @param data Data passed to the function fun.
Pedro Gonnet's avatar
Pedro Gonnet committed
1011
 */
1012

1013
void space_map_cells_post(struct space *s, int full,
Pedro Gonnet's avatar
Pedro Gonnet committed
1014
                          void (*fun)(struct cell *c, void *data), void *data) {
1015

1016
  int cid = 0;
1017

1018
  /* Call the recursive function on all higher-level cells. */
Pedro Gonnet's avatar
Pedro Gonnet committed
1019
1020
  for (cid = 0; cid < s->nr_cells; cid++)
    rec_map_cells_post(&s->cells[cid], full, fun, data);
1021
}
1022

Pedro Gonnet's avatar
Pedro Gonnet committed
1023
1024
1025
static void rec_map_cells_pre(struct cell *c, int full,
                              void (*fun)(struct cell *c, void *data),
                              void *data) {
1026

1027
  int k;
Pedro Gonnet's avatar
Pedro Gonnet committed
1028

1029
1030
  /* No progeny? */
  if (full || !c->split) fun(c, data);
Pedro Gonnet's avatar
Pedro Gonnet committed
1031

1032
1033
1034
  /* Recurse. */
  if (c->split)
    for (k = 0; k < 8; k++)
Pedro Gonnet's avatar
Pedro Gonnet committed
1035
1036
      if (c->progeny[k] != NULL)
        rec_map_cells_pre(c->progeny[k], full, fun, data);
1037
}
Pedro Gonnet's avatar
Pedro Gonnet committed
1038

1039
1040
1041
1042
1043
1044
1045
1046
/**
 * @brief Calls function fun on the cells in the space s
 *
 * @param s The #space
 * @param full If true calls the function on all cells and not just on leaves
 * @param fun The function to call.
 * @param data Additional data passed to fun() when called
 */
1047
void space_map_cells_pre(struct space *s, int full,
Pedro Gonnet's avatar
Pedro Gonnet committed
1048
                         void (*fun)(struct cell *c, void *data), void *data) {
1049

1050
  int cid = 0;
1051
1052

  /* Call the recursive function on all higher-level cells. */
Pedro Gonnet's avatar
Pedro Gonnet committed
1053
1054
  for (cid = 0; cid < s->nr_cells; cid++)
    rec_map_cells_pre(&s->cells[cid], full, fun, data);
1055
}
Pedro Gonnet's avatar
Pedro Gonnet committed
1056
1057
1058
1059
1060
1061
1062

/**
 * @brief Split cells that contain too many particles.
 *
 * @param s The #space we are working in.
 * @param c The #cell under consideration.
 */
1063

1064
void space_do_split(struct space *s, struct cell *c) {
1065

1066
1067
1068
  const int count = c->count;
  const int gcount = c->gcount;
  int maxdepth = 0;
1069
1070
  float h_max = 0.0f;
  int ti_end_min = max_nr_timesteps, ti_end_max = 0;
1071
  struct cell *temp;
1072
1073
  struct part *parts = c->parts;
  struct gpart *gparts = c->gparts;
1074
  struct xpart *xparts = c->xparts;
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085

  /* Check the depth. */
  if (c->depth > s->maxdepth) s->maxdepth = c->depth;

  /* Split or let it be? */
  if (count > space_splitsize || gcount > space_splitsize) {

    /* No longer just a leaf. */
    c->split = 1;

    /* Create the cell's progeny. */
1086
    for (int k = 0; k < 8; k++) {
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
      temp = space_getcell(s);
      temp->count = 0;
      temp->gcount = 0;
      temp->loc[0] = c->loc[0];
      temp->loc[1] = c->loc[1];
      temp->loc[2] = c->loc[2];
      temp->h[0] = c->h[0] / 2;
      temp->h[1] = c->h[1] / 2;
      temp->