space.c 73.7 KB
Newer Older
1
/*******************************************************************************
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34
35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37
38
#endif

39
40
41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "const.h"
45
#include "cooling.h"
46
#include "engine.h"
47
#include "error.h"
48
49
#include "gravity.h"
#include "hydro.h"
50
#include "kernel_hydro.h"
51
#include "lock.h"
52
#include "memswap.h"
53
#include "minmax.h"
54
#include "runner.h"
55
#include "stars.h"
56
#include "threadpool.h"
57
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
58
59
60

/* Split size. */
int space_splitsize = space_splitsize_default;
61
int space_subsize = space_subsize_default;
62
int space_maxsize = space_maxsize_default;
63
int space_maxcount = space_maxcount_default;
Pedro Gonnet's avatar
Pedro Gonnet committed
64
65
66

/* Map shift vector to sortlist. */
const int sortlistID[27] = {
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
    /* ( -1 , -1 , -1 ) */ 0,
    /* ( -1 , -1 ,  0 ) */ 1,
    /* ( -1 , -1 ,  1 ) */ 2,
    /* ( -1 ,  0 , -1 ) */ 3,
    /* ( -1 ,  0 ,  0 ) */ 4,
    /* ( -1 ,  0 ,  1 ) */ 5,
    /* ( -1 ,  1 , -1 ) */ 6,
    /* ( -1 ,  1 ,  0 ) */ 7,
    /* ( -1 ,  1 ,  1 ) */ 8,
    /* (  0 , -1 , -1 ) */ 9,
    /* (  0 , -1 ,  0 ) */ 10,
    /* (  0 , -1 ,  1 ) */ 11,
    /* (  0 ,  0 , -1 ) */ 12,
    /* (  0 ,  0 ,  0 ) */ 0,
    /* (  0 ,  0 ,  1 ) */ 12,
    /* (  0 ,  1 , -1 ) */ 11,
    /* (  0 ,  1 ,  0 ) */ 10,
    /* (  0 ,  1 ,  1 ) */ 9,
    /* (  1 , -1 , -1 ) */ 8,
    /* (  1 , -1 ,  0 ) */ 7,
    /* (  1 , -1 ,  1 ) */ 6,
    /* (  1 ,  0 , -1 ) */ 5,
    /* (  1 ,  0 ,  0 ) */ 4,
    /* (  1 ,  0 ,  1 ) */ 3,
    /* (  1 ,  1 , -1 ) */ 2,
    /* (  1 ,  1 ,  0 ) */ 1,
    /* (  1 ,  1 ,  1 ) */ 0};

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/**
 * @brief Interval stack necessary for parallel particle sorting.
 */
struct qstack {
  volatile ptrdiff_t i, j;
  volatile int min, max;
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 */
struct parallel_sort {
  struct part *parts;
  struct gpart *gparts;
  struct xpart *xparts;
111
  struct spart *sparts;
112
113
114
115
116
117
  int *ind;
  struct qstack *stack;
  unsigned int stack_size;
  volatile unsigned int first, last, waiting;
};

118
119
120
121
122
123
124
125
126
/**
 * @brief Information required to compute the particle cell indices.
 */
struct index_data {
  struct space *s;
  struct cell *cells;
  int *ind;
};

127
128
129
130
131
132
133
134
135
136
137
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */
138
139
140
141
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
142
143
144
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
145
146
147
148
149
150
151
152
153
154
155
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
156
  int sid = 0;
157
  for (int k = 0; k < 3; k++)
158
159
160
161
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
162
    struct cell *temp = *ci;
163
164
    *ci = *cj;
    *cj = temp;
165
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
166
167
168
169
170
171
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}
172

173
/**
174
 * @brief Recursively dismantle a cell tree.
175
 *
176
177
 * @param s The #space.
 * @param c The #cell to recycle.
Matthieu Schaller's avatar
Matthieu Schaller committed
178
179
 * @param rec_begin Pointer to the start of the list of cells to recycle.
 * @param rec_end Pointer to the end of the list of cells to recycle.
180
 */
181
182
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
                               struct cell **rec_begin, struct cell **rec_end) {
183
  if (c->split)
184
    for (int k = 0; k < 8; k++)
185
      if (c->progeny[k] != NULL) {
186
187
188
189
        space_rebuild_recycle_rec(s, c->progeny[k], rec_begin, rec_end);
        c->progeny[k]->next = *rec_begin;
        *rec_begin = c->progeny[k];
        if (*rec_end == NULL) *rec_end = *rec_begin;
190
191
192
193
        c->progeny[k] = NULL;
      }
}

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct space *s = (struct space *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int k = 0; k < num_elements; k++) {
    struct cell *c = &cells[k];
    struct cell *rec_begin = NULL, *rec_end = NULL;
    space_rebuild_recycle_rec(s, c, &rec_begin, &rec_end);
    if (rec_begin != NULL) space_recycle_list(s, rec_begin, rec_end);
    c->sorts = NULL;
    c->nr_tasks = 0;
    c->density = NULL;
    c->gradient = NULL;
    c->force = NULL;
    c->grav = NULL;
    c->dx_max = 0.0f;
    c->sorted = 0;
    c->count = 0;
    c->gcount = 0;
215
    c->scount = 0;
216
217
218
    c->init = NULL;
    c->extra_ghost = NULL;
    c->ghost = NULL;
219
220
    c->kick1 = NULL;
    c->kick2 = NULL;
221
    c->timestep = NULL;
222
    c->drift = NULL;
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
    c->cooling = NULL;
    c->sourceterms = NULL;
    c->super = c;
    if (c->sort != NULL) {
      free(c->sort);
      c->sort = NULL;
    }
#if WITH_MPI
    c->recv_xv = NULL;
    c->recv_rho = NULL;
    c->recv_gradient = NULL;
    c->recv_ti = NULL;

    c->send_xv = NULL;
    c->send_rho = NULL;
    c->send_gradient = NULL;
    c->send_ti = NULL;
#endif
  }
}

244
/**
245
 * @brief Re-build the top-level cell grid.
246
 *
247
 * @param s The #space.
248
 * @param verbose Print messages to stdout or not.
249
 */
250
void space_regrid(struct space *s, int verbose) {
251

252
  const size_t nr_parts = s->nr_parts;
253
  const ticks tic = getticks();
254
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
255

256
  /* Run through the cells and get the current h_max. */
257
  // tic = getticks();
258
  float h_max = s->cell_min / kernel_gamma / space_stretch;
259
  if (nr_parts > 0) {
260
    if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
261
      for (int k = 0; k < s->nr_cells; k++) {
262
263
264
        if (s->cells_top[k].nodeID == engine_rank &&
            s->cells_top[k].h_max > h_max) {
          h_max = s->cells_top[k].h_max;
265
        }
266
267
      }
    } else {
268
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
269
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
270
      }
271
272
273
274
275
276
277
278
279
280
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
281
      error("Failed to aggregate the rebuild flag across nodes.");
282
283
284
    h_max = buff;
  }
#endif
285
  if (verbose) message("h_max is %.3e (cell_min=%.3e).", h_max, s->cell_min);
286
287

  /* Get the new putative cell dimensions. */
288
  const int cdim[3] = {
289
290
291
292
293
294
      floor(s->dim[0] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[1] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[2] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min))};
295
296
297
298
299

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
300
301
302
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
303
304
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
305
        " - the (minimal) time-step is too large leading to particles with "
306
307
308
        "predicted smoothing lengths too large for the box size,\n"
        " - particle with velocities so large that they move by more than two "
        "box sizes per time-step.\n");
309

310
311
312
313
314
315
  /* Check if we have enough cells for gravity. */
  if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
    error(
        "Must have at least 8 cells in each spatial dimension when gravity "
        "is switched on.");

316
317
318
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
319
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
320
  double oldwidth[3];
321
322
323
324
325
326
327
328
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
329
330
331
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
332
333
334
335
336
337
338
339
340

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
341
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
342
343
344
345
346
        }
      }
    }
  }

347
348
349
350
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
351
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
352
353
      cdim[2] < s->cdim[2]) {

354
355
356
357
358
359
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
    message("re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]);
    fflush(stdout);
#endif

360
    /* Free the old cells, if they were allocated. */
361
    if (s->cells_top != NULL) {
362
363
      threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                     s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
364
      free(s->cells_top);
365
366
367
368
      s->maxdepth = 0;
    }

    /* Set the new cell dimensions only if smaller. */
369
    for (int k = 0; k < 3; k++) {
370
      s->cdim[k] = cdim[k];
371
372
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
373
    }
374
    const float dmin = min(s->width[0], min(s->width[1], s->width[2]));
375
376
377

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
378
    if (posix_memalign((void *)&s->cells_top, cell_align,
379
380
                       s->nr_cells * sizeof(struct cell)) != 0)
      error("Failed to allocate cells.");
381
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
382
383

    /* Set the cells' locks */
384
    for (int k = 0; k < s->nr_cells; k++)
385
386
      if (lock_init(&s->cells_top[k].lock) != 0)
        error("Failed to init spinlock.");
387
388

    /* Set the cell location and sizes. */
389
390
391
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
392
          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
393
394
395
396
397
398
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
399
400
401
402
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
403
          c->scount = 0;
404
          c->super = c;
405
          c->ti_old = ti_current;
406
          lock_init(&c->lock);
Pedro Gonnet's avatar
Pedro Gonnet committed
407
        }
408
409

    /* Be verbose about the change. */
410
411
412
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
413

414
#ifdef WITH_MPI
415
416
417
418
419
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
420
421
422
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
423

Matthieu Schaller's avatar
Matthieu Schaller committed
424
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
#ifdef HAVE_METIS
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
443

444
445
      /* Finished with these. */
      free(oldnodeIDs);
446
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
447
#endif /* WITH_MPI */
448
449
450
451

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

452
  }      /* re-build upper-level cells? */
453
  else { /* Otherwise, just clean up the cells. */
454
455

    /* Free the old cells, if they were allocated. */
456
457
    threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                   s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
458
459
    s->maxdepth = 0;
  }
460
461
462
463

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
464
}
465
466
467
468
469

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
470
 * @param verbose Print messages to stdout or not
471
472
 *
 */
473
void space_rebuild(struct space *s, int verbose) {
474

Matthieu Schaller's avatar
Matthieu Schaller committed
475
  const ticks tic = getticks();
476

477
478
479
480
481
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
  message("re)building space");
  fflush(stdout);
#endif
482
483

  /* Re-grid if necessary, or just re-set the cell data. */
484
  space_regrid(s, verbose);
485

Pedro Gonnet's avatar
Pedro Gonnet committed
486
487
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
488
  size_t nr_sparts = s->nr_sparts;
489
  struct cell *restrict cells_top = s->cells_top;
490
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
491

Pedro Gonnet's avatar
Pedro Gonnet committed
492
493
494
  /* Run through the particles and get their cell index. Allocates
     an index that is larger than the number of particles to avoid
     re-allocating after shuffling. */
495
  const size_t ind_size = s->size_parts + 100;
496
497
  int *ind;
  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
498
    error("Failed to allocate temporary particle indices.");
499
  if (s->size_parts > 0) space_parts_get_cell_index(s, ind, cells_top, verbose);
500

501
  /* Run through the gravity particles and get their cell index. */
502
  const size_t gind_size = s->size_gparts + 100;
503
504
505
  int *gind;
  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
    error("Failed to allocate temporary g-particle indices.");
Pedro Gonnet's avatar
Pedro Gonnet committed
506
507
  if (s->size_gparts > 0)
    space_gparts_get_cell_index(s, gind, cells_top, verbose);
508

509
510
511
512
513
514
515
516
  /* Run through the star particles and get their cell index. */
  const size_t sind_size = s->size_sparts + 100;
  int *sind;
  if ((sind = (int *)malloc(sizeof(int) * sind_size)) == NULL)
    error("Failed to allocate temporary s-particle indices.");
  if (s->size_sparts > 0)
    space_sparts_get_cell_index(s, sind, cells_top, verbose);

517
#ifdef WITH_MPI
518

519
  /* Move non-local parts to the end of the list. */
520
  const int local_nodeID = s->e->nodeID;
521
  for (size_t k = 0; k < nr_parts;) {
522
    if (cells_top[ind[k]].nodeID != local_nodeID) {
523
      nr_parts -= 1;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
524
      const struct part tp = s->parts[k];
525
526
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
527
      if (s->parts[k].gpart != NULL) {
528
        s->parts[k].gpart->id_or_neg_offset = -k;
529
530
      }
      if (s->parts[nr_parts].gpart != NULL) {
531
        s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
532
      }
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
533
      const struct xpart txp = s->xparts[k];
534
535
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
536
      const int t = ind[k];
537
538
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
539
    } else {
540
541
542
543
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
544

545
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
546
  /* Check that all parts are in the correct places. */
547
  for (size_t k = 0; k < nr_parts; k++) {
548
    if (cells_top[ind[k]].nodeID != local_nodeID) {
549
550
551
552
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
553
    if (cells_top[ind[k]].nodeID == local_nodeID) {
554
      error("Failed to remove local parts from send list");
555
    }
556
557
  }
#endif
558

559
  /* Move non-local gparts to the end of the list. */
560
  for (size_t k = 0; k < nr_gparts;) {
561
    if (cells_top[gind[k]].nodeID != local_nodeID) {
562
      nr_gparts -= 1;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
563
      const struct gpart tp = s->gparts[k];
564
565
      s->gparts[k] = s->gparts[nr_gparts];
      s->gparts[nr_gparts] = tp;
566
567
      if (s->gparts[k].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
568
      }
569
570
571
      if (s->gparts[nr_gparts].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
572
      }
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
573
574
575
      const int t = gind[k];
      gind[k] = gind[nr_gparts];
      gind[nr_gparts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
576
    } else {
577
578
579
580
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
581

582
#ifdef SWIFT_DEBUG_CHECKS
583
584
  /* Check that all gparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_gparts; k++) {
585
    if (cells_top[gind[k]].nodeID != local_nodeID) {
586
587
588
589
      error("Failed to move all non-local gparts to send list");
    }
  }
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
590
    if (cells_top[gind[k]].nodeID == local_nodeID) {
591
      error("Failed to remove local gparts from send list");
592
    }
593
594
  }
#endif
595

596
597
  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
598
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
599
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
Pedro Gonnet's avatar
Pedro Gonnet committed
600
601
602
603
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);

  /* Set the new particle counts. */
604
  s->nr_parts = nr_parts + nr_parts_exchanged;
605
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
606
607

  /* Re-allocate the index array if needed.. */
608
  if (s->nr_parts + 1 > ind_size) {
609
    int *ind_new;
610
    if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL)
611
      error("Failed to allocate temporary particle indices.");
612
    memcpy(ind_new, ind, sizeof(int) * nr_parts);
613
614
    free(ind);
    ind = ind_new;
615
616
  }

617
618
619
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

620
  /* Assign each particle to its cell. */
Pedro Gonnet's avatar
Pedro Gonnet committed
621
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
622
    const struct part *const p = &s->parts[k];
623
    ind[k] =
624
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
625
#ifdef SWIFT_DEBUG_CHECKS
626
    if (cells_top[ind[k]].nodeID != local_nodeID)
627
      error("Received part that does not belong to me (nodeID=%i).",
628
            cells_top[ind[k]].nodeID);
629
#endif
630
  }
631
  nr_parts = s->nr_parts;
632
633

#endif /* WITH_MPI */
634
635

  /* Sort the parts according to their cells. */
636
637
  if (nr_parts > 0)
    space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
638

639
640
641
642
643
644
645
646
647
  /* Sort the sparts according to their cells. */
  if (nr_sparts > 0)
    space_sparts_sort(s, sind, nr_sparts, 0, s->nr_cells - 1, verbose);

  /* Re-link the gparts to their (s-)particles. */
  if (nr_parts > 0 && nr_gparts > 0)
    part_relink_gparts_to_parts(s->parts, nr_parts, 0);
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_gparts_to_sparts(s->sparts, nr_sparts, 0);
648

649
#ifdef SWIFT_DEBUG_CHECKS
650
  /* Verify space_sort_struct. */
651
652
653
  for (size_t k = 1; k < nr_parts; k++) {
    if (ind[k - 1] > ind[k]) {
      error("Sort failed!");
654
    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
655
656
                                    s->parts[k].x[1] * s->iwidth[1],
                                    s->parts[k].x[2] * s->iwidth[2])) {
657
658
659
660
      error("Incorrect indices!");
    }
  }
#endif
661

662
663
664
665
666
667
668
669
670
671
  /* Extract the cell counts from the sorted indices. */
  size_t last_index = 0;
  ind[nr_parts] = s->nr_cells;  // sentinel.
  for (size_t k = 0; k < nr_parts; k++) {
    if (ind[k] < ind[k + 1]) {
      cells_top[ind[k]].count = k - last_index + 1;
      last_index = k + 1;
    }
  }

672
673
  /* Extract the cell counts from the sorted indices. */
  size_t last_sindex = 0;
674
  sind[nr_sparts] = s->nr_cells;  // sentinel.
675
676
677
678
679
680
681
  for (size_t k = 0; k < nr_sparts; k++) {
    if (sind[k] < sind[k + 1]) {
      cells_top[sind[k]].scount = k - last_sindex + 1;
      last_sindex = k + 1;
    }
  }

682
  /* We no longer need the indices as of here. */
683
  free(ind);
684
  free(sind);
685

686
687
688
#ifdef WITH_MPI

  /* Re-allocate the index array if needed.. */
689
  if (s->nr_gparts + 1 > gind_size) {
690
    int *gind_new;
691
    if ((gind_new = (int *)malloc(sizeof(int) * (s->nr_gparts + 1))) == NULL)
692
      error("Failed to allocate temporary g-particle indices.");
693
    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
694
695
696
697
698
    free(gind);
    gind = gind_new;
  }

  /* Assign each particle to its cell. */
699
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
700
    const struct gpart *const p = &s->gparts[k];
701
702
    gind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
703
704

#ifdef SWIFT_DEBUG_CHECKS
705
    if (cells_top[gind[k]].nodeID != s->e->nodeID)
706
      error("Received part that does not belong to me (nodeID=%i).",
707
            cells_top[gind[k]].nodeID);
708
#endif
709
710
711
  }
  nr_gparts = s->nr_gparts;

712
#endif /* WITH_MPI */
713

714
  /* Sort the gparts according to their cells. */
715
716
  if (nr_gparts > 0)
    space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
717
718

  /* Re-link the parts. */
719
  if (nr_parts > 0 && nr_gparts > 0)
720
721
722
723
724
    part_relink_parts_to_gparts(s->gparts, nr_gparts, s->parts);

  /* Re-link the sparts. */
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_sparts_to_gparts(s->gparts, nr_gparts, s->sparts);
725

726
727
  /* Extract the cell counts from the sorted indices. */
  size_t last_gindex = 0;
728
  gind[nr_gparts] = s->nr_cells;
729
730
  for (size_t k = 0; k < nr_gparts; k++) {
    if (gind[k] < gind[k + 1]) {
731
      cells_top[gind[k]].gcount = k - last_gindex + 1;
732
733
734
735
      last_gindex = k + 1;
    }
  }

736
  /* We no longer need the indices as of here. */
737
  free(gind);
738

739
#ifdef SWIFT_DEBUG_CHECKS
740
  /* Verify that the links are correct */
741
742
  part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts,
                    nr_sparts);
743
#endif
744

745
746
  /* Hook the cells up to the parts. */
  // tic = getticks();
747
748
749
  struct part *finger = s->parts;
  struct xpart *xfinger = s->xparts;
  struct gpart *gfinger = s->gparts;
750
  struct spart *sfinger = s->sparts;
751
  for (int k = 0; k < s->nr_cells; k++) {
752
    struct cell *restrict c = &cells_top[k];
753
    c->ti_old = ti_current;
754
755
756
    c->parts = finger;
    c->xparts = xfinger;
    c->gparts = gfinger;
757
    c->sparts = sfinger;
758
759
760
    finger = &finger[c->count];
    xfinger = &xfinger[c->count];
    gfinger = &gfinger[c->gcount];
761
    sfinger = &sfinger[c->scount];
762
  }
763
  // message( "hooking up cells took %.3f %s." ,
Matthieu Schaller's avatar
Matthieu Schaller committed
764
  // clocks_from_ticks(getticks() - tic), clocks_getunit());
765
766
767

  /* At this point, we have the upper-level cells, old or new. Now make
     sure that the parts in each cell are ok. */
768
  space_split(s, cells_top, s->nr_cells, verbose);
769
770
771
772
773
774
775
776
777

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Split particles between cells of a hierarchy
 *
778
779
 * This is done in parallel using threads in the #threadpool.
 *
780
 * @param s The #space.
781
782
 * @param cells The cell hierarchy.
 * @param nr_cells The number of cells.
783
784
 * @param verbose Are we talkative ?
 */
785
786
void space_split(struct space *s, struct cell *cells, int nr_cells,
                 int verbose) {
787

Matthieu Schaller's avatar
Matthieu Schaller committed
788
  const ticks tic = getticks();
789

790
  threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells,
791
                 sizeof(struct cell), 1, s);
792

793
794
795
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
796
}
797

798
799
800
801
802
803
804
805
/**
 * @brief Runs through the top-level cells and checks whether tasks associated
 * with them can be split. If not, try to sanitize the cells.
 *
 * @param s The #space to act upon.
 */
void space_sanitize(struct space *s) {

806
807
  s->sanitized = 1;

808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
  for (int k = 0; k < s->nr_cells; k++) {

    struct cell *c = &s->cells_top[k];
    const double min_width = c->dmin;

    /* Do we have a problem ? */
    if (c->h_max * kernel_gamma * space_stretch > min_width * 0.5 &&
        c->count > space_maxcount) {

      /* Ok, clean-up the mess */
      cell_sanitize(c);
    }
  }
}

823
/**
824
 * @brief #threadpool mapper function to compute the particle cell indices.
825
 *
826
 * @param map_data Pointer towards the particles.
827
 * @param nr_parts The number of particles to treat.
828
 * @param extra_data Pointers to the space and index list
829
 */
830
831
void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
                                       void *extra_data) {
832

833
834
835
836
  /* Unpack the data */
  struct part *restrict parts = (struct part *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
837
  int *const ind = data->ind + (ptrdiff_t)(parts - s->parts);
838
839

  /* Get some constants */
840
841
842
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
843
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
844
845
846
  const double ih_x = s->iwidth[0];
  const double ih_y = s->iwidth[1];
  const double ih_z = s->iwidth[2];
847

848
  for (int k = 0; k < nr_parts; k++) {
849
850
851
852

    /* Get the particle */
    struct part *restrict p = &parts[k];

853
854
855
856
    const double old_pos_x = p->x[0];
    const double old_pos_y = p->x[1];
    const double old_pos_z = p->x[2];

857
    /* Put it back into the simulation volume */
858
859
860
    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
861
862

    /* Get its cell index */
Matthieu Schaller's avatar
Matthieu Schaller committed
863
864
    const int index =
        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
865
    ind[k] = index;
866

867
868
869
870
    /* Update the position */
    p->x[0] = pos_x;
    p->x[1] = pos_y;
    p->x[2] = pos_z;
871
872
873
874
  }
}

/**
875
 * @brief #threadpool mapper function to compute the g-particle cell indices.
876
 *
877
 * @param map_data Pointer towards the g-particles.
878
 * @param nr_gparts The number of g-particles to treat.
879
 * @param extra_data Pointers to the space and index list
880
 */
881
882
void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts,
                                        void *extra_data) {
883

884
885
886
887
  /* Unpack the data */
  struct gpart *restrict gparts = (struct gpart *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
888
  int *const ind = data->ind + (ptrdiff_t)(gparts - s->gparts);
889
890

  /* Get some constants */
Matthieu Schaller's avatar
Matthieu Schaller committed
891
892
893
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
894
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
Matthieu Schaller's avatar
Matthieu Schaller committed
895
896
897
  const double ih_x = s->iwidth[0];
  const double ih_y = s->iwidth[1];
  const double ih_z = s->iwidth[2];
898

899
  for (int k = 0; k < nr_gparts; k++) {
900
901
902
903

    /* Get the particle */
    struct gpart *restrict gp = &gparts[k];

Matthieu Schaller's avatar
Matthieu Schaller committed
904
905
906
907
    const double old_pos_x = gp->x[0];
    const double old_pos_y = gp->x[1];
    const double old_pos_z = gp->x[2];

908
    /* Put it back into the simulation volume */
Matthieu Schaller's avatar
Matthieu Schaller committed
909
910
911
    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
912
913
914

    /* Get its cell index */
    const int index =
Matthieu Schaller's avatar
Matthieu Schaller committed
915
        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
916
    ind[k] = index;
Matthieu Schaller's avatar
Matthieu Schaller committed
917
918
919
920
921

    /* Update the position */
    gp->x[0] = pos_x;
    gp->x[1] = pos_y;
    gp->x[2] = pos_z;
922
923
924
  }
}

925
/**
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
 * @brief #threadpool mapper function to compute the s-particle cell indices.
 *
 * @param map_data Pointer towards the s-particles.
 * @param nr_sparts The number of s-particles to treat.
 * @param extra_data Pointers to the space and index list
 */
void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts,
                                        void *extra_data) {

  /* Unpack the data */
  struct spart *restrict sparts = (struct spart *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
  int *const ind = data->ind + (ptrdiff_t)(sparts - s->sparts);

  /* Get some constants */
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih_x = s->iwidth[0];
  const double ih_y = s->iwidth[1];
  const double ih_z = s->iwidth[2];

  for (int k = 0; k < nr_sparts; k++) {

    /* Get the particle */
    struct spart *restrict sp = &sparts[k];

    const double old_pos_x = sp->x[0];
    const double old_pos_y = sp->x[1];
    const double old_pos_z = sp->x[2];

    /* Put it back into the simulation volume */
    const double pos_x = box_wrap(old_pos_x, 0.0