space.c 71.8 KB
Newer Older
1
/*******************************************************************************
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34
35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37
38
#endif

39
40
41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "const.h"
45
#include "cooling.h"
46
#include "engine.h"
47
#include "error.h"
48
49
#include "gravity.h"
#include "hydro.h"
50
#include "kernel_hydro.h"
51
#include "lock.h"
52
#include "memswap.h"
53
#include "minmax.h"
54
#include "runner.h"
55
#include "stars.h"
56
#include "threadpool.h"
57
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
58
59
60

/* Split size. */
int space_splitsize = space_splitsize_default;
61
int space_subsize = space_subsize_default;
62
int space_maxsize = space_maxsize_default;
63
int space_maxcount = space_maxcount_default;
Pedro Gonnet's avatar
Pedro Gonnet committed
64
65
66

/* Map shift vector to sortlist. */
const int sortlistID[27] = {
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
    /* ( -1 , -1 , -1 ) */ 0,
    /* ( -1 , -1 ,  0 ) */ 1,
    /* ( -1 , -1 ,  1 ) */ 2,
    /* ( -1 ,  0 , -1 ) */ 3,
    /* ( -1 ,  0 ,  0 ) */ 4,
    /* ( -1 ,  0 ,  1 ) */ 5,
    /* ( -1 ,  1 , -1 ) */ 6,
    /* ( -1 ,  1 ,  0 ) */ 7,
    /* ( -1 ,  1 ,  1 ) */ 8,
    /* (  0 , -1 , -1 ) */ 9,
    /* (  0 , -1 ,  0 ) */ 10,
    /* (  0 , -1 ,  1 ) */ 11,
    /* (  0 ,  0 , -1 ) */ 12,
    /* (  0 ,  0 ,  0 ) */ 0,
    /* (  0 ,  0 ,  1 ) */ 12,
    /* (  0 ,  1 , -1 ) */ 11,
    /* (  0 ,  1 ,  0 ) */ 10,
    /* (  0 ,  1 ,  1 ) */ 9,
    /* (  1 , -1 , -1 ) */ 8,
    /* (  1 , -1 ,  0 ) */ 7,
    /* (  1 , -1 ,  1 ) */ 6,
    /* (  1 ,  0 , -1 ) */ 5,
    /* (  1 ,  0 ,  0 ) */ 4,
    /* (  1 ,  0 ,  1 ) */ 3,
    /* (  1 ,  1 , -1 ) */ 2,
    /* (  1 ,  1 ,  0 ) */ 1,
    /* (  1 ,  1 ,  1 ) */ 0};

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/**
 * @brief Interval stack necessary for parallel particle sorting.
 */
struct qstack {
  volatile ptrdiff_t i, j;
  volatile int min, max;
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 */
struct parallel_sort {
  struct part *parts;
  struct gpart *gparts;
  struct xpart *xparts;
111
  struct spart *sparts;
112
113
114
115
116
117
  int *ind;
  struct qstack *stack;
  unsigned int stack_size;
  volatile unsigned int first, last, waiting;
};

118
119
120
121
122
123
124
125
126
/**
 * @brief Information required to compute the particle cell indices.
 */
struct index_data {
  struct space *s;
  struct cell *cells;
  int *ind;
};

127
128
129
130
131
132
133
134
135
136
137
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */
138
139
140
141
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
142
143
144
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
145
146
147
148
149
150
151
152
153
154
155
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
156
  int sid = 0;
157
  for (int k = 0; k < 3; k++)
158
159
160
161
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
162
    struct cell *temp = *ci;
163
164
    *ci = *cj;
    *cj = temp;
165
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
166
167
168
169
170
171
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}
172

173
/**
174
 * @brief Recursively dismantle a cell tree.
175
 *
176
177
 * @param s The #space.
 * @param c The #cell to recycle.
Matthieu Schaller's avatar
Matthieu Schaller committed
178
179
 * @param rec_begin Pointer to the start of the list of cells to recycle.
 * @param rec_end Pointer to the end of the list of cells to recycle.
180
 */
181
182
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
                               struct cell **rec_begin, struct cell **rec_end) {
183
  if (c->split)
184
    for (int k = 0; k < 8; k++)
185
      if (c->progeny[k] != NULL) {
186
187
188
189
        space_rebuild_recycle_rec(s, c->progeny[k], rec_begin, rec_end);
        c->progeny[k]->next = *rec_begin;
        *rec_begin = c->progeny[k];
        if (*rec_end == NULL) *rec_end = *rec_begin;
190
191
192
193
        c->progeny[k] = NULL;
      }
}

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct space *s = (struct space *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int k = 0; k < num_elements; k++) {
    struct cell *c = &cells[k];
    struct cell *rec_begin = NULL, *rec_end = NULL;
    space_rebuild_recycle_rec(s, c, &rec_begin, &rec_end);
    if (rec_begin != NULL) space_recycle_list(s, rec_begin, rec_end);
    c->sorts = NULL;
    c->nr_tasks = 0;
    c->density = NULL;
    c->gradient = NULL;
    c->force = NULL;
    c->grav = NULL;
    c->dx_max = 0.0f;
    c->sorted = 0;
    c->count = 0;
    c->gcount = 0;
215
    c->scount = 0;
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
    c->init = NULL;
    c->extra_ghost = NULL;
    c->ghost = NULL;
    c->kick = NULL;
    c->cooling = NULL;
    c->sourceterms = NULL;
    c->super = c;
    if (c->sort != NULL) {
      free(c->sort);
      c->sort = NULL;
    }
#if WITH_MPI
    c->recv_xv = NULL;
    c->recv_rho = NULL;
    c->recv_gradient = NULL;
    c->recv_ti = NULL;

    c->send_xv = NULL;
    c->send_rho = NULL;
    c->send_gradient = NULL;
    c->send_ti = NULL;
#endif
  }
}

241
/**
242
 * @brief Re-build the top-level cell grid.
243
 *
244
 * @param s The #space.
245
 * @param verbose Print messages to stdout or not.
246
 */
247
void space_regrid(struct space *s, int verbose) {
248

249
  const size_t nr_parts = s->nr_parts;
250
  const ticks tic = getticks();
251
  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
252

253
  /* Run through the cells and get the current h_max. */
254
  // tic = getticks();
255
  float h_max = s->cell_min / kernel_gamma / space_stretch;
256
  if (nr_parts > 0) {
257
    if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
258
      for (int k = 0; k < s->nr_cells; k++) {
259
260
261
        if (s->cells_top[k].nodeID == engine_rank &&
            s->cells_top[k].h_max > h_max) {
          h_max = s->cells_top[k].h_max;
262
        }
263
264
      }
    } else {
265
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
266
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
267
      }
268
269
270
271
272
273
274
275
276
277
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
278
      error("Failed to aggregate the rebuild flag across nodes.");
279
280
281
    h_max = buff;
  }
#endif
282
  if (verbose) message("h_max is %.3e (cell_min=%.3e).", h_max, s->cell_min);
283
284

  /* Get the new putative cell dimensions. */
285
  const int cdim[3] = {
286
287
288
289
290
291
      floor(s->dim[0] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[1] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[2] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min))};
292
293
294
295
296

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
297
298
299
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
300
301
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
302
        " - the (minimal) time-step is too large leading to particles with "
303
304
305
        "predicted smoothing lengths too large for the box size,\n"
        " - particle with velocities so large that they move by more than two "
        "box sizes per time-step.\n");
306

307
308
309
310
311
312
  /* Check if we have enough cells for gravity. */
  if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
    error(
        "Must have at least 8 cells in each spatial dimension when gravity "
        "is switched on.");

313
314
315
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
316
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
317
  double oldwidth[3];
318
319
320
321
322
323
324
325
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
326
327
328
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
329
330
331
332
333
334
335
336
337

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
338
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
339
340
341
342
343
        }
      }
    }
  }

344
345
346
347
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
348
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
349
350
351
      cdim[2] < s->cdim[2]) {

    /* Free the old cells, if they were allocated. */
352
    if (s->cells_top != NULL) {
353
354
      threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                     s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
355
      free(s->cells_top);
356
357
358
359
      s->maxdepth = 0;
    }

    /* Set the new cell dimensions only if smaller. */
360
    for (int k = 0; k < 3; k++) {
361
      s->cdim[k] = cdim[k];
362
363
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
364
    }
365
    const float dmin = min(s->width[0], min(s->width[1], s->width[2]));
366
367
368

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
369
    if (posix_memalign((void *)&s->cells_top, cell_align,
370
371
                       s->nr_cells * sizeof(struct cell)) != 0)
      error("Failed to allocate cells.");
372
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
373
374

    /* Set the cells' locks */
375
    for (int k = 0; k < s->nr_cells; k++)
376
377
      if (lock_init(&s->cells_top[k].lock) != 0)
        error("Failed to init spinlock.");
378
379

    /* Set the cell location and sizes. */
380
381
382
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
383
          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
384
385
386
387
388
389
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
390
391
392
393
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
394
          c->scount = 0;
395
          c->super = c;
396
          c->ti_old = ti_current;
397
          lock_init(&c->lock);
Pedro Gonnet's avatar
Pedro Gonnet committed
398
        }
399
400

    /* Be verbose about the change. */
401
402
403
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
404

405
#ifdef WITH_MPI
406
407
408
409
410
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
411
412
413
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
414

Matthieu Schaller's avatar
Matthieu Schaller committed
415
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
#ifdef HAVE_METIS
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
434

435
436
      /* Finished with these. */
      free(oldnodeIDs);
437
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
438
#endif /* WITH_MPI */
439
440
441
442

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

443
  }      /* re-build upper-level cells? */
444
  else { /* Otherwise, just clean up the cells. */
445
446

    /* Free the old cells, if they were allocated. */
447
448
    threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                   s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
449
450
    s->maxdepth = 0;
  }
451
452
453
454

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
455
}
456
457
458
459
460

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
461
 * @param verbose Print messages to stdout or not
462
463
 *
 */
464
void space_rebuild(struct space *s, int verbose) {
465

Matthieu Schaller's avatar
Matthieu Schaller committed
466
  const ticks tic = getticks();
467
468

  /* Be verbose about this. */
469
  // message("re)building space..."); fflush(stdout);
470
471

  /* Re-grid if necessary, or just re-set the cell data. */
472
  space_regrid(s, verbose);
473

Pedro Gonnet's avatar
Pedro Gonnet committed
474
475
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
476
  size_t nr_sparts = s->nr_sparts;
477
  struct cell *restrict cells_top = s->cells_top;
478
  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
479

Pedro Gonnet's avatar
Pedro Gonnet committed
480
481
482
  /* Run through the particles and get their cell index. Allocates
     an index that is larger than the number of particles to avoid
     re-allocating after shuffling. */
483
  const size_t ind_size = s->size_parts + 100;
484
485
  int *ind;
  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
486
    error("Failed to allocate temporary particle indices.");
487
  if (s->size_parts > 0) space_parts_get_cell_index(s, ind, cells_top, verbose);
488

489
  /* Run through the gravity particles and get their cell index. */
490
  const size_t gind_size = s->size_gparts + 100;
491
492
493
  int *gind;
  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
    error("Failed to allocate temporary g-particle indices.");
Pedro Gonnet's avatar
Pedro Gonnet committed
494
495
  if (s->size_gparts > 0)
    space_gparts_get_cell_index(s, gind, cells_top, verbose);
496

497
498
499
500
501
502
503
504
  /* Run through the star particles and get their cell index. */
  const size_t sind_size = s->size_sparts + 100;
  int *sind;
  if ((sind = (int *)malloc(sizeof(int) * sind_size)) == NULL)
    error("Failed to allocate temporary s-particle indices.");
  if (s->size_sparts > 0)
    space_sparts_get_cell_index(s, sind, cells_top, verbose);

505
#ifdef WITH_MPI
506

507
  /* Move non-local parts to the end of the list. */
508
  const int local_nodeID = s->e->nodeID;
509
  for (size_t k = 0; k < nr_parts;) {
510
    if (cells_top[ind[k]].nodeID != local_nodeID) {
511
      nr_parts -= 1;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
512
      const struct part tp = s->parts[k];
513
514
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
515
      if (s->parts[k].gpart != NULL) {
516
        s->parts[k].gpart->id_or_neg_offset = -k;
517
518
      }
      if (s->parts[nr_parts].gpart != NULL) {
519
        s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
520
      }
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
521
      const struct xpart txp = s->xparts[k];
522
523
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
524
      const int t = ind[k];
525
526
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
527
    } else {
528
529
530
531
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
532

533
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
534
  /* Check that all parts are in the correct places. */
535
  for (size_t k = 0; k < nr_parts; k++) {
536
    if (cells_top[ind[k]].nodeID != local_nodeID) {
537
538
539
540
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
541
    if (cells_top[ind[k]].nodeID == local_nodeID) {
542
      error("Failed to remove local parts from send list");
543
    }
544
545
  }
#endif
546

547
  /* Move non-local gparts to the end of the list. */
548
  for (size_t k = 0; k < nr_gparts;) {
549
    if (cells_top[gind[k]].nodeID != local_nodeID) {
550
      nr_gparts -= 1;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
551
      const struct gpart tp = s->gparts[k];
552
553
      s->gparts[k] = s->gparts[nr_gparts];
      s->gparts[nr_gparts] = tp;
554
555
      if (s->gparts[k].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
556
      }
557
558
559
      if (s->gparts[nr_gparts].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
560
      }
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
561
562
563
      const int t = gind[k];
      gind[k] = gind[nr_gparts];
      gind[nr_gparts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
564
    } else {
565
566
567
568
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
569

570
#ifdef SWIFT_DEBUG_CHECKS
571
572
  /* Check that all gparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_gparts; k++) {
573
    if (cells_top[gind[k]].nodeID != local_nodeID) {
574
575
576
577
      error("Failed to move all non-local gparts to send list");
    }
  }
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
578
    if (cells_top[gind[k]].nodeID == local_nodeID) {
579
      error("Failed to remove local gparts from send list");
580
    }
581
582
  }
#endif
583

584
585
  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
586
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
587
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
Pedro Gonnet's avatar
Pedro Gonnet committed
588
589
590
591
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);

  /* Set the new particle counts. */
592
  s->nr_parts = nr_parts + nr_parts_exchanged;
593
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
594
595

  /* Re-allocate the index array if needed.. */
596
  if (s->nr_parts + 1 > ind_size) {
597
    int *ind_new;
598
    if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL)
599
      error("Failed to allocate temporary particle indices.");
600
    memcpy(ind_new, ind, sizeof(int) * nr_parts);
601
602
    free(ind);
    ind = ind_new;
603
604
  }

605
606
607
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

608
  /* Assign each particle to its cell. */
Pedro Gonnet's avatar
Pedro Gonnet committed
609
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
610
    const struct part *const p = &s->parts[k];
611
    ind[k] =
612
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
613
#ifdef SWIFT_DEBUG_CHECKS
614
    if (cells_top[ind[k]].nodeID != local_nodeID)
615
      error("Received part that does not belong to me (nodeID=%i).",
616
            cells_top[ind[k]].nodeID);
617
#endif
618
  }
619
  nr_parts = s->nr_parts;
620
621

#endif /* WITH_MPI */
622
623

  /* Sort the parts according to their cells. */
624
625
  if (nr_parts > 0)
    space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
626

627
628
629
630
631
632
633
634
635
  /* Sort the sparts according to their cells. */
  if (nr_sparts > 0)
    space_sparts_sort(s, sind, nr_sparts, 0, s->nr_cells - 1, verbose);

  /* Re-link the gparts to their (s-)particles. */
  if (nr_parts > 0 && nr_gparts > 0)
    part_relink_gparts_to_parts(s->parts, nr_parts, 0);
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_gparts_to_sparts(s->sparts, nr_sparts, 0);
636

637
#ifdef SWIFT_DEBUG_CHECKS
638
  /* Verify space_sort_struct. */
639
640
641
  for (size_t k = 1; k < nr_parts; k++) {
    if (ind[k - 1] > ind[k]) {
      error("Sort failed!");
642
    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
643
644
                                    s->parts[k].x[1] * s->iwidth[1],
                                    s->parts[k].x[2] * s->iwidth[2])) {
645
646
647
648
      error("Incorrect indices!");
    }
  }
#endif
649

650
651
652
653
654
655
656
657
658
659
  /* Extract the cell counts from the sorted indices. */
  size_t last_index = 0;
  ind[nr_parts] = s->nr_cells;  // sentinel.
  for (size_t k = 0; k < nr_parts; k++) {
    if (ind[k] < ind[k + 1]) {
      cells_top[ind[k]].count = k - last_index + 1;
      last_index = k + 1;
    }
  }

660
661
662
663
664
665
666
667
668
669
  /* Extract the cell counts from the sorted indices. */
  size_t last_sindex = 0;
  sind[nr_parts] = s->nr_cells;  // sentinel.
  for (size_t k = 0; k < nr_sparts; k++) {
    if (sind[k] < sind[k + 1]) {
      cells_top[sind[k]].scount = k - last_sindex + 1;
      last_sindex = k + 1;
    }
  }

670
  /* We no longer need the indices as of here. */
671
  free(ind);
672
  free(sind);
673

674
675
676
#ifdef WITH_MPI

  /* Re-allocate the index array if needed.. */
677
  if (s->nr_gparts + 1 > gind_size) {
678
    int *gind_new;
679
    if ((gind_new = (int *)malloc(sizeof(int) * (s->nr_gparts + 1))) == NULL)
680
      error("Failed to allocate temporary g-particle indices.");
681
    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
682
683
684
685
686
    free(gind);
    gind = gind_new;
  }

  /* Assign each particle to its cell. */
687
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
688
    const struct gpart *const p = &s->gparts[k];
689
690
    gind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
691
692

#ifdef SWIFT_DEBUG_CHECKS
693
    if (cells_top[gind[k]].nodeID != s->e->nodeID)
694
      error("Received part that does not belong to me (nodeID=%i).",
695
            cells_top[gind[k]].nodeID);
696
#endif
697
698
699
  }
  nr_gparts = s->nr_gparts;

700
#endif /* WITH_MPI */
701

702
  /* Sort the gparts according to their cells. */
703
704
  if (nr_gparts > 0)
    space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
705
706

  /* Re-link the parts. */
707
  if (nr_parts > 0 && nr_gparts > 0)
708
709
710
711
712
    part_relink_parts_to_gparts(s->gparts, nr_gparts, s->parts);

  /* Re-link the sparts. */
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_sparts_to_gparts(s->gparts, nr_gparts, s->sparts);
713

714
715
  /* Extract the cell counts from the sorted indices. */
  size_t last_gindex = 0;
716
  gind[nr_gparts] = s->nr_cells;
717
718
  for (size_t k = 0; k < nr_gparts; k++) {
    if (gind[k] < gind[k + 1]) {
719
      cells_top[gind[k]].gcount = k - last_gindex + 1;
720
721
722
723
      last_gindex = k + 1;
    }
  }

724
  /* We no longer need the indices as of here. */
725
  free(gind);
726

727
#ifdef SWIFT_DEBUG_CHECKS
728
729
730
  /* Verify that the links are correct */
  for (size_t k = 0; k < nr_gparts; ++k) {

731
    if (s->gparts[k].id_or_neg_offset < 0) {
732

733
      const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset];
734

735
736
737
738
      if (part->gpart != &s->gparts[k]) error("Linking problem !");

      if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] ||
          s->gparts[k].x[2] != part->x[2])
739
740
741
742
743
        error("Linked particles are not at the same position !");
    }
  }
  for (size_t k = 0; k < nr_parts; ++k) {

744
    if (s->parts[k].gpart != NULL &&
745
        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
746
      error("Linking problem !");
747
748
    }
  }
749
#endif
750

751
752
  /* Hook the cells up to the parts. */
  // tic = getticks();
753
754
755
  struct part *finger = s->parts;
  struct xpart *xfinger = s->xparts;
  struct gpart *gfinger = s->gparts;
756
  struct spart *sfinger = s->sparts;
757
  for (int k = 0; k < s->nr_cells; k++) {
758
    struct cell *restrict c = &cells_top[k];
759
    c->ti_old = ti_current;
760
761
762
    c->parts = finger;
    c->xparts = xfinger;
    c->gparts = gfinger;
763
    c->sparts = sfinger;
764
765
766
    finger = &finger[c->count];
    xfinger = &xfinger[c->count];
    gfinger = &gfinger[c->gcount];
767
    sfinger = &sfinger[c->scount];
768
  }
769
  // message( "hooking up cells took %.3f %s." ,
Matthieu Schaller's avatar
Matthieu Schaller committed
770
  // clocks_from_ticks(getticks() - tic), clocks_getunit());
771
772
773

  /* At this point, we have the upper-level cells, old or new. Now make
     sure that the parts in each cell are ok. */
774
  space_split(s, cells_top, s->nr_cells, verbose);
775
776
777
778
779
780
781
782
783

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Split particles between cells of a hierarchy
 *
784
785
 * This is done in parallel using threads in the #threadpool.
 *
786
 * @param s The #space.
787
788
 * @param cells The cell hierarchy.
 * @param nr_cells The number of cells.
789
790
 * @param verbose Are we talkative ?
 */
791
792
void space_split(struct space *s, struct cell *cells, int nr_cells,
                 int verbose) {
793

Matthieu Schaller's avatar
Matthieu Schaller committed
794
  const ticks tic = getticks();
795

796
  threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells,
797
                 sizeof(struct cell), 1, s);
798

799
800
801
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
802
}
803

804
805
806
807
808
809
810
811
/**
 * @brief Runs through the top-level cells and checks whether tasks associated
 * with them can be split. If not, try to sanitize the cells.
 *
 * @param s The #space to act upon.
 */
void space_sanitize(struct space *s) {

812
813
  s->sanitized = 1;

814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
  for (int k = 0; k < s->nr_cells; k++) {

    struct cell *c = &s->cells_top[k];
    const double min_width = c->dmin;

    /* Do we have a problem ? */
    if (c->h_max * kernel_gamma * space_stretch > min_width * 0.5 &&
        c->count > space_maxcount) {

      /* Ok, clean-up the mess */
      cell_sanitize(c);
    }
  }
}

829
/**
830
 * @brief #threadpool mapper function to compute the particle cell indices.
831
 *
832
 * @param map_data Pointer towards the particles.
833
 * @param nr_parts The number of particles to treat.
834
 * @param extra_data Pointers to the space and index list
835
 */
836
837
void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
                                       void *extra_data) {
838

839
840
841
842
  /* Unpack the data */
  struct part *restrict parts = (struct part *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
843
  int *const ind = data->ind + (ptrdiff_t)(parts - s->parts);
844
845

  /* Get some constants */
846
847
848
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
849
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
850
851
852
  const double ih_x = s->iwidth[0];
  const double ih_y = s->iwidth[1];
  const double ih_z = s->iwidth[2];
853

854
  for (int k = 0; k < nr_parts; k++) {
855
856
857
858

    /* Get the particle */
    struct part *restrict p = &parts[k];

859
860
861
862
    const double old_pos_x = p->x[0];
    const double old_pos_y = p->x[1];
    const double old_pos_z = p->x[2];

863
    /* Put it back into the simulation volume */
864
865
866
    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
867
868

    /* Get its cell index */
Matthieu Schaller's avatar
Matthieu Schaller committed
869
870
    const int index =
        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
871
    ind[k] = index;
872

873
874
875
876
    /* Update the position */
    p->x[0] = pos_x;
    p->x[1] = pos_y;
    p->x[2] = pos_z;
877
878
879
880
  }
}

/**
881
 * @brief #threadpool mapper function to compute the g-particle cell indices.
882
 *
883
 * @param map_data Pointer towards the g-particles.
884
 * @param nr_gparts The number of g-particles to treat.
885
 * @param extra_data Pointers to the space and index list
886
 */
887
888
void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts,
                                        void *extra_data) {
889

890
891
892
893
  /* Unpack the data */
  struct gpart *restrict gparts = (struct gpart *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
894
  int *const ind = data->ind + (ptrdiff_t)(gparts - s->gparts);
895
896

  /* Get some constants */
Matthieu Schaller's avatar
Matthieu Schaller committed
897
898
899
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
900
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
Matthieu Schaller's avatar
Matthieu Schaller committed
901
902
903
  const double ih_x = s->iwidth[0];
  const double ih_y = s->iwidth[1];
  const double ih_z = s->iwidth[2];
904

905
  for (int k = 0; k < nr_gparts; k++) {
906
907
908
909

    /* Get the particle */
    struct gpart *restrict gp = &gparts[k];

Matthieu Schaller's avatar
Matthieu Schaller committed
910
911
912
913
    const double old_pos_x = gp->x[0];
    const double old_pos_y = gp->x[1];
    const double old_pos_z = gp->x[2];

914
    /* Put it back into the simulation volume */
Matthieu Schaller's avatar
Matthieu Schaller committed
915
916
917
    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
918
919
920

    /* Get its cell index */
    const int index =
Matthieu Schaller's avatar
Matthieu Schaller committed
921
        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
922
    ind[k] = index;
Matthieu Schaller's avatar
Matthieu Schaller committed
923
924
925
926
927

    /* Update the position */
    gp->x[0] = pos_x;
    gp->x[1] = pos_y;
    gp->x[2] = pos_z;
928
929
930
  }
}

931
/**
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
 * @brief #threadpool mapper function to compute the s-particle cell indices.
 *
 * @param map_data Pointer towards the s-particles.
 * @param nr_sparts The number of s-particles to treat.
 * @param extra_data Pointers to the space and index list
 */
void space_sparts_get_cell_index_mapper(void *map_data, int nr_sparts,
                                        void *extra_data) {

  /* Unpack the data */
  struct spart *restrict sparts = (struct spart *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
  int *const ind = data->ind + (ptrdiff_t)(sparts - s->sparts);

  /* Get some constants */
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};