space.c 76.5 KB
Newer Older
1
/*******************************************************************************
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34
35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37
38
#endif

39
40
41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "const.h"
45
#include "cooling.h"
46
#include "engine.h"
47
#include "error.h"
48
49
#include "gravity.h"
#include "hydro.h"
50
#include "kernel_hydro.h"
51
#include "lock.h"
52
#include "memswap.h"
53
#include "minmax.h"
54
#include "runner.h"
55
#include "stars.h"
56
#include "threadpool.h"
57
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
58
59
60

/* Split size. */
int space_splitsize = space_splitsize_default;
61
int space_subsize = space_subsize_default;
62
int space_maxsize = space_maxsize_default;
63
int space_maxcount = space_maxcount_default;
Pedro Gonnet's avatar
Pedro Gonnet committed
64
65
66

/* Map shift vector to sortlist. */
const int sortlistID[27] = {
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
    /* ( -1 , -1 , -1 ) */ 0,
    /* ( -1 , -1 ,  0 ) */ 1,
    /* ( -1 , -1 ,  1 ) */ 2,
    /* ( -1 ,  0 , -1 ) */ 3,
    /* ( -1 ,  0 ,  0 ) */ 4,
    /* ( -1 ,  0 ,  1 ) */ 5,
    /* ( -1 ,  1 , -1 ) */ 6,
    /* ( -1 ,  1 ,  0 ) */ 7,
    /* ( -1 ,  1 ,  1 ) */ 8,
    /* (  0 , -1 , -1 ) */ 9,
    /* (  0 , -1 ,  0 ) */ 10,
    /* (  0 , -1 ,  1 ) */ 11,
    /* (  0 ,  0 , -1 ) */ 12,
    /* (  0 ,  0 ,  0 ) */ 0,
    /* (  0 ,  0 ,  1 ) */ 12,
    /* (  0 ,  1 , -1 ) */ 11,
    /* (  0 ,  1 ,  0 ) */ 10,
    /* (  0 ,  1 ,  1 ) */ 9,
    /* (  1 , -1 , -1 ) */ 8,
    /* (  1 , -1 ,  0 ) */ 7,
    /* (  1 , -1 ,  1 ) */ 6,
    /* (  1 ,  0 , -1 ) */ 5,
    /* (  1 ,  0 ,  0 ) */ 4,
    /* (  1 ,  0 ,  1 ) */ 3,
    /* (  1 ,  1 , -1 ) */ 2,
    /* (  1 ,  1 ,  0 ) */ 1,
    /* (  1 ,  1 ,  1 ) */ 0};

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/**
 * @brief Interval stack necessary for parallel particle sorting.
 */
struct qstack {
  volatile ptrdiff_t i, j;
  volatile int min, max;
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 */
struct parallel_sort {
  struct part *parts;
  struct gpart *gparts;
  struct xpart *xparts;
111
  struct spart *sparts;
112
113
114
115
116
117
  int *ind;
  struct qstack *stack;
  unsigned int stack_size;
  volatile unsigned int first, last, waiting;
};

118
119
120
121
122
123
124
125
126
/**
 * @brief Information required to compute the particle cell indices.
 */
struct index_data {
  struct space *s;
  struct cell *cells;
  int *ind;
};

127
128
129
130
131
132
133
134
135
136
137
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */
138
139
140
141
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
142
143
144
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
145
146
147
148
149
150
151
152
153
154
155
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
156
  int sid = 0;
157
  for (int k = 0; k < 3; k++)
158
159
160
161
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
162
    struct cell *temp = *ci;
163
164
    *ci = *cj;
    *cj = temp;
165
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
166
167
168
169
170
171
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}
172

173
/**
174
 * @brief Recursively dismantle a cell tree.
175
 *
176
177
 * @param s The #space.
 * @param c The #cell to recycle.
Matthieu Schaller's avatar
Matthieu Schaller committed
178
179
 * @param rec_begin Pointer to the start of the list of cells to recycle.
 * @param rec_end Pointer to the end of the list of cells to recycle.
180
 */
181
182
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
                               struct cell **rec_begin, struct cell **rec_end) {
183
  if (c->split)
184
    for (int k = 0; k < 8; k++)
185
      if (c->progeny[k] != NULL) {
186
187
188
189
        space_rebuild_recycle_rec(s, c->progeny[k], rec_begin, rec_end);
        c->progeny[k]->next = *rec_begin;
        *rec_begin = c->progeny[k];
        if (*rec_end == NULL) *rec_end = *rec_begin;
190
191
192
193
        c->progeny[k] = NULL;
      }
}

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct space *s = (struct space *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int k = 0; k < num_elements; k++) {
    struct cell *c = &cells[k];
    struct cell *rec_begin = NULL, *rec_end = NULL;
    space_rebuild_recycle_rec(s, c, &rec_begin, &rec_end);
    if (rec_begin != NULL) space_recycle_list(s, rec_begin, rec_end);
    c->sorts = NULL;
    c->nr_tasks = 0;
    c->density = NULL;
    c->gradient = NULL;
    c->force = NULL;
    c->grav = NULL;
    c->dx_max = 0.0f;
    c->sorted = 0;
    c->count = 0;
    c->gcount = 0;
215
    c->scount = 0;
216
217
218
    c->init = NULL;
    c->extra_ghost = NULL;
    c->ghost = NULL;
219
220
    c->kick1 = NULL;
    c->kick2 = NULL;
221
    c->timestep = NULL;
222
    c->drift = NULL;
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
    c->cooling = NULL;
    c->sourceterms = NULL;
    c->super = c;
    if (c->sort != NULL) {
      free(c->sort);
      c->sort = NULL;
    }
#if WITH_MPI
    c->recv_xv = NULL;
    c->recv_rho = NULL;
    c->recv_gradient = NULL;
    c->recv_ti = NULL;

    c->send_xv = NULL;
    c->send_rho = NULL;
    c->send_gradient = NULL;
    c->send_ti = NULL;
#endif
  }
}

244
/**
245
 * @brief Re-build the top-level cell grid.
246
 *
247
 * @param s The #space.
248
 * @param verbose Print messages to stdout or not.
249
 */
250
void space_regrid(struct space *s, int verbose) {
251

252
  const size_t nr_parts = s->nr_parts;
253
  const ticks tic = getticks();
254
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
255

256
  /* Run through the cells and get the current h_max. */
257
  // tic = getticks();
258
  float h_max = s->cell_min / kernel_gamma / space_stretch;
259
  if (nr_parts > 0) {
260
    if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
261
      for (int k = 0; k < s->nr_cells; k++) {
262
263
264
        if (s->cells_top[k].nodeID == engine_rank &&
            s->cells_top[k].h_max > h_max) {
          h_max = s->cells_top[k].h_max;
265
        }
266
267
      }
    } else {
268
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
269
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
270
      }
271
272
273
274
275
276
277
278
279
280
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
281
      error("Failed to aggregate the rebuild flag across nodes.");
282
283
284
    h_max = buff;
  }
#endif
285
  if (verbose) message("h_max is %.3e (cell_min=%.3e).", h_max, s->cell_min);
286
287

  /* Get the new putative cell dimensions. */
288
  const int cdim[3] = {
289
290
291
292
293
294
      floor(s->dim[0] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[1] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[2] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min))};
295
296
297
298
299

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
300
301
302
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
303
304
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
305
        " - the (minimal) time-step is too large leading to particles with "
306
307
308
        "predicted smoothing lengths too large for the box size,\n"
        " - particle with velocities so large that they move by more than two "
        "box sizes per time-step.\n");
309

310
311
312
313
314
315
  /* Check if we have enough cells for gravity. */
  if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
    error(
        "Must have at least 8 cells in each spatial dimension when gravity "
        "is switched on.");

316
317
318
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
319
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
320
  double oldwidth[3];
321
322
323
324
325
326
327
328
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
329
330
331
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
332
333
334
335
336
337
338
339
340

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
341
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
342
343
344
345
346
        }
      }
    }
  }

347
348
349
350
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
351
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
352
353
      cdim[2] < s->cdim[2]) {

354
355
356
357
358
359
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
    message("re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]);
    fflush(stdout);
#endif

360
    /* Free the old cells, if they were allocated. */
361
    if (s->cells_top != NULL) {
362
363
      threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                     s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
364
      free(s->cells_top);
365
366
367
368
      s->maxdepth = 0;
    }

    /* Set the new cell dimensions only if smaller. */
369
    for (int k = 0; k < 3; k++) {
370
      s->cdim[k] = cdim[k];
371
372
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
373
    }
374
    const float dmin = min(s->width[0], min(s->width[1], s->width[2]));
375
376
377

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
378
    if (posix_memalign((void *)&s->cells_top, cell_align,
379
380
                       s->nr_cells * sizeof(struct cell)) != 0)
      error("Failed to allocate cells.");
381
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
382
383

    /* Set the cells' locks */
384
    for (int k = 0; k < s->nr_cells; k++)
385
386
      if (lock_init(&s->cells_top[k].lock) != 0)
        error("Failed to init spinlock.");
387
388

    /* Set the cell location and sizes. */
389
390
391
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
392
          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
393
394
395
396
397
398
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
399
400
401
402
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
403
          c->scount = 0;
404
          c->super = c;
405
          c->ti_old = ti_current;
406
          lock_init(&c->lock);
Pedro Gonnet's avatar
Pedro Gonnet committed
407
        }
408
409

    /* Be verbose about the change. */
410
411
412
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
413

414
#ifdef WITH_MPI
415
416
417
418
419
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
420
421
422
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
423

Matthieu Schaller's avatar
Matthieu Schaller committed
424
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
#ifdef HAVE_METIS
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
443

444
445
      /* Finished with these. */
      free(oldnodeIDs);
446
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
447
#endif /* WITH_MPI */
448
449
450
451

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

452
  }      /* re-build upper-level cells? */
453
  else { /* Otherwise, just clean up the cells. */
454
455

    /* Free the old cells, if they were allocated. */
456
457
    threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                   s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
458
459
    s->maxdepth = 0;
  }
460
461
462
463

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
464
}
465
466
467
468
469

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
470
 * @param verbose Print messages to stdout or not
471
472
 *
 */
473
void space_rebuild(struct space *s, int verbose) {
474

Matthieu Schaller's avatar
Matthieu Schaller committed
475
  const ticks tic = getticks();
476

477
478
479
480
481
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
  message("re)building space");
  fflush(stdout);
#endif
482
483

  /* Re-grid if necessary, or just re-set the cell data. */
484
  space_regrid(s, verbose);
485

Pedro Gonnet's avatar
Pedro Gonnet committed
486
487
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
488
  size_t nr_sparts = s->nr_sparts;
489
  struct cell *restrict cells_top = s->cells_top;
490
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
491

Pedro Gonnet's avatar
Pedro Gonnet committed
492
493
494
  /* Run through the particles and get their cell index. Allocates
     an index that is larger than the number of particles to avoid
     re-allocating after shuffling. */
495
  const size_t ind_size = s->size_parts + 100;
496
497
  int *ind;
  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
498
    error("Failed to allocate temporary particle indices.");
499
  if (s->size_parts > 0) space_parts_get_cell_index(s, ind, cells_top, verbose);
500

501
  /* Run through the gravity particles and get their cell index. */
502
  const size_t gind_size = s->size_gparts + 100;
503
504
505
  int *gind;
  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
    error("Failed to allocate temporary g-particle indices.");
Pedro Gonnet's avatar
Pedro Gonnet committed
506
507
  if (s->size_gparts > 0)
    space_gparts_get_cell_index(s, gind, cells_top, verbose);
508

509
510
511
512
513
514
515
516
  /* Run through the star particles and get their cell index. */
  const size_t sind_size = s->size_sparts + 100;
  int *sind;
  if ((sind = (int *)malloc(sizeof(int) * sind_size)) == NULL)
    error("Failed to allocate temporary s-particle indices.");
  if (s->size_sparts > 0)
    space_sparts_get_cell_index(s, sind, cells_top, verbose);

517
#ifdef WITH_MPI
518

519
  /* Move non-local parts to the end of the list. */
520
  const int local_nodeID = s->e->nodeID;
521
  for (size_t k = 0; k < nr_parts;) {
522
    if (cells_top[ind[k]].nodeID != local_nodeID) {
523
      nr_parts -= 1;
524
      /* Swap the particle */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
525
      const struct part tp = s->parts[k];
526
527
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
528
      /* Swap the link with the gpart */
529
      if (s->parts[k].gpart != NULL) {
530
        s->parts[k].gpart->id_or_neg_offset = -k;
531
532
      }
      if (s->parts[nr_parts].gpart != NULL) {
533
        s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
534
      }
535
      /* Swap the xpart */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
536
      const struct xpart txp = s->xparts[k];
537
538
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
539
      /* Swap the index */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
540
      const int t = ind[k];
541
542
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
543
    } else {
544
545
546
547
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
548

549
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
550
  /* Check that all parts are in the correct places. */
551
  for (size_t k = 0; k < nr_parts; k++) {
552
    if (cells_top[ind[k]].nodeID != local_nodeID) {
553
554
555
556
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
557
    if (cells_top[ind[k]].nodeID == local_nodeID) {
558
      error("Failed to remove local parts from send list");
559
    }
560
561
  }
#endif
562

563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
  /* Move non-local sparts to the end of the list. */
  for (size_t k = 0; k < nr_sparts;) {
    if (cells_top[sind[k]].nodeID != local_nodeID) {
      nr_sparts -= 1;
      /* Swap the particle */
      const struct spart tp = s->sparts[k];
      s->sparts[k] = s->sparts[nr_sparts];
      s->sparts[nr_sparts] = tp;
      /* Swap the link with the gpart */
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
      if (s->sparts[nr_parts].gpart != NULL) {
        s->sparts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
      }
      /* Swap the index */
      const int t = sind[k];
      sind[k] = sind[nr_sparts];
      sind[nr_sparts] = t;
    } else {
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }

#ifdef SWIFT_DEBUG_CHECKS
  /* Check that all sparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_sparts; k++) {
    if (cells_top[sind[k]].nodeID != local_nodeID) {
      error("Failed to move all non-local sparts to send list");
    }
  }
  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
    if (cells_top[sind[k]].nodeID == local_nodeID) {
      error("Failed to remove local sparts from send list");
    }
  }
#endif

602
  /* Move non-local gparts to the end of the list. */
603
  for (size_t k = 0; k < nr_gparts;) {
604
    if (cells_top[gind[k]].nodeID != local_nodeID) {
605
      nr_gparts -= 1;
606
      /* Swap the particle */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
607
      const struct gpart tp = s->gparts[k];
608
609
      s->gparts[k] = s->gparts[nr_gparts];
      s->gparts[nr_gparts] = tp;
610
611
      /* Swap the link with part/spart */
      if (s->gparts[k].type == swift_type_gas) {
612
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
613
614
      } else if (s->gparts[k].type == swift_type_star) {
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
615
      }
616
      if (s->gparts[nr_gparts].type == swift_type_gas) {
617
618
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
619
620
621
      } else if (s->gparts[nr_gparts].type == swift_type_star) {
        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
622
      }
623
      /* Swap the index */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
624
625
626
      const int t = gind[k];
      gind[k] = gind[nr_gparts];
      gind[nr_gparts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
627
    } else {
628
629
630
631
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
632

633
#ifdef SWIFT_DEBUG_CHECKS
634
635
  /* Check that all gparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_gparts; k++) {
636
    if (cells_top[gind[k]].nodeID != local_nodeID) {
637
638
639
640
      error("Failed to move all non-local gparts to send list");
    }
  }
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
641
    if (cells_top[gind[k]].nodeID == local_nodeID) {
642
      error("Failed to remove local gparts from send list");
643
    }
644
645
  }
#endif
646

647
648
  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
649
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
650
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
651
  size_t nr_sparts_exchanged = s->nr_sparts - nr_sparts;
Pedro Gonnet's avatar
Pedro Gonnet committed
652
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
653
654
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged,
                         nr_sparts, &sind[nr_sparts], &nr_sparts_exchanged);
Pedro Gonnet's avatar
Pedro Gonnet committed
655
656

  /* Set the new particle counts. */
657
  s->nr_parts = nr_parts + nr_parts_exchanged;
658
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
659
  s->nr_sparts = nr_sparts + nr_sparts_exchanged;
660

661
  /* Re-allocate the index array for the parts if needed.. */
662
  if (s->nr_parts + 1 > ind_size) {
663
    int *ind_new;
664
    if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL)
665
      error("Failed to allocate temporary particle indices.");
666
    memcpy(ind_new, ind, sizeof(int) * nr_parts);
667
668
    free(ind);
    ind = ind_new;
669
670
  }

671
672
673
674
675
676
677
678
679
680
  /* Re-allocate the index array for the sparts if needed.. */
  if (s->nr_sparts + 1 > ind_size) {
    int *sind_new;
    if ((sind_new = (int *)malloc(sizeof(int) * (s->nr_sparts + 1))) == NULL)
      error("Failed to allocate temporary particle indices.");
    memcpy(sind_new, sind, sizeof(int) * nr_sparts);
    free(sind);
    sind = sind_new;
  }

681
682
683
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

684
  /* Assign each part to its cell. */
Pedro Gonnet's avatar
Pedro Gonnet committed
685
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
686
    const struct part *const p = &s->parts[k];
687
    ind[k] =
688
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
689
#ifdef SWIFT_DEBUG_CHECKS
690
    if (cells_top[ind[k]].nodeID != local_nodeID)
691
      error("Received part that does not belong to me (nodeID=%i).",
692
            cells_top[ind[k]].nodeID);
693
#endif
694
  }
695
  nr_parts = s->nr_parts;
696

697
698
699
700
701
702
703
704
705
706
707
708
709
  /* Assign each spart to its cell. */
  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
    const struct spart *const sp = &s->sparts[k];
    sind[k] =
        cell_getid(cdim, sp->x[0] * ih[0], sp->x[1] * ih[1], sp->x[2] * ih[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cells_top[sind[k]].nodeID != local_nodeID)
      error("Received part that does not belong to me (nodeID=%i).",
            cells_top[sind[k]].nodeID);
#endif
  }
  nr_sparts = s->nr_sparts;

710
#endif /* WITH_MPI */
711
712

  /* Sort the parts according to their cells. */
713
714
  if (nr_parts > 0)
    space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
715

716
717
718
719
720
721
722
723
724
  /* Sort the sparts according to their cells. */
  if (nr_sparts > 0)
    space_sparts_sort(s, sind, nr_sparts, 0, s->nr_cells - 1, verbose);

  /* Re-link the gparts to their (s-)particles. */
  if (nr_parts > 0 && nr_gparts > 0)
    part_relink_gparts_to_parts(s->parts, nr_parts, 0);
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_gparts_to_sparts(s->sparts, nr_sparts, 0);
725

726
#ifdef SWIFT_DEBUG_CHECKS
727
  /* Verify space_sort_struct. */
728
729
730
  for (size_t k = 1; k < nr_parts; k++) {
    if (ind[k - 1] > ind[k]) {
      error("Sort failed!");
731
    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
732
733
                                    s->parts[k].x[1] * s->iwidth[1],
                                    s->parts[k].x[2] * s->iwidth[2])) {
734
735
736
737
      error("Incorrect indices!");
    }
  }
#endif
738

739
740
741
742
743
744
745
746
747
748
  /* Extract the cell counts from the sorted indices. */
  size_t last_index = 0;
  ind[nr_parts] = s->nr_cells;  // sentinel.
  for (size_t k = 0; k < nr_parts; k++) {
    if (ind[k] < ind[k + 1]) {
      cells_top[ind[k]].count = k - last_index + 1;
      last_index = k + 1;
    }
  }

749
750
  /* Extract the cell counts from the sorted indices. */
  size_t last_sindex = 0;
751
  sind[nr_sparts] = s->nr_cells;  // sentinel.
752
753
754
755
756
757
758
  for (size_t k = 0; k < nr_sparts; k++) {
    if (sind[k] < sind[k + 1]) {
      cells_top[sind[k]].scount = k - last_sindex + 1;
      last_sindex = k + 1;
    }
  }

759
  /* We no longer need the indices as of here. */
760
  free(ind);
761
  free(sind);
762

763
764
#ifdef WITH_MPI

765
  /* Re-allocate the index array for the gparts if needed.. */
766
  if (s->nr_gparts + 1 > gind_size) {
767
    int *gind_new;
768
    if ((gind_new = (int *)malloc(sizeof(int) * (s->nr_gparts + 1))) == NULL)
769
      error("Failed to allocate temporary g-particle indices.");
770
    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
771
772
773
774
    free(gind);
    gind = gind_new;
  }

775
  /* Assign each gpart to its cell. */
776
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
777
    const struct gpart *const p = &s->gparts[k];
778
779
    gind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
780
781

#ifdef SWIFT_DEBUG_CHECKS
782
    if (cells_top[gind[k]].nodeID != s->e->nodeID)
783
      error("Received part that does not belong to me (nodeID=%i).",
784
            cells_top[gind[k]].nodeID);
785
#endif
786
787
788
  }
  nr_gparts = s->nr_gparts;

789
#endif /* WITH_MPI */
790

791
  /* Sort the gparts according to their cells. */
792
793
  if (nr_gparts > 0)
    space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
794
795

  /* Re-link the parts. */
796
  if (nr_parts > 0 && nr_gparts > 0)
797
798
799
800
801
    part_relink_parts_to_gparts(s->gparts, nr_gparts, s->parts);

  /* Re-link the sparts. */
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_sparts_to_gparts(s->gparts, nr_gparts, s->sparts);
802

803
804
  /* Extract the cell counts from the sorted indices. */
  size_t last_gindex = 0;
805
  gind[nr_gparts] = s->nr_cells;
806
807
  for (size_t k = 0; k < nr_gparts; k++) {
    if (gind[k] < gind[k + 1]) {
808
      cells_top[gind[k]].gcount = k - last_gindex + 1;
809
810
811
812
      last_gindex = k + 1;
    }
  }

813
  /* We no longer need the indices as of here. */
814
  free(gind);
815

816
#ifdef SWIFT_DEBUG_CHECKS
817
  /* Verify that the links are correct */
818
819
  part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts,
                    nr_sparts);
820
#endif
821

822
823
  /* Hook the cells up to the parts. */
  // tic = getticks();
824
825
826
  struct part *finger = s->parts;
  struct xpart *xfinger = s->xparts;
  struct gpart *gfinger = s->gparts;
827
  struct spart *sfinger = s->sparts;
828
  for (int k = 0; k < s->nr_cells; k++) {
829
    struct cell *restrict c = &cells_top[k];
830
    c->ti_old = ti_current;
831
832
833
    c->parts = finger;
    c->xparts = xfinger;
    c->gparts = gfinger;
834
    c->sparts = sfinger;
835
836
837
    finger = &finger[c->count];
    xfinger = &xfinger[c->count];
    gfinger = &gfinger[c->gcount];
838
    sfinger = &sfinger[c->scount];
839
  }
840
  // message( "hooking up cells took %.3f %s." ,
Matthieu Schaller's avatar
Matthieu Schaller committed
841
  // clocks_from_ticks(getticks() - tic), clocks_getunit());
842
843
844

  /* At this point, we have the upper-level cells, old or new. Now make
     sure that the parts in each cell are ok. */
845
  space_split(s, cells_top, s->nr_cells, verbose);
846
847
848
849
850
851
852
853
854

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Split particles between cells of a hierarchy
 *
855
856
 * This is done in parallel using threads in the #threadpool.
 *
857
 * @param s The #space.
858
859
 * @param cells The cell hierarchy.
 * @param nr_cells The number of cells.
860
861
 * @param verbose Are we talkative ?
 */
862
863
void space_split(struct space *s, struct cell *cells, int nr_cells,
                 int verbose) {
864

Matthieu Schaller's avatar
Matthieu Schaller committed
865
  const ticks tic = getticks();
866

867
  threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells,
868
                 sizeof(struct cell), 1, s);
869

870
871
872
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
873
}
874

875
876
877
878
879
880
881
882
/**
 * @brief Runs through the top-level cells and checks whether tasks associated
 * with them can be split. If not, try to sanitize the cells.
 *
 * @param s The #space to act upon.
 */
void space_sanitize(struct space *s) {

883
884
  s->sanitized = 1;

885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
  for (int k = 0; k < s->nr_cells; k++) {

    struct cell *c = &s->cells_top[k];
    const double min_width = c->dmin;

    /* Do we have a problem ? */
    if (c->h_max * kernel_gamma * space_stretch > min_width * 0.5 &&
        c->count > space_maxcount) {

      /* Ok, clean-up the mess */
      cell_sanitize(c);
    }
  }
}

900
/**
901
 * @brief #threadpool mapper function to compute the particle cell indices.
902
 *
903
 * @param map_data Pointer towards the particles.
904
 * @param nr_parts The number of particles to treat.
905
 * @param extra_data Pointers to the space and index list
906
 */
907
908
void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
                                       void *extra_data) {
909

910
911
912
913
  /* Unpack the data */
  struct part *restrict parts = (struct part *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
914
  int *const ind = data->ind + (ptrdiff_t)(parts - s->parts);
915
916

  /* Get some constants */
917
918
919
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
920
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
921
922
923
  const double ih_x = s->iwidth[0];
  const double ih_y = s->iwidth[1];
  const double ih_z = s->iwidth[2];
924

925
  for (int k = 0; k < nr_parts; k++) {
926
927
928
929

    /* Get the particle */
    struct part *restrict p = &parts[k];

930
931
932
933
    const double old_pos_x = p->x[0];
    const double old_pos_y = p->x[1];
    const double old_pos_z = p->x[2];

934
    /* Put it back into the simulation volume */
935
936
937
    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
938
939

    /* Get its cell index */
Matthieu Schaller's avatar
Matthieu Schaller committed
940
941
    const int index =
        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
942
    ind[k] = index;
943

944
945
946
947
    /* Update the position */
    p->x[0] = pos_x;
    p->x[1] = pos_y;
    p->x[2] = pos_z;
948
949
950
951
  }
}

/**
952
 * @brief #threadpool mapper function to compute the g-particle cell indices.
953
 *
954
 * @param map_data Pointer towards the g-particles.
955
 * @param nr_gparts The number of g-particles to treat.
956
 * @param extra_data Pointers to the space and index list
957
 */
958
959
void space_gparts_get_cell_index_mapper(void *map_data, int nr_gparts,
                                        void *extra_data) {
960

Matthieu Schaller's avatar