space.c 76.8 KB
Newer Older
1
/*******************************************************************************
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34
35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37
38
#endif

39
40
41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "const.h"
45
#include "cooling.h"
46
#include "engine.h"
47
#include "error.h"
48
49
#include "gravity.h"
#include "hydro.h"
50
#include "kernel_hydro.h"
51
#include "lock.h"
52
#include "memswap.h"
53
#include "minmax.h"
54
#include "runner.h"
55
#include "stars.h"
56
#include "threadpool.h"
57
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
58
59
60

/* Split size. */
int space_splitsize = space_splitsize_default;
61
int space_subsize = space_subsize_default;
62
int space_maxsize = space_maxsize_default;
63
int space_maxcount = space_maxcount_default;
Pedro Gonnet's avatar
Pedro Gonnet committed
64
65
66

/* Map shift vector to sortlist. */
const int sortlistID[27] = {
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
    /* ( -1 , -1 , -1 ) */ 0,
    /* ( -1 , -1 ,  0 ) */ 1,
    /* ( -1 , -1 ,  1 ) */ 2,
    /* ( -1 ,  0 , -1 ) */ 3,
    /* ( -1 ,  0 ,  0 ) */ 4,
    /* ( -1 ,  0 ,  1 ) */ 5,
    /* ( -1 ,  1 , -1 ) */ 6,
    /* ( -1 ,  1 ,  0 ) */ 7,
    /* ( -1 ,  1 ,  1 ) */ 8,
    /* (  0 , -1 , -1 ) */ 9,
    /* (  0 , -1 ,  0 ) */ 10,
    /* (  0 , -1 ,  1 ) */ 11,
    /* (  0 ,  0 , -1 ) */ 12,
    /* (  0 ,  0 ,  0 ) */ 0,
    /* (  0 ,  0 ,  1 ) */ 12,
    /* (  0 ,  1 , -1 ) */ 11,
    /* (  0 ,  1 ,  0 ) */ 10,
    /* (  0 ,  1 ,  1 ) */ 9,
    /* (  1 , -1 , -1 ) */ 8,
    /* (  1 , -1 ,  0 ) */ 7,
    /* (  1 , -1 ,  1 ) */ 6,
    /* (  1 ,  0 , -1 ) */ 5,
    /* (  1 ,  0 ,  0 ) */ 4,
    /* (  1 ,  0 ,  1 ) */ 3,
    /* (  1 ,  1 , -1 ) */ 2,
    /* (  1 ,  1 ,  0 ) */ 1,
    /* (  1 ,  1 ,  1 ) */ 0};

95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/**
 * @brief Interval stack necessary for parallel particle sorting.
 */
struct qstack {
  volatile ptrdiff_t i, j;
  volatile int min, max;
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 */
struct parallel_sort {
  struct part *parts;
  struct gpart *gparts;
  struct xpart *xparts;
111
  struct spart *sparts;
112
113
114
115
116
117
  int *ind;
  struct qstack *stack;
  unsigned int stack_size;
  volatile unsigned int first, last, waiting;
};

118
119
120
121
122
123
124
125
126
/**
 * @brief Information required to compute the particle cell indices.
 */
struct index_data {
  struct space *s;
  struct cell *cells;
  int *ind;
};

127
128
129
130
131
132
133
134
135
136
137
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */
138
139
140
141
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
142
143
144
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
145
146
147
148
149
150
151
152
153
154
155
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
156
  int sid = 0;
157
  for (int k = 0; k < 3; k++)
158
159
160
161
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
162
    struct cell *temp = *ci;
163
164
    *ci = *cj;
    *cj = temp;
165
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
166
167
168
169
170
171
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}
172

173
/**
174
 * @brief Recursively dismantle a cell tree.
175
 *
176
177
 * @param s The #space.
 * @param c The #cell to recycle.
Matthieu Schaller's avatar
Matthieu Schaller committed
178
179
 * @param rec_begin Pointer to the start of the list of cells to recycle.
 * @param rec_end Pointer to the end of the list of cells to recycle.
180
 */
181
182
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
                               struct cell **rec_begin, struct cell **rec_end) {
183
  if (c->split)
184
    for (int k = 0; k < 8; k++)
185
      if (c->progeny[k] != NULL) {
186
187
188
189
        space_rebuild_recycle_rec(s, c->progeny[k], rec_begin, rec_end);
        c->progeny[k]->next = *rec_begin;
        *rec_begin = c->progeny[k];
        if (*rec_end == NULL) *rec_end = *rec_begin;
190
191
192
193
        c->progeny[k] = NULL;
      }
}

194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct space *s = (struct space *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int k = 0; k < num_elements; k++) {
    struct cell *c = &cells[k];
    struct cell *rec_begin = NULL, *rec_end = NULL;
    space_rebuild_recycle_rec(s, c, &rec_begin, &rec_end);
    if (rec_begin != NULL) space_recycle_list(s, rec_begin, rec_end);
    c->sorts = NULL;
    c->nr_tasks = 0;
    c->density = NULL;
    c->gradient = NULL;
    c->force = NULL;
    c->grav = NULL;
    c->dx_max = 0.0f;
    c->sorted = 0;
    c->count = 0;
    c->gcount = 0;
215
    c->scount = 0;
216
217
218
    c->init = NULL;
    c->extra_ghost = NULL;
    c->ghost = NULL;
219
220
    c->kick1 = NULL;
    c->kick2 = NULL;
221
    c->timestep = NULL;
222
    c->drift = NULL;
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
    c->cooling = NULL;
    c->sourceterms = NULL;
    c->super = c;
    if (c->sort != NULL) {
      free(c->sort);
      c->sort = NULL;
    }
#if WITH_MPI
    c->recv_xv = NULL;
    c->recv_rho = NULL;
    c->recv_gradient = NULL;
    c->recv_ti = NULL;

    c->send_xv = NULL;
    c->send_rho = NULL;
    c->send_gradient = NULL;
    c->send_ti = NULL;
#endif
  }
}

244
/**
245
 * @brief Re-build the top-level cell grid.
246
 *
247
 * @param s The #space.
248
 * @param verbose Print messages to stdout or not.
249
 */
250
void space_regrid(struct space *s, int verbose) {
251

252
  const size_t nr_parts = s->nr_parts;
253
  const ticks tic = getticks();
254
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
255

256
  /* Run through the cells and get the current h_max. */
257
  // tic = getticks();
258
  float h_max = s->cell_min / kernel_gamma / space_stretch;
259
  if (nr_parts > 0) {
260
    if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
261
      for (int k = 0; k < s->nr_cells; k++) {
262
263
264
        if (s->cells_top[k].nodeID == engine_rank &&
            s->cells_top[k].h_max > h_max) {
          h_max = s->cells_top[k].h_max;
265
        }
266
267
      }
    } else {
268
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
269
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
270
      }
271
272
273
274
275
276
277
278
279
280
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
281
      error("Failed to aggregate the rebuild flag across nodes.");
282
283
284
    h_max = buff;
  }
#endif
285
  if (verbose) message("h_max is %.3e (cell_min=%.3e).", h_max, s->cell_min);
286
287

  /* Get the new putative cell dimensions. */
288
  const int cdim[3] = {
289
290
291
292
293
294
      floor(s->dim[0] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[1] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      floor(s->dim[2] /
            fmax(h_max * kernel_gamma * space_stretch, s->cell_min))};
295
296
297
298
299

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
300
301
302
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
303
304
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
305
        " - the (minimal) time-step is too large leading to particles with "
306
307
308
        "predicted smoothing lengths too large for the box size,\n"
        " - particle with velocities so large that they move by more than two "
        "box sizes per time-step.\n");
309

310
311
312
313
314
315
  /* Check if we have enough cells for gravity. */
  if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
    error(
        "Must have at least 8 cells in each spatial dimension when gravity "
        "is switched on.");

316
317
318
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
319
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
320
  double oldwidth[3];
321
322
323
324
325
326
327
328
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
329
330
331
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
332
333
334
335
336
337
338
339
340

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
341
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
342
343
344
345
346
        }
      }
    }
  }

347
348
349
350
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
351
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
352
353
      cdim[2] < s->cdim[2]) {

354
355
356
357
358
359
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
    message("re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]);
    fflush(stdout);
#endif

360
    /* Free the old cells, if they were allocated. */
361
    if (s->cells_top != NULL) {
362
363
      threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                     s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
364
      free(s->cells_top);
365
366
367
368
      s->maxdepth = 0;
    }

    /* Set the new cell dimensions only if smaller. */
369
    for (int k = 0; k < 3; k++) {
370
      s->cdim[k] = cdim[k];
371
372
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
373
    }
374
    const float dmin = min(s->width[0], min(s->width[1], s->width[2]));
375
376
377

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
378
    if (posix_memalign((void *)&s->cells_top, cell_align,
379
380
                       s->nr_cells * sizeof(struct cell)) != 0)
      error("Failed to allocate cells.");
381
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
382
383

    /* Set the cells' locks */
384
    for (int k = 0; k < s->nr_cells; k++)
385
386
      if (lock_init(&s->cells_top[k].lock) != 0)
        error("Failed to init spinlock.");
387
388

    /* Set the cell location and sizes. */
389
390
391
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
392
          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
393
394
395
396
397
398
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
399
400
401
402
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
403
          c->scount = 0;
404
          c->super = c;
405
          c->ti_old = ti_current;
406
          lock_init(&c->lock);
Pedro Gonnet's avatar
Pedro Gonnet committed
407
        }
408
409

    /* Be verbose about the change. */
410
411
412
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
413

414
#ifdef WITH_MPI
415
416
417
418
419
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
420
421
422
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
423

Matthieu Schaller's avatar
Matthieu Schaller committed
424
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
#ifdef HAVE_METIS
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
443

444
445
      /* Finished with these. */
      free(oldnodeIDs);
446
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
447
#endif /* WITH_MPI */
448
449
450
451

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

452
  }      /* re-build upper-level cells? */
453
  else { /* Otherwise, just clean up the cells. */
454
455

    /* Free the old cells, if they were allocated. */
456
457
    threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper,
                   s->cells_top, s->nr_cells, sizeof(struct cell), 100, s);
458
459
    s->maxdepth = 0;
  }
460
461
462
463

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
464
}
465
466
467
468
469

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
470
 * @param verbose Print messages to stdout or not
471
472
 *
 */
473
void space_rebuild(struct space *s, int verbose) {
474

Matthieu Schaller's avatar
Matthieu Schaller committed
475
  const ticks tic = getticks();
476

477
478
479
480
481
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
  message("re)building space");
  fflush(stdout);
#endif
482
483

  /* Re-grid if necessary, or just re-set the cell data. */
484
  space_regrid(s, verbose);
485

Pedro Gonnet's avatar
Pedro Gonnet committed
486
487
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
488
  size_t nr_sparts = s->nr_sparts;
489
  struct cell *restrict cells_top = s->cells_top;
490
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
491

Pedro Gonnet's avatar
Pedro Gonnet committed
492
493
494
  /* Run through the particles and get their cell index. Allocates
     an index that is larger than the number of particles to avoid
     re-allocating after shuffling. */
495
  const size_t ind_size = s->size_parts + 100;
496
497
  int *ind;
  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
498
    error("Failed to allocate temporary particle indices.");
499
  if (s->size_parts > 0) space_parts_get_cell_index(s, ind, cells_top, verbose);
500

501
  /* Run through the gravity particles and get their cell index. */
502
  const size_t gind_size = s->size_gparts + 100;
503
504
505
  int *gind;
  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
    error("Failed to allocate temporary g-particle indices.");
Pedro Gonnet's avatar
Pedro Gonnet committed
506
507
  if (s->size_gparts > 0)
    space_gparts_get_cell_index(s, gind, cells_top, verbose);
508

509
510
511
512
513
514
515
516
  /* Run through the star particles and get their cell index. */
  const size_t sind_size = s->size_sparts + 100;
  int *sind;
  if ((sind = (int *)malloc(sizeof(int) * sind_size)) == NULL)
    error("Failed to allocate temporary s-particle indices.");
  if (s->size_sparts > 0)
    space_sparts_get_cell_index(s, sind, cells_top, verbose);

517
#ifdef WITH_MPI
518
  const int local_nodeID = s->e->nodeID;
519

520
  /* Move non-local parts to the end of the list. */
521
  for (size_t k = 0; k < nr_parts;) {
522
    if (cells_top[ind[k]].nodeID != local_nodeID) {
523
      nr_parts -= 1;
524
      /* Swap the particle */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
525
      const struct part tp = s->parts[k];
526
527
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
528
      /* Swap the link with the gpart */
529
      if (s->parts[k].gpart != NULL) {
530
        s->parts[k].gpart->id_or_neg_offset = -k;
531
532
      }
      if (s->parts[nr_parts].gpart != NULL) {
533
        s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
534
      }
535
      /* Swap the xpart */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
536
      const struct xpart txp = s->xparts[k];
537
538
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
539
      /* Swap the index */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
540
      const int t = ind[k];
541
542
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
543
    } else {
544
545
546
547
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
548

549
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
550
  /* Check that all parts are in the correct places. */
551
  for (size_t k = 0; k < nr_parts; k++) {
552
    if (cells_top[ind[k]].nodeID != local_nodeID) {
553
554
555
556
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
557
    if (cells_top[ind[k]].nodeID == local_nodeID) {
558
      error("Failed to remove local parts from send list");
559
    }
560
561
  }
#endif
562

563
564
565
566
567
568
569
570
571
572
573
574
  /* Move non-local sparts to the end of the list. */
  for (size_t k = 0; k < nr_sparts;) {
    if (cells_top[sind[k]].nodeID != local_nodeID) {
      nr_sparts -= 1;
      /* Swap the particle */
      const struct spart tp = s->sparts[k];
      s->sparts[k] = s->sparts[nr_sparts];
      s->sparts[nr_sparts] = tp;
      /* Swap the link with the gpart */
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
575
576
      if (s->sparts[nr_sparts].gpart != NULL) {
        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
      }
      /* Swap the index */
      const int t = sind[k];
      sind[k] = sind[nr_sparts];
      sind[nr_sparts] = t;
    } else {
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }

#ifdef SWIFT_DEBUG_CHECKS
  /* Check that all sparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_sparts; k++) {
    if (cells_top[sind[k]].nodeID != local_nodeID) {
      error("Failed to move all non-local sparts to send list");
    }
  }
  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
    if (cells_top[sind[k]].nodeID == local_nodeID) {
      error("Failed to remove local sparts from send list");
    }
  }
#endif

602
  /* Move non-local gparts to the end of the list. */
603
  for (size_t k = 0; k < nr_gparts;) {
604
    if (cells_top[gind[k]].nodeID != local_nodeID) {
605
      nr_gparts -= 1;
606
      /* Swap the particle */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
607
      const struct gpart tp = s->gparts[k];
608
609
      s->gparts[k] = s->gparts[nr_gparts];
      s->gparts[nr_gparts] = tp;
610
611
      /* Swap the link with part/spart */
      if (s->gparts[k].type == swift_type_gas) {
612
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
613
614
      } else if (s->gparts[k].type == swift_type_star) {
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
615
      }
616
      if (s->gparts[nr_gparts].type == swift_type_gas) {
617
618
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
619
620
621
      } else if (s->gparts[nr_gparts].type == swift_type_star) {
        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
622
      }
623
      /* Swap the index */
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
624
625
626
      const int t = gind[k];
      gind[k] = gind[nr_gparts];
      gind[nr_gparts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
627
    } else {
628
629
630
631
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
632

633
#ifdef SWIFT_DEBUG_CHECKS
634
635
  /* Check that all gparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_gparts; k++) {
636
    if (cells_top[gind[k]].nodeID != local_nodeID) {
637
638
639
640
      error("Failed to move all non-local gparts to send list");
    }
  }
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
641
    if (cells_top[gind[k]].nodeID == local_nodeID) {
642
      error("Failed to remove local gparts from send list");
643
    }
644
645
  }
#endif
646

647
648
  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
649
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
650
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
651
  size_t nr_sparts_exchanged = s->nr_sparts - nr_sparts;
Pedro Gonnet's avatar
Pedro Gonnet committed
652
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
653
654
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged,
                         nr_sparts, &sind[nr_sparts], &nr_sparts_exchanged);
Pedro Gonnet's avatar
Pedro Gonnet committed
655
656

  /* Set the new particle counts. */
657
  s->nr_parts = nr_parts + nr_parts_exchanged;
658
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
659
  s->nr_sparts = nr_sparts + nr_sparts_exchanged;
660

661
  /* Re-allocate the index array for the parts if needed.. */
662
  if (s->nr_parts + 1 > ind_size) {
663
    int *ind_new;
664
    if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL)
665
      error("Failed to allocate temporary particle indices.");
666
    memcpy(ind_new, ind, sizeof(int) * nr_parts);
667
668
    free(ind);
    ind = ind_new;
669
670
  }

671
  /* Re-allocate the index array for the sparts if needed.. */
672
  if (s->nr_sparts + 1 > sind_size) {
673
674
    int *sind_new;
    if ((sind_new = (int *)malloc(sizeof(int) * (s->nr_sparts + 1))) == NULL)
675
      error("Failed to allocate temporary s-particle indices.");
676
677
678
679
680
    memcpy(sind_new, sind, sizeof(int) * nr_sparts);
    free(sind);
    sind = sind_new;
  }

681
682
683
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

684
  /* Assign each received part to its cell. */
Pedro Gonnet's avatar
Pedro Gonnet committed
685
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
686
    const struct part *const p = &s->parts[k];
687
    ind[k] =
688
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
689
#ifdef SWIFT_DEBUG_CHECKS
690
    if (cells_top[ind[k]].nodeID != local_nodeID)
691
      error("Received part that does not belong to me (nodeID=%i).",
692
            cells_top[ind[k]].nodeID);
693
#endif
694
  }
695
  nr_parts = s->nr_parts;
696

697
  /* Assign each received spart to its cell. */
698
699
700
701
702
703
  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
    const struct spart *const sp = &s->sparts[k];
    sind[k] =
        cell_getid(cdim, sp->x[0] * ih[0], sp->x[1] * ih[1], sp->x[2] * ih[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cells_top[sind[k]].nodeID != local_nodeID)
704
      error("Received s-part that does not belong to me (nodeID=%i).",
705
706
707
708
709
            cells_top[sind[k]].nodeID);
#endif
  }
  nr_sparts = s->nr_sparts;

710
#endif /* WITH_MPI */
711
712

  /* Sort the parts according to their cells. */
713
714
  if (nr_parts > 0)
    space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
715

716
717
718
719
720
721
722
723
724
  /* Sort the sparts according to their cells. */
  if (nr_sparts > 0)
    space_sparts_sort(s, sind, nr_sparts, 0, s->nr_cells - 1, verbose);

  /* Re-link the gparts to their (s-)particles. */
  if (nr_parts > 0 && nr_gparts > 0)
    part_relink_gparts_to_parts(s->parts, nr_parts, 0);
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_gparts_to_sparts(s->sparts, nr_sparts, 0);
725

726
#ifdef SWIFT_DEBUG_CHECKS
727
  /* Verify space_sort_struct. */
728
729
730
  for (size_t k = 1; k < nr_parts; k++) {
    if (ind[k - 1] > ind[k]) {
      error("Sort failed!");
731
    } else if (ind[k] != cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
732
733
                                    s->parts[k].x[1] * s->iwidth[1],
                                    s->parts[k].x[2] * s->iwidth[2])) {
734
735
736
737
      error("Incorrect indices!");
    }
  }
#endif
738

739
740
741
742
743
744
745
746
747
748
  /* Extract the cell counts from the sorted indices. */
  size_t last_index = 0;
  ind[nr_parts] = s->nr_cells;  // sentinel.
  for (size_t k = 0; k < nr_parts; k++) {
    if (ind[k] < ind[k + 1]) {
      cells_top[ind[k]].count = k - last_index + 1;
      last_index = k + 1;
    }
  }

749
750
  /* Extract the cell counts from the sorted indices. */
  size_t last_sindex = 0;
751
  sind[nr_sparts] = s->nr_cells;  // sentinel.
752
753
754
755
756
757
758
  for (size_t k = 0; k < nr_sparts; k++) {
    if (sind[k] < sind[k + 1]) {
      cells_top[sind[k]].scount = k - last_sindex + 1;
      last_sindex = k + 1;
    }
  }

759
  /* We no longer need the indices as of here. */
760
  free(ind);
761
  free(sind);
762

763
764
#ifdef WITH_MPI

765
  /* Re-allocate the index array for the gparts if needed.. */
766
  if (s->nr_gparts + 1 > gind_size) {
767
    int *gind_new;
768
    if ((gind_new = (int *)malloc(sizeof(int) * (s->nr_gparts + 1))) == NULL)
769
      error("Failed to allocate temporary g-particle indices.");
770
    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
771
772
773
774
    free(gind);
    gind = gind_new;
  }

775
  /* Assign each received gpart to its cell. */
776
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
777
    const struct gpart *const p = &s->gparts[k];
778
779
    gind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
780
781

#ifdef SWIFT_DEBUG_CHECKS
782
    if (cells_top[gind[k]].nodeID != s->e->nodeID)
783
      error("Received g-part that does not belong to me (nodeID=%i).",
784
            cells_top[gind[k]].nodeID);
785
#endif
786
787
788
  }
  nr_gparts = s->nr_gparts;

789
#endif /* WITH_MPI */
790

791
  /* Sort the gparts according to their cells. */
792
793
  if (nr_gparts > 0)
    space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
794
795

  /* Re-link the parts. */
796
  if (nr_parts > 0 && nr_gparts > 0)
797
798
799
800
801
    part_relink_parts_to_gparts(s->gparts, nr_gparts, s->parts);

  /* Re-link the sparts. */
  if (nr_sparts > 0 && nr_gparts > 0)
    part_relink_sparts_to_gparts(s->gparts, nr_gparts, s->sparts);
802

803
804
  /* Extract the cell counts from the sorted indices. */
  size_t last_gindex = 0;
805
  gind[nr_gparts] = s->nr_cells;
806
807
  for (size_t k = 0; k < nr_gparts; k++) {
    if (gind[k] < gind[k + 1]) {
808
      cells_top[gind[k]].gcount = k - last_gindex + 1;
809
810
811
812
      last_gindex = k + 1;
    }
  }

813
  /* We no longer need the indices as of here. */
814
  free(gind);
815

816
#ifdef SWIFT_DEBUG_CHECKS
817
  /* Verify that the links are correct */
818
  part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts,
819
                    nr_sparts, verbose);
820
#endif
821

822
823
  /* Hook the cells up to the parts. */
  // tic = getticks();
824
825
826
  struct part *finger = s->parts;
  struct xpart *xfinger = s->xparts;
  struct gpart *gfinger = s->gparts;
827
  struct spart *sfinger = s->sparts;
828
  for (int k = 0; k < s->nr_cells; k++) {
829
    struct cell *restrict c = &cells_top[k];
830
    c->ti_old = ti_current;
831
832
833
    c->parts = finger;
    c->xparts = xfinger;
    c->gparts = gfinger;
834
    c->sparts = sfinger;
835
836
837
    finger = &finger[c->count];
    xfinger = &xfinger[c->count];
    gfinger = &gfinger[c->gcount];
838
    sfinger = &sfinger[c->scount];
839
  }
840
  // message( "hooking up cells took %.3f %s." ,
Matthieu Schaller's avatar
Matthieu Schaller committed
841
  // clocks_from_ticks(getticks() - tic), clocks_getunit());
842
843
844

  /* At this point, we have the upper-level cells, old or new. Now make
     sure that the parts in each cell are ok. */
845
  space_split(s, cells_top, s->nr_cells, verbose);
846
847
848
849
850
851
852
853
854

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Split particles between cells of a hierarchy
 *
855
856
 * This is done in parallel using threads in the #threadpool.
 *
857
 * @param s The #space.
858
859
 * @param cells The cell hierarchy.
 * @param nr_cells The number of cells.
860
861
 * @param verbose Are we talkative ?
 */
862
863
void space_split(struct space *s, struct cell *cells, int nr_cells,
                 int verbose) {
864

Matthieu Schaller's avatar
Matthieu Schaller committed
865
  const ticks tic = getticks();
866

867
  threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells,
868
                 sizeof(struct cell), 1, s);
869

870
871
872
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
873
}
874

875
876
877
878
879
880
881
882
/**
 * @brief Runs through the top-level cells and checks whether tasks associated
 * with them can be split. If not, try to sanitize the cells.
 *
 * @param s The #space to act upon.
 */
void space_sanitize(struct space *s) {

883
884
  s->sanitized = 1;

885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
  for (int k = 0; k < s->nr_cells; k++) {

    struct cell *c = &s->cells_top[k];
    const double min_width = c->dmin;

    /* Do we have a problem ? */
    if (c->h_max * kernel_gamma * space_stretch > min_width * 0.5 &&
        c->count > space_maxcount) {

      /* Ok, clean-up the mess */
      cell_sanitize(c);
    }
  }
}

900
/**
901
 * @brief #threadpool mapper function to compute the particle cell indices.
902
 *
903
 * @param map_data Pointer towards the particles.
904
 * @param nr_parts The number of particles to treat.
905
 * @param extra_data Pointers to the space and index list
906
 */
907
908
void space_parts_get_cell_index_mapper(void *map_data, int nr_parts,
                                       void *extra_data) {
909

910
911
912
913
  /* Unpack the data */
  struct part *restrict parts = (struct part *)map_data;
  struct index_data *data = (struct index_data *)extra_data;
  struct space *s = data->s;
914
  int *const ind = data->ind + (ptrdiff_t)(parts - s->parts);
915
916

  /* Get some constants */
917
918
919
  const double dim_x = s->dim[0];
  const double dim_y = s->dim[1];
  const double dim_z = s->dim[2];
Matthieu Schaller's avatar
Matthieu Schaller committed
920
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
921
922
923
  const double ih_x = s->iwidth[0];
  const double ih_y = s->iwidth[1];
  const double ih_z = s->iwidth[2];
924

925
  for (int k = 0; k < nr_parts; k++) {
926
927
928
929

    /* Get the particle */
    struct part *restrict p = &parts[k];

930
931
932
933
    const double old_pos_x = p->x[0];
    const double old_pos_y = p->x[1];
    const double old_pos_z = p->x[2];

934
    /* Put it back into the simulation volume */
935
936
937
    const double pos_x = box_wrap(old_pos_x, 0.0, dim_x);
    const double pos_y = box_wrap(old_pos_y, 0.0, dim_y);
    const double pos_z = box_wrap(old_pos_z, 0.0, dim_z);
938
939

    /* Get its cell index */
Matthieu Schaller's avatar
Matthieu Schaller committed
940
941
    const int index =
        cell_getid(cdim, pos_x * ih_x, pos_y * ih_y, pos_z * ih_z);
942
    ind[k] = index;
943

944
945
946
947
    /* Update the position */
    p->x[0] = pos_x;
    p->x[1] = pos_y;
    p->x[2] = pos_z;
948
949
950
951
  }
}

/**
952
 * @brief #threadpool mapper function to compute the g-particle cell indices.
Matthieu Schaller's avatar