space.c 107 KB
Newer Older
1
/*******************************************************************************
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23 24 25 26 27 28 29 30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34 35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37 38
#endif

39 40 41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "chemistry.h"
45
#include "const.h"
46
#include "cooling.h"
47
#include "engine.h"
48
#include "error.h"
49 50
#include "gravity.h"
#include "hydro.h"
51
#include "kernel_hydro.h"
52
#include "lock.h"
53
#include "memswap.h"
54
#include "minmax.h"
55
#include "multipole.h"
56
#include "restart.h"
57
#include "runner.h"
58
#include "sort_part.h"
59
#include "stars.h"
60
#include "threadpool.h"
61
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
62 63 64

/* Split size. */
int space_splitsize = space_splitsize_default;
65 66 67
int space_subsize_pair_hydro = space_subsize_pair_hydro_default;
int space_subsize_self_hydro = space_subsize_self_hydro_default;
int space_subsize_pair_grav = space_subsize_pair_grav_default;
68
int space_subsize_self_grav = space_subsize_self_grav_default;
69
int space_maxsize = space_maxsize_default;
70 71 72
#ifdef SWIFT_DEBUG_CHECKS
int last_cell_id;
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
73

74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
/**
 * @brief Interval stack necessary for parallel particle sorting.
 */
struct qstack {
  volatile ptrdiff_t i, j;
  volatile int min, max;
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 */
struct parallel_sort {
  struct part *parts;
  struct gpart *gparts;
  struct xpart *xparts;
90
  struct spart *sparts;
91 92 93 94 95 96
  int *ind;
  struct qstack *stack;
  unsigned int stack_size;
  volatile unsigned int first, last, waiting;
};

97 98 99 100 101 102 103
/**
 * @brief Information required to compute the particle cell indices.
 */
struct index_data {
  struct space *s;
  struct cell *cells;
  int *ind;
104
  int *cell_counts;
105 106
};

107 108 109 110 111 112 113 114 115 116 117
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */
118 119 120 121
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
122 123 124
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
125 126 127 128 129 130 131 132 133 134 135
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
136
  int sid = 0;
137
  for (int k = 0; k < 3; k++)
138 139 140 141
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
142
    struct cell *temp = *ci;
143 144
    *ci = *cj;
    *cj = temp;
145
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
146 147 148 149 150 151
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}
152

153
/**
154
 * @brief Recursively dismantle a cell tree.
155
 *
156 157
 * @param s The #space.
 * @param c The #cell to recycle.
Matthieu Schaller's avatar
Matthieu Schaller committed
158 159 160 161 162 163
 * @param cell_rec_begin Pointer to the start of the list of cells to recycle.
 * @param cell_rec_end Pointer to the end of the list of cells to recycle.
 * @param multipole_rec_begin Pointer to the start of the list of multipoles to
 * recycle.
 * @param multipole_rec_end Pointer to the end of the list of multipoles to
 * recycle.
164
 */
165
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
166 167
                               struct cell **cell_rec_begin,
                               struct cell **cell_rec_end,
168 169
                               struct gravity_tensors **multipole_rec_begin,
                               struct gravity_tensors **multipole_rec_end) {
170
  if (c->split)
171
    for (int k = 0; k < 8; k++)
172
      if (c->progeny[k] != NULL) {
173 174 175 176 177 178
        space_rebuild_recycle_rec(s, c->progeny[k], cell_rec_begin,
                                  cell_rec_end, multipole_rec_begin,
                                  multipole_rec_end);

        c->progeny[k]->next = *cell_rec_begin;
        *cell_rec_begin = c->progeny[k];
179 180 181 182 183

        if (s->gravity) {
          c->progeny[k]->multipole->next = *multipole_rec_begin;
          *multipole_rec_begin = c->progeny[k]->multipole;
        }
184 185

        if (*cell_rec_end == NULL) *cell_rec_end = *cell_rec_begin;
186
        if (s->gravity && *multipole_rec_end == NULL)
187 188 189
          *multipole_rec_end = *multipole_rec_begin;

        c->progeny[k]->multipole = NULL;
190 191 192 193
        c->progeny[k] = NULL;
      }
}

194 195 196 197 198 199 200 201
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct space *s = (struct space *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int k = 0; k < num_elements; k++) {
    struct cell *c = &cells[k];
202
    struct cell *cell_rec_begin = NULL, *cell_rec_end = NULL;
203 204
    struct gravity_tensors *multipole_rec_begin = NULL,
                           *multipole_rec_end = NULL;
205 206 207 208 209
    space_rebuild_recycle_rec(s, c, &cell_rec_begin, &cell_rec_end,
                              &multipole_rec_begin, &multipole_rec_end);
    if (cell_rec_begin != NULL)
      space_recycle_list(s, cell_rec_begin, cell_rec_end, multipole_rec_begin,
                         multipole_rec_end);
210 211 212 213 214 215
    c->sorts = NULL;
    c->nr_tasks = 0;
    c->density = NULL;
    c->gradient = NULL;
    c->force = NULL;
    c->grav = NULL;
216 217
    c->dx_max_part = 0.0f;
    c->dx_max_gpart = 0.0f;
218
    c->dx_max_sort = 0.0f;
219 220 221
    c->sorted = 0;
    c->count = 0;
    c->gcount = 0;
222
    c->scount = 0;
223
    c->init_grav = NULL;
224
    c->extra_ghost = NULL;
225 226
    c->ghost_in = NULL;
    c->ghost_out = NULL;
227
    c->ghost = NULL;
228 229
    c->kick1 = NULL;
    c->kick2 = NULL;
230
    c->timestep = NULL;
231
    c->end_force = NULL;
232 233
    c->drift_part = NULL;
    c->drift_gpart = NULL;
234 235
    c->cooling = NULL;
    c->sourceterms = NULL;
236 237
    c->grav_ghost_in = NULL;
    c->grav_ghost_out = NULL;
238 239
    c->grav_long_range = NULL;
    c->grav_down = NULL;
240
    c->super = c;
241 242
    c->super_hydro = c;
    c->super_gravity = c;
243 244 245 246
    c->parts = NULL;
    c->xparts = NULL;
    c->gparts = NULL;
    c->sparts = NULL;
Matthieu Schaller's avatar
Matthieu Schaller committed
247
    if (s->gravity) bzero(c->multipole, sizeof(struct gravity_tensors));
248 249 250 251 252
    for (int i = 0; i < 13; i++)
      if (c->sort[i] != NULL) {
        free(c->sort[i]);
        c->sort[i] = NULL;
      }
253 254 255 256
#if WITH_MPI
    c->recv_xv = NULL;
    c->recv_rho = NULL;
    c->recv_gradient = NULL;
257
    c->recv_grav = NULL;
258 259 260 261 262
    c->recv_ti = NULL;

    c->send_xv = NULL;
    c->send_rho = NULL;
    c->send_gradient = NULL;
263
    c->send_grav = NULL;
264 265 266 267 268
    c->send_ti = NULL;
#endif
  }
}

269 270 271 272
/**
 * @brief Free up any allocated cells.
 */
void space_free_cells(struct space *s) {
Matthieu Schaller's avatar
Matthieu Schaller committed
273 274
  threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper, s->cells_top,
                 s->nr_cells, sizeof(struct cell), 0, s);
275 276 277
  s->maxdepth = 0;
}

278
/**
279
 * @brief Re-build the top-level cell grid.
280
 *
281
 * @param s The #space.
282
 * @param verbose Print messages to stdout or not.
283
 */
284
void space_regrid(struct space *s, int verbose) {
285

286
  const size_t nr_parts = s->nr_parts;
287
  const ticks tic = getticks();
288
  const integertime_t ti_old = (s->e != NULL) ? s->e->ti_old : 0;
289

290
  /* Run through the cells and get the current h_max. */
291
  // tic = getticks();
292
  float h_max = s->cell_min / kernel_gamma / space_stretch;
293
  if (nr_parts > 0) {
294
    if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
295
      for (int k = 0; k < s->nr_cells; k++) {
296 297 298
        if (s->cells_top[k].nodeID == engine_rank &&
            s->cells_top[k].h_max > h_max) {
          h_max = s->cells_top[k].h_max;
299
        }
300 301
      }
    } else {
302
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
303
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
304
      }
305 306 307 308 309 310 311 312 313 314
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
315
      error("Failed to aggregate the rebuild flag across nodes.");
316 317 318
    h_max = buff;
  }
#endif
319
  if (verbose) message("h_max is %.3e (cell_min=%.3e).", h_max, s->cell_min);
320 321

  /* Get the new putative cell dimensions. */
322
  const int cdim[3] = {
323 324 325 326 327 328
      (int)floor(s->dim[0] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      (int)floor(s->dim[1] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      (int)floor(s->dim[2] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min))};
329 330 331 332 333

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
334 335 336
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
337 338
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
339
        " - the (minimal) time-step is too large leading to particles with "
340
        "predicted smoothing lengths too large for the box size,\n"
341
        " - particles with velocities so large that they move by more than two "
342
        "box sizes per time-step.\n");
343

344
  /* Check if we have enough cells for periodic gravity. */
345
  if (s->gravity && s->periodic && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
346
    error(
347 348
        "Must have at least 8 cells in each spatial dimension when periodic "
        "gravity is switched on.\nThis error is often caused by any of the "
349 350 351 352 353 354
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
        " - the (minimal) time-step is too large leading to particles with "
        "predicted smoothing lengths too large for the box size,\n"
355
        " - particles with velocities so large that they move by more than two "
356
        "box sizes per time-step.\n");
357

358 359 360
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
361
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
362
  double oldwidth[3];
363 364 365 366 367 368 369 370
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
371 372 373
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
374 375 376 377 378 379 380 381 382

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
383
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
384 385 386 387 388
        }
      }
    }
  }

Peter W. Draper's avatar
Peter W. Draper committed
389
  /* Are we about to allocate new top level cells without a regrid?
Peter W. Draper's avatar
Peter W. Draper committed
390 391
   * Can happen when restarting the application. */
  int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL);
392 393 394 395
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
396
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
397 398
      cdim[2] < s->cdim[2]) {

399 400
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
401
    message("(re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]);
402 403 404
    fflush(stdout);
#endif

405
    /* Free the old cells, if they were allocated. */
406
    if (s->cells_top != NULL) {
407
      space_free_cells(s);
408
      free(s->local_cells_top);
409
      free(s->cells_top);
410
      free(s->multipoles_top);
411 412
    }

413 414 415 416
    /* Also free the task arrays, these will be regenerated and we can use the
     * memory while copying the particle arrays. */
    if (s->e != NULL) scheduler_free_tasks(&s->e->sched);

417
    /* Set the new cell dimensions only if smaller. */
418
    for (int k = 0; k < 3; k++) {
419
      s->cdim[k] = cdim[k];
420 421
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
422
    }
423
    const float dmin = min3(s->width[0], s->width[1], s->width[2]);
424 425 426

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
427
    if (posix_memalign((void **)&s->cells_top, cell_align,
428
                       s->nr_cells * sizeof(struct cell)) != 0)
429
      error("Failed to allocate top-level cells.");
430
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
431

432 433
    /* Allocate the multipoles for the top-level cells. */
    if (s->gravity) {
434
      if (posix_memalign((void **)&s->multipoles_top, multipole_align,
435
                         s->nr_cells * sizeof(struct gravity_tensors)) != 0)
436
        error("Failed to allocate top-level multipoles.");
437
      bzero(s->multipoles_top, s->nr_cells * sizeof(struct gravity_tensors));
438 439
    }

440
    /* Allocate the indices of local cells */
441
    if (posix_memalign((void **)&s->local_cells_top, SWIFT_STRUCT_ALIGNMENT,
442 443 444 445
                       s->nr_cells * sizeof(int)) != 0)
      error("Failed to allocate indices of local top-level cells.");
    bzero(s->local_cells_top, s->nr_cells * sizeof(int));

446
    /* Set the cells' locks */
447
    for (int k = 0; k < s->nr_cells; k++) {
448
      if (lock_init(&s->cells_top[k].lock) != 0)
449 450 451 452 453 454 455 456
        error("Failed to init spinlock for hydro.");
      if (lock_init(&s->cells_top[k].glock) != 0)
        error("Failed to init spinlock for gravity.");
      if (lock_init(&s->cells_top[k].mlock) != 0)
        error("Failed to init spinlock for multipoles.");
      if (lock_init(&s->cells_top[k].slock) != 0)
        error("Failed to init spinlock for stars.");
    }
457 458

    /* Set the cell location and sizes. */
459 460 461
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
462 463
          const size_t cid = cell_getid(cdim, i, j, k);
          struct cell *restrict c = &s->cells_top[cid];
464 465 466 467 468 469
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
470 471 472 473
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
474
          c->scount = 0;
475
          c->super = c;
476 477
          c->super_hydro = c;
          c->super_gravity = c;
478 479
          c->ti_old_part = ti_old;
          c->ti_old_gpart = ti_old;
480
          c->ti_old_multipole = ti_old;
Matthieu Schaller's avatar
Matthieu Schaller committed
481
          if (s->gravity) c->multipole = &s->multipoles_top[cid];
Pedro Gonnet's avatar
Pedro Gonnet committed
482
        }
483 484

    /* Be verbose about the change. */
485 486 487
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
488

489
#ifdef WITH_MPI
490 491 492 493 494
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
495 496 497
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
498

Matthieu Schaller's avatar
Matthieu Schaller committed
499
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
#ifdef HAVE_METIS
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
518

519 520
      /* Finished with these. */
      free(oldnodeIDs);
Peter W. Draper's avatar
Peter W. Draper committed
521 522

    } else if (no_regrid && s->e != NULL) {
Peter W. Draper's avatar
Peter W. Draper committed
523 524 525 526 527 528 529 530 531 532 533 534 535
      /* If we have created the top-levels cells and not done an initial
       * partition (can happen when restarting), then the top-level cells
       * are not assigned to a node, we must do that and then associate the
       * particles with the cells. Note requires that
       * partition_store_celllist() was called once before, or just before
       * dumping the restart files.*/
      partition_restore_celllist(s, s->e->reparttype);

      /* Now re-distribute the particles, should just add to cells? */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
536
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
537
#endif /* WITH_MPI */
538 539 540 541

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

542
  }      /* re-build upper-level cells? */
543
  else { /* Otherwise, just clean up the cells. */
544 545

    /* Free the old cells, if they were allocated. */
546
    space_free_cells(s);
547
  }
548 549 550 551

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
552
}
553 554 555 556 557

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
558
 * @param verbose Print messages to stdout or not
559 560
 *
 */
561
void space_rebuild(struct space *s, int verbose) {
562

Matthieu Schaller's avatar
Matthieu Schaller committed
563
  const ticks tic = getticks();
564

565 566
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
Pedro Gonnet's avatar
Pedro Gonnet committed
567
  if (s->e->nodeID == 0 || verbose) message("(re)building space");
568 569
  fflush(stdout);
#endif
570 571

  /* Re-grid if necessary, or just re-set the cell data. */
572
  space_regrid(s, verbose);
573

Pedro Gonnet's avatar
Pedro Gonnet committed
574 575
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
576
  size_t nr_sparts = s->nr_sparts;
577
  struct cell *restrict cells_top = s->cells_top;
578
  const integertime_t ti_old = (s->e != NULL) ? s->e->ti_old : 0;
579

Pedro Gonnet's avatar
Pedro Gonnet committed
580 581 582
  /* Run through the particles and get their cell index. Allocates
     an index that is larger than the number of particles to avoid
     re-allocating after shuffling. */
583
  const size_t ind_size = s->size_parts + 100;
584 585 586 587 588 589 590
  int *ind = (int *)malloc(sizeof(int) * ind_size);
  if (ind == NULL) error("Failed to allocate temporary particle indices.");
  int *cell_part_counts = (int *)calloc(sizeof(int), s->nr_cells);
  if (cell_part_counts == NULL)
    error("Failed to allocate cell part count buffer.");
  if (s->size_parts > 0)
    space_parts_get_cell_index(s, ind, cell_part_counts, cells_top, verbose);
591

592
  /* Run through the gravity particles and get their cell index. */
593
  const size_t gind_size = s->size_gparts + 100;
594 595 596 597 598
  int *gind = (int *)malloc(sizeof(int) * gind_size);
  if (gind == NULL) error("Failed to allocate temporary g-particle indices.");
  int *cell_gpart_counts = (int *)calloc(sizeof(int), s->nr_cells);
  if (cell_gpart_counts == NULL)
    error("Failed to allocate cell gpart count buffer.");
Pedro Gonnet's avatar
Pedro Gonnet committed
599
  if (s->size_gparts > 0)
600
    space_gparts_get_cell_index(s, gind, cell_gpart_counts, cells_top, verbose);
601

602 603
  /* Run through the star particles and get their cell index. */
  const size_t sind_size = s->size_sparts + 100;
604 605 606 607 608
  int *sind = (int *)malloc(sizeof(int) * sind_size);
  if (sind == NULL) error("Failed to allocate temporary s-particle indices.");
  int *cell_spart_counts = (int *)calloc(sizeof(int), s->nr_cells);
  if (cell_spart_counts == NULL)
    error("Failed to allocate cell gpart count buffer.");
609
  if (s->size_sparts > 0)
610
    space_sparts_get_cell_index(s, sind, cell_spart_counts, cells_top, verbose);
611

612
#ifdef WITH_MPI
613
  const int local_nodeID = s->e->nodeID;
614

615
  /* Move non-local parts to the end of the list. */
616
  for (size_t k = 0; k < nr_parts;) {
617
    if (cells_top[ind[k]].nodeID != local_nodeID) {
618
      nr_parts -= 1;
619
      /* Swap the particle */
620
      memswap(&s->parts[k], &s->parts[nr_parts], sizeof(struct part));
621
      /* Swap the link with the gpart */
622
      if (s->parts[k].gpart != NULL) {
623
        s->parts[k].gpart->id_or_neg_offset = -k;
624 625
      }
      if (s->parts[nr_parts].gpart != NULL) {
626
        s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
627
      }
628
      /* Swap the xpart */
629
      memswap(&s->xparts[k], &s->xparts[nr_parts], sizeof(struct xpart));
630
      /* Swap the index */
631
      memswap(&ind[k], &ind[nr_parts], sizeof(int));
Matthieu Schaller's avatar
Matthieu Schaller committed
632
    } else {
633 634 635 636
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
637

638
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
639
  /* Check that all parts are in the correct places. */
640
  for (size_t k = 0; k < nr_parts; k++) {
641
    if (cells_top[ind[k]].nodeID != local_nodeID) {
642 643 644 645
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
646
    if (cells_top[ind[k]].nodeID == local_nodeID) {
647
      error("Failed to remove local parts from send list");
648
    }
649 650
  }
#endif
651

652 653 654 655 656
  /* Move non-local sparts to the end of the list. */
  for (size_t k = 0; k < nr_sparts;) {
    if (cells_top[sind[k]].nodeID != local_nodeID) {
      nr_sparts -= 1;
      /* Swap the particle */
657
      memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart));
658 659 660 661
      /* Swap the link with the gpart */
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
662 663
      if (s->sparts[nr_sparts].gpart != NULL) {
        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
664 665
      }
      /* Swap the index */
666
      memswap(&sind[k], &sind[nr_sparts], sizeof(int));
667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
    } else {
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }

#ifdef SWIFT_DEBUG_CHECKS
  /* Check that all sparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_sparts; k++) {
    if (cells_top[sind[k]].nodeID != local_nodeID) {
      error("Failed to move all non-local sparts to send list");
    }
  }
  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
    if (cells_top[sind[k]].nodeID == local_nodeID) {
      error("Failed to remove local sparts from send list");
    }
  }
#endif

687
  /* Move non-local gparts to the end of the list. */
688
  for (size_t k = 0; k < nr_gparts;) {
689
    if (cells_top[gind[k]].nodeID != local_nodeID) {
690
      nr_gparts -= 1;
691
      /* Swap the particle */
Pedro Gonnet's avatar
Pedro Gonnet committed
692
      memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart));
693 694
      /* Swap the link with part/spart */
      if (s->gparts[k].type == swift_type_gas) {
695
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
696 697
      } else if (s->gparts[k].type == swift_type_star) {
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
698
      }
699
      if (s->gparts[nr_gparts].type == swift_type_gas) {
700 701
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
702 703 704
      } else if (s->gparts[nr_gparts].type == swift_type_star) {
        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
705
      }
706
      /* Swap the index */
Pedro Gonnet's avatar
Pedro Gonnet committed
707
      memswap(&gind[k], &gind[nr_gparts], sizeof(int));
Matthieu Schaller's avatar
Matthieu Schaller committed
708
    } else {
709 710 711 712
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
713

714
#ifdef SWIFT_DEBUG_CHECKS
715 716
  /* Check that all gparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_gparts; k++) {
717
    if (cells_top[gind[k]].nodeID != local_nodeID) {
718 719 720 721
      error("Failed to move all non-local gparts to send list");
    }
  }
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
722
    if (cells_top[gind[k]].nodeID == local_nodeID) {
723
      error("Failed to remove local gparts from send list");
724
    }
725 726
  }
#endif
727

728 729
  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
730
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
731
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
732
  size_t nr_sparts_exchanged = s->nr_sparts - nr_sparts;
Pedro Gonnet's avatar
Pedro Gonnet committed
733
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
734 735
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged,
                         nr_sparts, &sind[nr_sparts], &nr_sparts_exchanged);
Pedro Gonnet's avatar
Pedro Gonnet committed
736 737

  /* Set the new particle counts. */
738
  s->nr_parts = nr_parts + nr_parts_exchanged;
739
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
740
  s->nr_sparts = nr_sparts + nr_sparts_exchanged;
741

742 743 744 745 746 747 748 749
  /* Clear non-local cell counts. */
  for (int k = 0; k < s->nr_cells; k++) {
    if (s->cells_top[k].nodeID != local_nodeID) {
      cell_part_counts[k] = 0;
      cell_spart_counts[k] = 0;
      cell_gpart_counts[k] = 0;
    }
  }
750

751
  /* Re-allocate the index array for the parts if needed.. */
752
  if (s->nr_parts + 1 > ind_size) {
753
    int *ind_new;
754
    if ((ind_new = (int *)malloc(sizeof(int) * (s->nr_parts + 1))) == NULL)
755
      error("Failed to allocate temporary particle indices.");
756
    memcpy(ind_new, ind, sizeof(int) * nr_parts);
757 758
    free(ind);
    ind = ind_new;
759 760
  }

761
  /* Re-allocate the index array for the sparts if needed.. */
762
  if (s->nr_sparts + 1 > sind_size) {
763 764
    int *sind_new;
    if ((sind_new = (int *)malloc(sizeof(int) * (s->nr_sparts + 1))) == NULL)
765
      error("Failed to allocate temporary s-particle indices.");
766 767 768 769 770
    memcpy(sind_new, sind, sizeof(int) * nr_sparts);
    free(sind);
    sind = sind_new;
  }

771 772 773
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

774
  /* Assign each received part to its cell. */
Pedro Gonnet's avatar
Pedro Gonnet committed
775
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
776
    const struct part *const p = &s->parts[k];
777
    ind[k] =
778
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
779
    cell_part_counts[ind[k]]++;
780
#ifdef SWIFT_DEBUG_CHECKS
781
    if (cells_top[ind[k]].nodeID != local_nodeID)
782
      error("Received part that does not belong to me (nodeID=%i).",
783
            cells_top[ind[k]].nodeID);
784
#endif
785
  }
786
  nr_parts = s->nr_parts;
787

788
  /* Assign each received spart to its cell. */
789 790 791 792
  for (size_t k = nr_sparts; k < s->nr_sparts; k++) {
    const struct spart *const sp = &s->sparts[k];
    sind[k] =
        cell_getid(cdim, sp->x[0] * ih[0], sp->x[1] * ih[1], sp->x[2] * ih[2]);
793
    cell_spart_counts[sind[k]]++;
794 795
#ifdef SWIFT_DEBUG_CHECKS
    if (cells_top[sind[k]].nodeID != local_nodeID)
796
      error("Received s-part that does not belong to me (nodeID=%i).",
797 798 799 800 801
            cells_top[sind[k]].nodeID);
#endif
  }
  nr_sparts = s->nr_sparts;

lhausamm's avatar
lhausamm committed
802
#endif /* WITH_MPI */
803 804

  /* Sort the parts according to their cells. */
805
  if (nr_parts > 0)
806 807
    space_parts_sort(s->parts, s->xparts, ind, cell_part_counts, s->nr_cells,
                     0);
808

809 810 811
#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the part have been sorted correctly. */
  for (size_t k = 0; k < nr_parts; k++) {
812 813 814 815 816 817 818