space.c 138 KB
Newer Older
1
/*******************************************************************************
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34
35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37
38
#endif

39
40
41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "chemistry.h"
45
#include "const.h"
46
#include "cooling.h"
47
#include "debug.h"
48
#include "engine.h"
49
#include "error.h"
50
51
#include "gravity.h"
#include "hydro.h"
52
#include "kernel_hydro.h"
53
#include "lock.h"
54
#include "memswap.h"
55
#include "minmax.h"
56
#include "multipole.h"
57
#include "restart.h"
58
#include "sort_part.h"
59
#include "stars.h"
60
#include "threadpool.h"
61
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
62
63
64

/* Split size. */
int space_splitsize = space_splitsize_default;
65
66
67
int space_subsize_pair_hydro = space_subsize_pair_hydro_default;
int space_subsize_self_hydro = space_subsize_self_hydro_default;
int space_subsize_pair_grav = space_subsize_pair_grav_default;
68
int space_subsize_self_grav = space_subsize_self_grav_default;
69
70
int space_subsize_pair_stars = space_subsize_pair_stars_default;
int space_subsize_self_stars = space_subsize_self_stars_default;
71
int space_subdepth_grav = space_subdepth_grav_default;
72
int space_maxsize = space_maxsize_default;
73

74
75
76
/*! Number of extra #part we allocate memory for per top-level cell */
int space_extra_parts = space_extra_parts_default;

77
78
79
/*! Number of extra #spart we allocate memory for per top-level cell */
int space_extra_sparts = space_extra_sparts_default;

80
81
82
/*! Number of extra #gpart we allocate memory for per top-level cell */
int space_extra_gparts = space_extra_gparts_default;

83
84
/*! Expected maximal number of strays received at a rebuild */
int space_expected_max_nr_strays = space_expected_max_nr_strays_default;
85
86
87
#ifdef SWIFT_DEBUG_CHECKS
int last_cell_id;
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
88

89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/**
 * @brief Interval stack necessary for parallel particle sorting.
 */
struct qstack {
  volatile ptrdiff_t i, j;
  volatile int min, max;
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 */
struct parallel_sort {
  struct part *parts;
  struct gpart *gparts;
  struct xpart *xparts;
105
  struct spart *sparts;
106
107
108
109
110
111
  int *ind;
  struct qstack *stack;
  unsigned int stack_size;
  volatile unsigned int first, last, waiting;
};

112
113
114
115
116
117
/**
 * @brief Information required to compute the particle cell indices.
 */
struct index_data {
  struct space *s;
  int *ind;
118
  int *cell_counts;
119
120
121
122
123
124
  size_t count_inhibited_part;
  size_t count_inhibited_gpart;
  size_t count_inhibited_spart;
  size_t count_extra_part;
  size_t count_extra_gpart;
  size_t count_extra_spart;
125
126
};

127
/**
128
 * @brief Recursively dismantle a cell tree.
129
 *
130
131
 * @param s The #space.
 * @param c The #cell to recycle.
Matthieu Schaller's avatar
Matthieu Schaller committed
132
133
134
135
136
137
 * @param cell_rec_begin Pointer to the start of the list of cells to recycle.
 * @param cell_rec_end Pointer to the end of the list of cells to recycle.
 * @param multipole_rec_begin Pointer to the start of the list of multipoles to
 * recycle.
 * @param multipole_rec_end Pointer to the end of the list of multipoles to
 * recycle.
138
 */
139
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
140
141
                               struct cell **cell_rec_begin,
                               struct cell **cell_rec_end,
142
143
                               struct gravity_tensors **multipole_rec_begin,
                               struct gravity_tensors **multipole_rec_end) {
144
  if (c->split)
145
    for (int k = 0; k < 8; k++)
146
      if (c->progeny[k] != NULL) {
147
148
149
150
151
152
        space_rebuild_recycle_rec(s, c->progeny[k], cell_rec_begin,
                                  cell_rec_end, multipole_rec_begin,
                                  multipole_rec_end);

        c->progeny[k]->next = *cell_rec_begin;
        *cell_rec_begin = c->progeny[k];
153

154
        if (s->with_self_gravity) {
155
156
          c->progeny[k]->grav.multipole->next = *multipole_rec_begin;
          *multipole_rec_begin = c->progeny[k]->grav.multipole;
157
        }
158
159

        if (*cell_rec_end == NULL) *cell_rec_end = *cell_rec_begin;
160
        if (s->with_self_gravity && *multipole_rec_end == NULL)
161
162
          *multipole_rec_end = *multipole_rec_begin;

163
        c->progeny[k]->grav.multipole = NULL;
164
165
166
167
        c->progeny[k] = NULL;
      }
}

168
169
170
171
172
173
174
175
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct space *s = (struct space *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int k = 0; k < num_elements; k++) {
    struct cell *c = &cells[k];
176
    struct cell *cell_rec_begin = NULL, *cell_rec_end = NULL;
177
178
    struct gravity_tensors *multipole_rec_begin = NULL,
                           *multipole_rec_end = NULL;
179
180
181
182
183
    space_rebuild_recycle_rec(s, c, &cell_rec_begin, &cell_rec_end,
                              &multipole_rec_begin, &multipole_rec_end);
    if (cell_rec_begin != NULL)
      space_recycle_list(s, cell_rec_begin, cell_rec_end, multipole_rec_begin,
                         multipole_rec_end);
184
    c->hydro.sorts = NULL;
185
    c->nr_tasks = 0;
186
187
188
189
190
191
    c->grav.nr_mm_tasks = 0;
    c->hydro.density = NULL;
    c->hydro.gradient = NULL;
    c->hydro.force = NULL;
    c->grav.grav = NULL;
    c->grav.mm = NULL;
192
193
    c->hydro.dx_max_part = 0.0f;
    c->hydro.dx_max_sort = 0.0f;
Loic Hausammann's avatar
Loic Hausammann committed
194
    c->stars.dx_max_part = 0.f;
195
196
    c->hydro.sorted = 0;
    c->hydro.count = 0;
197
198
    c->hydro.updated = 0;
    c->hydro.inhibited = 0;
199
    c->grav.count = 0;
200
201
    c->grav.updated = 0;
    c->grav.inhibited = 0;
202
    c->stars.count = 0;
203
204
    c->stars.updated = 0;
    c->stars.inhibited = 0;
205
206
207
208
209
210
    c->grav.init = NULL;
    c->grav.init_out = NULL;
    c->hydro.extra_ghost = NULL;
    c->hydro.ghost_in = NULL;
    c->hydro.ghost_out = NULL;
    c->hydro.ghost = NULL;
211
212
213
214
    c->stars.ghost_in = NULL;
    c->stars.ghost_out = NULL;
    c->stars.ghost = NULL;
    c->stars.density = NULL;
215
216
    c->kick1 = NULL;
    c->kick2 = NULL;
217
    c->timestep = NULL;
218
    c->end_force = NULL;
219
    c->hydro.drift = NULL;
220
    c->grav.drift = NULL;
221
    c->hydro.cooling = NULL;
222
    c->sourceterms = NULL;
223
224
225
226
    c->grav.long_range = NULL;
    c->grav.down_in = NULL;
    c->grav.down = NULL;
    c->grav.mesh = NULL;
227
    c->super = c;
228
229
230
231
    c->hydro.super = c;
    c->grav.super = c;
    c->hydro.parts = NULL;
    c->hydro.xparts = NULL;
232
233
    c->grav.parts = NULL;
    c->stars.parts = NULL;
234
235
236
237
238
239
240
    c->hydro.do_sub_sort = 0;
    c->grav.do_sub_drift = 0;
    c->hydro.do_sub_drift = 0;
    c->hydro.ti_end_min = -1;
    c->hydro.ti_end_max = -1;
    c->grav.ti_end_min = -1;
    c->grav.ti_end_max = -1;
241
242
243
#ifdef SWIFT_DEBUG_CHECKS
    c->cellID = 0;
#endif
244
245
    if (s->with_self_gravity)
      bzero(c->grav.multipole, sizeof(struct gravity_tensors));
246
    for (int i = 0; i < 13; i++)
247
248
249
      if (c->hydro.sort[i] != NULL) {
        free(c->hydro.sort[i]);
        c->hydro.sort[i] = NULL;
250
      }
251
#if WITH_MPI
252
253
    c->mpi.tag = -1;

254
255
256
257
    c->mpi.hydro.recv_xv = NULL;
    c->mpi.hydro.recv_rho = NULL;
    c->mpi.hydro.recv_gradient = NULL;
    c->mpi.grav.recv = NULL;
258
259
    c->mpi.recv_ti = NULL;

260
261
262
263
    c->mpi.hydro.send_xv = NULL;
    c->mpi.hydro.send_rho = NULL;
    c->mpi.hydro.send_gradient = NULL;
    c->mpi.grav.send = NULL;
264
    c->mpi.send_ti = NULL;
265
266
267
268
#endif
  }
}

269
270
271
272
/**
 * @brief Free up any allocated cells.
 */
void space_free_cells(struct space *s) {
273
274
275

  ticks tic = getticks();

Matthieu Schaller's avatar
Matthieu Schaller committed
276
277
  threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper, s->cells_top,
                 s->nr_cells, sizeof(struct cell), 0, s);
278
  s->maxdepth = 0;
279
280
281
282

  if (s->e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
283
284
}

285
/**
286
 * @brief Re-build the top-level cell grid.
287
 *
288
 * @param s The #space.
289
 * @param verbose Print messages to stdout or not.
290
 */
291
void space_regrid(struct space *s, int verbose) {
292

293
  const size_t nr_parts = s->nr_parts;
Loic Hausammann's avatar
Loic Hausammann committed
294
  const size_t nr_sparts = s->nr_sparts;
295
  const ticks tic = getticks();
296
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
297

298
299
  message("REGRID!!");

300
  /* Run through the cells and get the current h_max. */
301
  // tic = getticks();
302
  float h_max = s->cell_min / kernel_gamma / space_stretch;
303
  if (nr_parts > 0) {
304
305
306
307
308
309

    /* Can we use the list of local non-empty top-level cells? */
    if (s->local_cells_with_particles_top != NULL) {
      for (int k = 0; k < s->nr_local_cells_with_particles; ++k) {
        const struct cell *c =
            &s->cells_top[s->local_cells_with_particles_top[k]];
310
        if (c->hydro.h_max > h_max) {
311
          h_max = c->hydro.h_max;
312
        }
Loic Hausammann's avatar
Loic Hausammann committed
313
        if (c->stars.h_max > h_max) {
314
          h_max = c->stars.h_max;
Loic Hausammann's avatar
Loic Hausammann committed
315
        }
316
      }
317
318

      /* Can we instead use all the top-level cells? */
319
    } else if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
320
      for (int k = 0; k < s->nr_cells; k++) {
321
        const struct cell *c = &s->cells_top[k];
322
        if (c->nodeID == engine_rank && c->hydro.h_max > h_max) {
323
          h_max = c->hydro.h_max;
324
        }
Loic Hausammann's avatar
Loic Hausammann committed
325
        if (c->nodeID == engine_rank && c->stars.h_max > h_max) {
326
          h_max = c->stars.h_max;
Loic Hausammann's avatar
Loic Hausammann committed
327
        }
328
      }
329
330

      /* Last option: run through the particles */
331
    } else {
332
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
333
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
334
      }
Loic Hausammann's avatar
Loic Hausammann committed
335
336
337
      for (size_t k = 0; k < nr_sparts; k++) {
        if (s->sparts[k].h > h_max) h_max = s->sparts[k].h;
      }
338
339
340
341
342
343
344
345
346
347
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
348
      error("Failed to aggregate the rebuild flag across nodes.");
349
350
351
    h_max = buff;
  }
#endif
352
  if (verbose) message("h_max is %.3e (cell_min=%.3e).", h_max, s->cell_min);
353
354

  /* Get the new putative cell dimensions. */
355
  const int cdim[3] = {
356
357
358
359
360
361
      (int)floor(s->dim[0] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      (int)floor(s->dim[1] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      (int)floor(s->dim[2] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min))};
362
363
364
365
366

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
367
368
369
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
370
371
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
372
        " - the (minimal) time-step is too large leading to particles with "
373
        "predicted smoothing lengths too large for the box size,\n"
374
        " - particles with velocities so large that they move by more than two "
375
        "box sizes per time-step.\n");
376

377
378
379
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
380
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
381
  double oldwidth[3];
382
383
384
385
386
387
388
389
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
390
391
392
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
393
394
395
396
397
398
399
400
401

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
402
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
403
404
405
406
407
        }
      }
    }
  }

Peter W. Draper's avatar
Peter W. Draper committed
408
  /* Are we about to allocate new top level cells without a regrid?
Peter W. Draper's avatar
Peter W. Draper committed
409
   * Can happen when restarting the application. */
410
  const int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL);
411
412
413
414
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
415
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
416
417
      cdim[2] < s->cdim[2]) {

418
419
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
420
    message("(re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]);
421
422
423
    fflush(stdout);
#endif

424
    /* Free the old cells, if they were allocated. */
425
    if (s->cells_top != NULL) {
426
      space_free_cells(s);
427
      free(s->local_cells_with_tasks_top);
428
      free(s->local_cells_top);
429
      free(s->cells_with_particles_top);
430
      free(s->local_cells_with_particles_top);
431
      free(s->cells_top);
432
      free(s->multipoles_top);
433
434
    }

435
436
437
438
    /* Also free the task arrays, these will be regenerated and we can use the
     * memory while copying the particle arrays. */
    if (s->e != NULL) scheduler_free_tasks(&s->e->sched);

439
    /* Set the new cell dimensions only if smaller. */
440
    for (int k = 0; k < 3; k++) {
441
      s->cdim[k] = cdim[k];
442
443
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
444
    }
445
    const float dmin = min3(s->width[0], s->width[1], s->width[2]);
446
447
448

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
449
    if (posix_memalign((void **)&s->cells_top, cell_align,
450
                       s->nr_cells * sizeof(struct cell)) != 0)
451
      error("Failed to allocate top-level cells.");
452
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
453

454
    /* Allocate the multipoles for the top-level cells. */
455
    if (s->with_self_gravity) {
456
      if (posix_memalign((void **)&s->multipoles_top, multipole_align,
457
                         s->nr_cells * sizeof(struct gravity_tensors)) != 0)
458
        error("Failed to allocate top-level multipoles.");
459
      bzero(s->multipoles_top, s->nr_cells * sizeof(struct gravity_tensors));
460
461
    }

462
    /* Allocate the indices of local cells */
463
    if (posix_memalign((void **)&s->local_cells_top, SWIFT_STRUCT_ALIGNMENT,
464
465
466
467
                       s->nr_cells * sizeof(int)) != 0)
      error("Failed to allocate indices of local top-level cells.");
    bzero(s->local_cells_top, s->nr_cells * sizeof(int));

468
    /* Allocate the indices of local cells with tasks */
469
470
    if (posix_memalign((void **)&s->local_cells_with_tasks_top,
                       SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0)
471
      error("Failed to allocate indices of local top-level cells with tasks.");
472
473
    bzero(s->local_cells_with_tasks_top, s->nr_cells * sizeof(int));

474
    /* Allocate the indices of cells with particles */
475
    if (posix_memalign((void **)&s->cells_with_particles_top,
476
                       SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0)
477
478
      error("Failed to allocate indices of top-level cells with particles.");
    bzero(s->cells_with_particles_top, s->nr_cells * sizeof(int));
479

480
481
482
483
484
485
486
487
    /* Allocate the indices of local cells with particles */
    if (posix_memalign((void **)&s->local_cells_with_particles_top,
                       SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0)
      error(
          "Failed to allocate indices of local top-level cells with "
          "particles.");
    bzero(s->local_cells_with_particles_top, s->nr_cells * sizeof(int));

488
    /* Set the cells' locks */
489
    for (int k = 0; k < s->nr_cells; k++) {
490
      if (lock_init(&s->cells_top[k].hydro.lock) != 0)
491
        error("Failed to init spinlock for hydro.");
492
      if (lock_init(&s->cells_top[k].grav.plock) != 0)
493
        error("Failed to init spinlock for gravity.");
494
      if (lock_init(&s->cells_top[k].grav.mlock) != 0)
495
        error("Failed to init spinlock for multipoles.");
496
      if (lock_init(&s->cells_top[k].stars.lock) != 0)
497
498
        error("Failed to init spinlock for stars.");
    }
499
500

    /* Set the cell location and sizes. */
501
502
503
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
504
505
          const size_t cid = cell_getid(cdim, i, j, k);
          struct cell *restrict c = &s->cells_top[cid];
506
507
508
509
510
511
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
512
513
          c->dmin = dmin;
          c->depth = 0;
514
          c->split = 0;
515
          c->hydro.count = 0;
516
517
          c->grav.count = 0;
          c->stars.count = 0;
518
          c->super = c;
519
520
          c->hydro.super = c;
          c->grav.super = c;
521
522
          c->hydro.ti_old_part = ti_current;
          c->grav.ti_old_part = ti_current;
523
          c->grav.ti_old_multipole = ti_current;
Pedro Gonnet's avatar
Pedro Gonnet committed
524
#ifdef WITH_MPI
525
          c->mpi.tag = -1;
Pedro Gonnet's avatar
Pedro Gonnet committed
526
#endif  // WITH_MPI
527
          if (s->with_self_gravity) c->grav.multipole = &s->multipoles_top[cid];
528
#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
529
530
          c->cellID = -last_cell_id;
          last_cell_id++;
531
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
532
        }
533
534

    /* Be verbose about the change. */
535
536
537
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
538

539
#ifdef WITH_MPI
540
541
542
543
544
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
545
546
547
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
548

Matthieu Schaller's avatar
Matthieu Schaller committed
549
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
550
551
552
553

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
554
#if defined(HAVE_PARMETIS) || defined(HAVE_METIS)
555
556
557
558
559
560
561
562
563
564
565
566
567
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
568

569
570
      /* Finished with these. */
      free(oldnodeIDs);
Peter W. Draper's avatar
Peter W. Draper committed
571
572

    } else if (no_regrid && s->e != NULL) {
Peter W. Draper's avatar
Peter W. Draper committed
573
574
575
576
577
578
579
580
581
582
583
584
585
      /* If we have created the top-levels cells and not done an initial
       * partition (can happen when restarting), then the top-level cells
       * are not assigned to a node, we must do that and then associate the
       * particles with the cells. Note requires that
       * partition_store_celllist() was called once before, or just before
       * dumping the restart files.*/
      partition_restore_celllist(s, s->e->reparttype);

      /* Now re-distribute the particles, should just add to cells? */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
586
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
587
#endif /* WITH_MPI */
588
589
590
591

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

592
  }      /* re-build upper-level cells? */
593
  else { /* Otherwise, just clean up the cells. */
594
595

    /* Free the old cells, if they were allocated. */
596
    space_free_cells(s);
597
  }
598
599
600
601

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
602
}
603

604
605
606
607
608
609
/**
 * @brief Allocate memory for the extra particles used for on-the-fly creation.
 *
 * This rarely actually allocates memory. Most of the time, we convert
 * pre-allocated memory inot extra particles.
 *
610
611
 * This function also sets the extra particles' location to their top-level
 * cells. They can then be sorted into their correct memory position later on.
612
613
614
615
 *
 * @param s The current #space.
 * @param verbose Are we talkative?
 */
616
617
618
619
void space_allocate_extras(struct space *s, int verbose) {

  const int local_nodeID = s->e->nodeID;

620
621
622
623
624
  /* Anything to do here? (Abort if we don't want extras)*/
  if (space_extra_parts == 0 && space_extra_gparts == 0 &&
      space_extra_sparts == 0)
    return;

625
626
627
628
629
630
  /* The top-level cells */
  const struct cell *cells = s->cells_top;
  const double half_cell_width[3] = {0.5 * cells[0].width[0],
                                     0.5 * cells[0].width[1],
                                     0.5 * cells[0].width[2]};

631
  /* The current number of particles (including spare ones) */
632
633
634
635
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
  size_t nr_sparts = s->nr_sparts;

636
637
638
639
640
  /* The current number of actual particles */
  size_t nr_actual_parts = nr_parts - s->nr_extra_parts;
  size_t nr_actual_gparts = nr_gparts - s->nr_extra_gparts;
  size_t nr_actual_sparts = nr_sparts - s->nr_extra_sparts;

641
642
643
644
645
646
647
648
649
  /* The number of particles we allocated memory for (MPI overhead) */
  size_t size_parts = s->size_parts;
  size_t size_gparts = s->size_gparts;
  size_t size_sparts = s->size_sparts;

  int local_cells = 0;
  for (int i = 0; i < s->nr_cells; ++i)
    if (s->cells_top[i].nodeID == local_nodeID) local_cells++;

650
651
  /* Number of extra particles we want for each type */
  const size_t expected_num_extra_parts = local_cells * space_extra_parts;
652
653
  const size_t expected_num_extra_gparts = local_cells * space_extra_gparts;
  const size_t expected_num_extra_sparts = local_cells * space_extra_sparts;
654

655
656
657
  if (verbose) {
    message("Currently have %zd/%zd/%zd real particles.", nr_actual_parts,
            nr_actual_gparts, nr_actual_sparts);
658
    message("Currently have %zd/%zd/%zd spaces for extra particles.",
659
            s->nr_extra_parts, s->nr_extra_gparts, s->nr_extra_sparts);
660
    message("Requesting space for future %zd/%zd/%zd part/gpart/sparts.",
661
662
663
            expected_num_extra_parts, expected_num_extra_gparts,
            expected_num_extra_sparts);
  }
664

665
  /* Do we have enough space for the extras (i.e. we haven't used up any) ? */
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
  if (expected_num_extra_parts > s->nr_extra_parts) {

    /* Do we need to reallocate? */
    if (nr_actual_parts + expected_num_extra_parts > size_parts) {

      size_parts = (nr_actual_parts + expected_num_extra_parts) *
                   engine_redistribute_alloc_margin;

      if (verbose)
        message("Re-allocating parts array from %zd to %zd", s->size_parts,
                size_parts);

      /* Create more space for parts */
      struct part *parts_new = NULL;
      if (posix_memalign((void **)&parts_new, part_align,
                         sizeof(struct part) * size_parts) != 0)
        error("Failed to allocate new part data");
      memcpy(parts_new, s->parts, sizeof(struct part) * s->size_parts);
      free(s->parts);
      s->parts = parts_new;

      /* Same for xparts */
      struct xpart *xparts_new = NULL;
      if (posix_memalign((void **)&xparts_new, xpart_align,
                         sizeof(struct xpart) * size_parts) != 0)
        error("Failed to allocate new xpart data");
      memcpy(xparts_new, s->xparts, sizeof(struct xpart) * s->size_parts);
      free(s->xparts);
      s->xparts = xparts_new;

      /* Update the counter */
      s->size_parts = size_parts;
    }

700
    /* Turn some of the allocated spares into particles we can use */
701
702
703
704
705
706
707
    for (size_t i = nr_parts; i < nr_actual_parts + expected_num_extra_parts;
         ++i) {
      bzero(&s->parts[i], sizeof(struct part));
      bzero(&s->xparts[i], sizeof(struct xpart));
      s->parts[i].time_bin = time_bin_not_created;
    }

708
709
710
711
712
713
714
      /* Put the spare particles in their correct cell */
#ifdef WITH_MPI
    error("Need to do this correctly over MPI for only the local cells.");
#endif
    int count_in_cell = 0, current_cell = 0;
    size_t count_extra_parts = 0;
    for (size_t i = 0; i < nr_actual_parts + expected_num_extra_parts; ++i) {
715
716
717
718
719
720

#ifdef SWIFT_DEBUG_CHECKS
      if (current_cell == s->nr_cells)
        error("Cell counter beyond the maximal nr. cells.");
#endif

721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
      if (s->parts[i].time_bin == time_bin_not_created) {

        /* We want the extra particles to be at the centre of their cell */
        s->parts[i].x[0] = cells[current_cell].loc[0] + half_cell_width[0];
        s->parts[i].x[1] = cells[current_cell].loc[1] + half_cell_width[1];
        s->parts[i].x[2] = cells[current_cell].loc[2] + half_cell_width[2];
        ++count_in_cell;
        count_extra_parts++;
      }

      /* Once we have reached the number of extra part per cell, we move to the
       * next */
      if (count_in_cell == space_extra_parts) {
        ++current_cell;
        count_in_cell = 0;
      }
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (count_extra_parts != expected_num_extra_parts)
      error("Constructed the wrong number of extra parts (%zd vs. %zd)",
            count_extra_parts, expected_num_extra_parts);
#endif

745
746
747
    /* Update the counters */
    s->nr_parts = nr_actual_parts + expected_num_extra_parts;
    s->nr_extra_parts = expected_num_extra_parts;
748
  }
749

750
  if (expected_num_extra_parts < s->nr_extra_parts) {
751
    error("Reduction in top-level cells number not handled.");
752
753
  }

754
  if (nr_gparts + expected_num_extra_gparts > size_gparts) {
755
  }
756
  if (nr_sparts + expected_num_extra_sparts > size_sparts) {
757
758
759
  }
}

760
761
762
763
/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
764
 * @param repartitioned Did we just repartition?
765
 * @param verbose Print messages to stdout or not
766
 */
767
void space_rebuild(struct space *s, int repartitioned, int verbose) {
768

Matthieu Schaller's avatar
Matthieu Schaller committed
769
  const ticks tic = getticks();
770

771
772
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
Pedro Gonnet's avatar
Pedro Gonnet committed
773
  if (s->e->nodeID == 0 || verbose) message("(re)building space");
774
775
  fflush(stdout);
#endif
776
777

  /* Re-grid if necessary, or just re-set the cell data. */
778
  space_regrid(s, verbose);
779

780
781
782
  /* Allocate extra space for particles that will be created */
  space_allocate_extras(s, verbose);

783
784
  struct cell *cells_top = s->cells_top;
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
785
  const int local_nodeID = s->e->nodeID;
786
787

  /* The current number of particles */
Pedro Gonnet's avatar
Pedro Gonnet committed
788
789
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
790
  size_t nr_sparts = s->nr_sparts;
791
792
793
794
795
796

  /* The number of particles we allocated memory for */
  size_t size_parts = s->size_parts;
  size_t size_gparts = s->size_gparts;
  size_t size_sparts = s->size_sparts;

797
  /* Counter for the number of inhibited particles found on the node */
798
799
800
801
  size_t count_inhibited_parts = 0;
  size_t count_inhibited_gparts = 0;
  size_t count_inhibited_sparts = 0;

802
  /* Counter for the number of extra particles found on the node */
803
804
805
  size_t count_extra_parts = 0;
  size_t count_extra_gparts = 0;
  size_t count_extra_sparts = 0;
806
807
808
809
810

  /* Number of particles we expect to have after strays exchange */
  const size_t h_index_size = size_parts + space_expected_max_nr_strays;
  const size_t g_index_size = size_gparts + space_expected_max_nr_strays;
  const size_t s_index_size = size_sparts + space_expected_max_nr_strays;
811

812
813
814
  /* Allocate arrays to store the indices of the cells where particles
     belong. We allocate extra space to allow for particles we may
     receive from other nodes */
815
  int *h_index = (int *)malloc(sizeof(int) * h_index_size);
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
  int *g_index = (int *)malloc(sizeof(int) * g_index_size);
  int *s_index = (int *)malloc(sizeof(int) * s_index_size);
  if (h_index == NULL || g_index == NULL || s_index == NULL)
    error("Failed to allocate temporary particle indices.");

  /* Allocate counters of particles that will land in each cell */
  int *cell_part_counts = (int *)malloc(sizeof(int) * s->nr_cells);
  int *cell_gpart_counts = (int *)malloc(sizeof(int) * s->nr_cells);
  int *cell_spart_counts = (int *)malloc(sizeof(int) * s->nr_cells);
  if (cell_part_counts == NULL || cell_gpart_counts == NULL ||
      cell_spart_counts == NULL)
    error("Failed to allocate cell particle count buffer.");

  /* Initialise the counters, including buffer space for future particles */
  for (int i = 0; i < s->nr_cells; ++i) {
831
832
833
    cell_part_counts[i] = 0;   // space_extra_parts;
    cell_gpart_counts[i] = 0;  // space_extra_gparts;
    cell_spart_counts[i] = 0;  // space_extra_sparts;
834
835
836
  }

  /* Run through the particles and get their cell index. */
837
  if (nr_parts > 0)
838
    space_parts_get_cell_index(s, h_index, cell_part_counts,
839
840
841
                               &count_inhibited_parts, &count_extra_parts,
                               verbose);
  if (nr_gparts > 0)
842
    space_gparts_get_cell_index(s, g_index, cell_gpart_counts,
843
844
845
                                &count_inhibited_gparts, &count_extra_gparts,
                                verbose);
  if (nr_sparts > 0)
846
    space_sparts_get_cell_index(s, s_index, cell_spart_counts,
847
848
                                &count_inhibited_sparts, &count_extra_sparts,
                                verbose);
849

850
#ifdef SWIFT_DEBUG_CHECKS
851
  /* Some safety checks */
852
853
854
855
856
857
  if (repartitioned && count_inhibited_parts)
    error("We just repartitioned but still found inhibited parts.");
  if (repartitioned && count_inhibited_sparts)
    error("We just repartitioned but still found inhibited sparts.");
  if (repartitioned && count_inhibited_gparts)
    error("We just repartitioned but still found inhibited gparts.");
858
859
860
861
862
863
864
865
866
867
868
869
870

  if (count_extra_parts != s->nr_extra_parts)
    error(
        "Number of extra parts in the part array not matching the space "
        "counter.");
  if (count_extra_gparts != s->nr_extra_gparts)
    error(
        "Number of extra gparts in the gpart array not matching the space "
        "counter.");
  if (count_extra_sparts != s->nr_extra_sparts)
    error(
        "Number of extra sparts in the spart array not matching the space "
        "counter.");
871
#endif
872

873
  /* Move non-local parts and inhibited parts to the end of the list. */
874
  if (!repartitioned && (s->e->nr_nodes > 1 || count_inhibited_parts > 0)) {
875
    for (size_t k = 0; k < nr_parts; /* void */) {
876
877

      /* Inhibited particle or foreign particle */
878
      if (h_index[k] == -1 || cells_top[h_index[k]].nodeID != local_nodeID) {
879
880

        /* One fewer particle */
881
        nr_parts -= 1;
882

883
884
        /* Swap the particle */
        memswap(&s->parts[k], &s->parts[nr_parts], sizeof(struct part));
885

886
887
888
889
890
891
892
        /* Swap the link with the gpart */
        if (s->parts[k].gpart != NULL) {
          s->parts[k].gpart->id_or_neg_offset = -k;
        }
        if (s->parts[nr_parts].gpart != NULL) {
          s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
        }
893

894
895
896
        /* Swap the xpart */
        memswap(&s->xparts[k], &s->xparts[nr_parts], sizeof(struct xpart));
        /* Swap the index */
897
        memswap(&h_index[k], &h_index[nr_parts], sizeof(int));
898

899
      } else {
900
901
        /* Increment when not exchanging otherwise we need to retest "k".*/
        k++;
902
      }
903
904
    }
  }
905

906
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
907
  /* Check that all parts are in the correct places. */
908
  size_t check_count_inhibited_part = 0;
909
  for (size_t k = 0; k < nr_parts; k++) {
910
    if (h_index[k] == -1 || cells_top[h_index[k]].nodeID != local_nodeID) {
911
912
913
914
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
915
    if (h_index[k] != -1 && cells_top[h_index[k]].nodeID == local_nodeID) {
916
      error("Failed to remove local parts from send list");
917
    }
918
    if (h_index[k] == -1) ++check_count_inhibited_part;
919
  }
920
921
  if (check_count_inhibited_part != count_inhibited_parts)
    error("Counts of inhibited particles do not match!");
922
#endif /* SWIFT_DEBUG_CHECKS */
923

924
  /* Move non-local sparts and inhibited sparts to the end of the list. */
925
  if (!repartitioned && (s->e->nr_nodes > 1 || count_inhibited_sparts > 0)) {
926
    for (size_t k = 0; k < nr_sparts; /* void */) {
927
928

      /* Inhibited particle or foreign particle */
929
      if (s_index[k] == -1 || cells_top[s_index[k]].nodeID != local_nodeID) {
930
931

        /* One fewer particle */
932
        nr_sparts -= 1;
933

934
935
        /* Swap the particle */
        memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart));
936

937
938
939
940
941
942
943
        /* Swap the link with the gpart */
        if (s->sparts[k].gpart != NULL) {
          s->sparts[k].gpart->id_or_neg_offset = -k;
        }
        if (s->sparts[nr_sparts].gpart != NULL) {
          s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
        }
944