/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34
35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37
38
#endif

39
40
41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "chemistry.h"
45
#include "const.h"
46
#include "cooling.h"
47
#include "debug.h"
48
#include "engine.h"
49
#include "error.h"
50
51
#include "gravity.h"
#include "hydro.h"
52
#include "kernel_hydro.h"
53
#include "lock.h"
54
#include "memswap.h"
55
#include "minmax.h"
56
#include "multipole.h"
57
#include "restart.h"
58
#include "sort_part.h"
59
#include "stars.h"
60
#include "threadpool.h"
61
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
62
63
64

/* Split size. */
int space_splitsize = space_splitsize_default;
65
66
67
int space_subsize_pair_hydro = space_subsize_pair_hydro_default;
int space_subsize_self_hydro = space_subsize_self_hydro_default;
int space_subsize_pair_grav = space_subsize_pair_grav_default;
68
int space_subsize_self_grav = space_subsize_self_grav_default;
69
70
int space_subsize_pair_stars = space_subsize_pair_stars_default;
int space_subsize_self_stars = space_subsize_self_stars_default;
71
int space_subdepth_diff_grav = space_subdepth_diff_grav_default;
72
int space_maxsize = space_maxsize_default;
73

74
75
76
/*! Number of extra #part we allocate memory for per top-level cell */
int space_extra_parts = space_extra_parts_default;

77
78
79
/*! Number of extra #spart we allocate memory for per top-level cell */
int space_extra_sparts = space_extra_sparts_default;

80
81
82
/*! Number of extra #gpart we allocate memory for per top-level cell */
int space_extra_gparts = space_extra_gparts_default;

83
84
/*! Expected maximal number of strays received at a rebuild */
int space_expected_max_nr_strays = space_expected_max_nr_strays_default;
85
86
87
#ifdef SWIFT_DEBUG_CHECKS
int last_cell_id;
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
88

89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/**
 * @brief One entry of the interval stack used by the parallel particle sort.
 *
 * All fields are volatile as written in the original declaration.
 * NOTE(review): the exact meaning of each field is set by the sorting
 * routines, which are not visible from here -- confirm against them.
 */
struct qstack {
  /*! First of the two interval markers (presumably a lower index). */
  volatile ptrdiff_t i;

  /*! Second of the two interval markers (presumably an upper index). */
  volatile ptrdiff_t j;

  /*! Smallest value associated with this interval (presumably a cell index). */
  volatile int min;

  /*! Largest value associated with this interval (presumably a cell index). */
  volatile int max;

  /*! Flag, presumably raised once the entry can be picked up -- confirm. */
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 *
 * Bundles the particle arrays, the index array and the #qstack array shared
 * by a parallel sort.
 */
struct parallel_sort {

  /*! The #part array. */
  struct part *parts;

  /*! The #gpart array. */
  struct gpart *gparts;

  /*! The #xpart array. */
  struct xpart *xparts;

  /*! The #spart array. */
  struct spart *sparts;

  /*! Array of integer indices (presumably the sort keys -- confirm). */
  int *ind;

  /*! The stack of intervals. */
  struct qstack *stack;

  /*! Number of entries in the stack. */
  unsigned int stack_size;

  /*! Shared counters on the stack, volatile as they are in the original. */
  volatile unsigned int first, last, waiting;
};

112
113
114
115
116
117
/**
 * @brief Information required to compute the particle cell indices.
 */
struct index_data {

  /*! The #space. */
  struct space *s;

  /*! Array of integer indices. */
  int *ind;

  /*! Array of per-cell counters. */
  int *cell_counts;

  /*! Counters of inhibited particles, one per particle type. */
  size_t count_inhibited_part;
  size_t count_inhibited_gpart;
  size_t count_inhibited_spart;

  /*! Counters of extra particles, one per particle type. */
  size_t count_extra_part;
  size_t count_extra_gpart;
  size_t count_extra_spart;
};

127
/**
128
 * @brief Recursively dismantle a cell tree.
129
 *
130
131
 * @param s The #space.
 * @param c The #cell to recycle.
Matthieu Schaller's avatar
Matthieu Schaller committed
132
133
134
135
136
137
 * @param cell_rec_begin Pointer to the start of the list of cells to recycle.
 * @param cell_rec_end Pointer to the end of the list of cells to recycle.
 * @param multipole_rec_begin Pointer to the start of the list of multipoles to
 * recycle.
 * @param multipole_rec_end Pointer to the end of the list of multipoles to
 * recycle.
138
 */
139
void space_rebuild_recycle_rec(struct space *s, struct cell *c,
140
141
                               struct cell **cell_rec_begin,
                               struct cell **cell_rec_end,
142
143
                               struct gravity_tensors **multipole_rec_begin,
                               struct gravity_tensors **multipole_rec_end) {
144
  if (c->split)
145
    for (int k = 0; k < 8; k++)
146
      if (c->progeny[k] != NULL) {
147
148
149
150
151
152
        space_rebuild_recycle_rec(s, c->progeny[k], cell_rec_begin,
                                  cell_rec_end, multipole_rec_begin,
                                  multipole_rec_end);

        c->progeny[k]->next = *cell_rec_begin;
        *cell_rec_begin = c->progeny[k];
153

154
        if (s->with_self_gravity) {
155
156
          c->progeny[k]->grav.multipole->next = *multipole_rec_begin;
          *multipole_rec_begin = c->progeny[k]->grav.multipole;
157
        }
158
159

        if (*cell_rec_end == NULL) *cell_rec_end = *cell_rec_begin;
160
        if (s->with_self_gravity && *multipole_rec_end == NULL)
161
162
          *multipole_rec_end = *multipole_rec_begin;

163
        c->progeny[k]->grav.multipole = NULL;
164
165
166
167
        c->progeny[k] = NULL;
      }
}

168
169
170
171
172
173
174
175
void space_rebuild_recycle_mapper(void *map_data, int num_elements,
                                  void *extra_data) {

  struct space *s = (struct space *)extra_data;
  struct cell *cells = (struct cell *)map_data;

  for (int k = 0; k < num_elements; k++) {
    struct cell *c = &cells[k];
176
    struct cell *cell_rec_begin = NULL, *cell_rec_end = NULL;
177
178
    struct gravity_tensors *multipole_rec_begin = NULL,
                           *multipole_rec_end = NULL;
179
180
181
182
183
    space_rebuild_recycle_rec(s, c, &cell_rec_begin, &cell_rec_end,
                              &multipole_rec_begin, &multipole_rec_end);
    if (cell_rec_begin != NULL)
      space_recycle_list(s, cell_rec_begin, cell_rec_end, multipole_rec_begin,
                         multipole_rec_end);
184
    c->hydro.sorts = NULL;
185
    c->nr_tasks = 0;
186
187
188
189
190
191
    c->grav.nr_mm_tasks = 0;
    c->hydro.density = NULL;
    c->hydro.gradient = NULL;
    c->hydro.force = NULL;
    c->grav.grav = NULL;
    c->grav.mm = NULL;
192
193
    c->hydro.dx_max_part = 0.0f;
    c->hydro.dx_max_sort = 0.0f;
Loic Hausammann's avatar
Loic Hausammann committed
194
    c->stars.dx_max_part = 0.f;
195
196
    c->hydro.sorted = 0;
    c->hydro.count = 0;
197
    c->hydro.count_total = 0;
198
199
    c->hydro.updated = 0;
    c->hydro.inhibited = 0;
200
    c->grav.count = 0;
201
    c->grav.count_total = 0;
202
203
    c->grav.updated = 0;
    c->grav.inhibited = 0;
204
    c->stars.count = 0;
205
    c->stars.count_total = 0;
206
207
    c->stars.updated = 0;
    c->stars.inhibited = 0;
208
209
210
211
212
213
    c->grav.init = NULL;
    c->grav.init_out = NULL;
    c->hydro.extra_ghost = NULL;
    c->hydro.ghost_in = NULL;
    c->hydro.ghost_out = NULL;
    c->hydro.ghost = NULL;
214
215
216
217
    c->stars.ghost_in = NULL;
    c->stars.ghost_out = NULL;
    c->stars.ghost = NULL;
    c->stars.density = NULL;
218
219
    c->kick1 = NULL;
    c->kick2 = NULL;
220
    c->timestep = NULL;
221
    c->end_force = NULL;
222
    c->hydro.drift = NULL;
223
    c->grav.drift = NULL;
224
    c->hydro.cooling = NULL;
225
    c->sourceterms = NULL;
226
227
228
229
    c->grav.long_range = NULL;
    c->grav.down_in = NULL;
    c->grav.down = NULL;
    c->grav.mesh = NULL;
230
    c->super = c;
231
232
233
234
    c->hydro.super = c;
    c->grav.super = c;
    c->hydro.parts = NULL;
    c->hydro.xparts = NULL;
235
236
    c->grav.parts = NULL;
    c->stars.parts = NULL;
237
238
239
240
241
242
243
    c->hydro.do_sub_sort = 0;
    c->grav.do_sub_drift = 0;
    c->hydro.do_sub_drift = 0;
    c->hydro.ti_end_min = -1;
    c->hydro.ti_end_max = -1;
    c->grav.ti_end_min = -1;
    c->grav.ti_end_max = -1;
244
245
246
#ifdef SWIFT_DEBUG_CHECKS
    c->cellID = 0;
#endif
247
248
    if (s->with_self_gravity)
      bzero(c->grav.multipole, sizeof(struct gravity_tensors));
249
    for (int i = 0; i < 13; i++)
250
251
252
      if (c->hydro.sort[i] != NULL) {
        free(c->hydro.sort[i]);
        c->hydro.sort[i] = NULL;
253
      }
254
#if WITH_MPI
255
256
    c->mpi.tag = -1;

257
258
259
260
    c->mpi.hydro.recv_xv = NULL;
    c->mpi.hydro.recv_rho = NULL;
    c->mpi.hydro.recv_gradient = NULL;
    c->mpi.grav.recv = NULL;
261
262
    c->mpi.recv_ti = NULL;

263
264
265
266
    c->mpi.hydro.send_xv = NULL;
    c->mpi.hydro.send_rho = NULL;
    c->mpi.hydro.send_gradient = NULL;
    c->mpi.grav.send = NULL;
267
    c->mpi.send_ti = NULL;
268
269
270
271
#endif
  }
}

272
273
274
275
/**
 * @brief Free up any allocated cells.
 */
void space_free_cells(struct space *s) {
276
277
278

  ticks tic = getticks();

Matthieu Schaller's avatar
Matthieu Schaller committed
279
280
  threadpool_map(&s->e->threadpool, space_rebuild_recycle_mapper, s->cells_top,
                 s->nr_cells, sizeof(struct cell), 0, s);
281
  s->maxdepth = 0;
282
283
284
285

  if (s->e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
286
287
}

288
/**
289
 * @brief Re-build the top-level cell grid.
290
 *
291
 * @param s The #space.
292
 * @param verbose Print messages to stdout or not.
293
 */
294
void space_regrid(struct space *s, int verbose) {
295

296
  const size_t nr_parts = s->nr_parts;
Loic Hausammann's avatar
Loic Hausammann committed
297
  const size_t nr_sparts = s->nr_sparts;
298
  const ticks tic = getticks();
299
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
300

301
302
  message("REGRID!!");

303
  /* Run through the cells and get the current h_max. */
304
  // tic = getticks();
305
  float h_max = s->cell_min / kernel_gamma / space_stretch;
306
  if (nr_parts > 0) {
307
308
309
310
311
312

    /* Can we use the list of local non-empty top-level cells? */
    if (s->local_cells_with_particles_top != NULL) {
      for (int k = 0; k < s->nr_local_cells_with_particles; ++k) {
        const struct cell *c =
            &s->cells_top[s->local_cells_with_particles_top[k]];
313
        if (c->hydro.h_max > h_max) {
314
          h_max = c->hydro.h_max;
315
        }
Loic Hausammann's avatar
Loic Hausammann committed
316
        if (c->stars.h_max > h_max) {
317
          h_max = c->stars.h_max;
Loic Hausammann's avatar
Loic Hausammann committed
318
        }
319
      }
320
321

      /* Can we instead use all the top-level cells? */
322
    } else if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
323
      for (int k = 0; k < s->nr_cells; k++) {
324
        const struct cell *c = &s->cells_top[k];
325
        if (c->nodeID == engine_rank && c->hydro.h_max > h_max) {
326
          h_max = c->hydro.h_max;
327
        }
Loic Hausammann's avatar
Loic Hausammann committed
328
        if (c->nodeID == engine_rank && c->stars.h_max > h_max) {
329
          h_max = c->stars.h_max;
Loic Hausammann's avatar
Loic Hausammann committed
330
        }
331
      }
332
333

      /* Last option: run through the particles */
334
    } else {
335
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
336
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
337
      }
Loic Hausammann's avatar
Loic Hausammann committed
338
339
340
      for (size_t k = 0; k < nr_sparts; k++) {
        if (s->sparts[k].h > h_max) h_max = s->sparts[k].h;
      }
341
342
343
344
345
346
347
348
349
350
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
351
      error("Failed to aggregate the rebuild flag across nodes.");
352
353
354
    h_max = buff;
  }
#endif
355
  if (verbose) message("h_max is %.3e (cell_min=%.3e).", h_max, s->cell_min);
356
357

  /* Get the new putative cell dimensions. */
358
  const int cdim[3] = {
359
360
361
362
363
364
      (int)floor(s->dim[0] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      (int)floor(s->dim[1] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min)),
      (int)floor(s->dim[2] /
                 fmax(h_max * kernel_gamma * space_stretch, s->cell_min))};
365
366
367
368
369

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
370
371
372
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
373
374
        " - the initial value of 'Scheduler:max_top_level_cells' is too "
        "small,\n"
375
        " - the (minimal) time-step is too large leading to particles with "
376
        "predicted smoothing lengths too large for the box size,\n"
377
        " - particles with velocities so large that they move by more than two "
378
        "box sizes per time-step.\n");
379

380
381
382
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
383
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
384
  double oldwidth[3];
385
386
387
388
389
390
391
392
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
393
394
395
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
396
397
398
399
400
401
402
403
404

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
405
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
406
407
408
409
410
        }
      }
    }
  }

Peter W. Draper's avatar
Peter W. Draper committed
411
  /* Are we about to allocate new top level cells without a regrid?
Peter W. Draper's avatar
Peter W. Draper committed
412
   * Can happen when restarting the application. */
413
  const int no_regrid = (s->cells_top == NULL && oldnodeIDs == NULL);
414
415
416
417
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
418
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
419
420
      cdim[2] < s->cdim[2]) {

421
422
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
423
    message("(re)griding space cdim=(%d %d %d)", cdim[0], cdim[1], cdim[2]);
424
425
426
    fflush(stdout);
#endif

427
    /* Free the old cells, if they were allocated. */
428
    if (s->cells_top != NULL) {
429
      space_free_cells(s);
430
      free(s->local_cells_with_tasks_top);
431
      free(s->local_cells_top);
432
      free(s->cells_with_particles_top);
433
      free(s->local_cells_with_particles_top);
434
      free(s->cells_top);
435
      free(s->multipoles_top);
436
437
    }

438
439
440
441
    /* Also free the task arrays, these will be regenerated and we can use the
     * memory while copying the particle arrays. */
    if (s->e != NULL) scheduler_free_tasks(&s->e->sched);

442
    /* Set the new cell dimensions only if smaller. */
443
    for (int k = 0; k < 3; k++) {
444
      s->cdim[k] = cdim[k];
445
446
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
447
    }
448
    const float dmin = min3(s->width[0], s->width[1], s->width[2]);
449
450
451

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
452
    if (posix_memalign((void **)&s->cells_top, cell_align,
453
                       s->nr_cells * sizeof(struct cell)) != 0)
454
      error("Failed to allocate top-level cells.");
455
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
456

457
    /* Allocate the multipoles for the top-level cells. */
458
    if (s->with_self_gravity) {
459
      if (posix_memalign((void **)&s->multipoles_top, multipole_align,
460
                         s->nr_cells * sizeof(struct gravity_tensors)) != 0)
461
        error("Failed to allocate top-level multipoles.");
462
      bzero(s->multipoles_top, s->nr_cells * sizeof(struct gravity_tensors));
463
464
    }

465
    /* Allocate the indices of local cells */
466
    if (posix_memalign((void **)&s->local_cells_top, SWIFT_STRUCT_ALIGNMENT,
467
468
469
470
                       s->nr_cells * sizeof(int)) != 0)
      error("Failed to allocate indices of local top-level cells.");
    bzero(s->local_cells_top, s->nr_cells * sizeof(int));

471
    /* Allocate the indices of local cells with tasks */
472
473
    if (posix_memalign((void **)&s->local_cells_with_tasks_top,
                       SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0)
474
      error("Failed to allocate indices of local top-level cells with tasks.");
475
476
    bzero(s->local_cells_with_tasks_top, s->nr_cells * sizeof(int));

477
    /* Allocate the indices of cells with particles */
478
    if (posix_memalign((void **)&s->cells_with_particles_top,
479
                       SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0)
480
481
      error("Failed to allocate indices of top-level cells with particles.");
    bzero(s->cells_with_particles_top, s->nr_cells * sizeof(int));
482

483
484
485
486
487
488
489
490
    /* Allocate the indices of local cells with particles */
    if (posix_memalign((void **)&s->local_cells_with_particles_top,
                       SWIFT_STRUCT_ALIGNMENT, s->nr_cells * sizeof(int)) != 0)
      error(
          "Failed to allocate indices of local top-level cells with "
          "particles.");
    bzero(s->local_cells_with_particles_top, s->nr_cells * sizeof(int));

491
    /* Set the cells' locks */
492
    for (int k = 0; k < s->nr_cells; k++) {
493
      if (lock_init(&s->cells_top[k].hydro.lock) != 0)
494
        error("Failed to init spinlock for hydro.");
495
      if (lock_init(&s->cells_top[k].grav.plock) != 0)
496
        error("Failed to init spinlock for gravity.");
497
      if (lock_init(&s->cells_top[k].grav.mlock) != 0)
498
        error("Failed to init spinlock for multipoles.");
499
      if (lock_init(&s->cells_top[k].stars.lock) != 0)
500
501
        error("Failed to init spinlock for stars.");
    }
502
503

    /* Set the cell location and sizes. */
504
505
506
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
507
508
          const size_t cid = cell_getid(cdim, i, j, k);
          struct cell *restrict c = &s->cells_top[cid];
509
510
511
512
513
514
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
515
516
          c->dmin = dmin;
          c->depth = 0;
517
          c->split = 0;
518
          c->hydro.count = 0;
519
520
          c->grav.count = 0;
          c->stars.count = 0;
521
          c->super = c;
522
523
          c->hydro.super = c;
          c->grav.super = c;
524
525
          c->hydro.ti_old_part = ti_current;
          c->grav.ti_old_part = ti_current;
526
          c->grav.ti_old_multipole = ti_current;
Pedro Gonnet's avatar
Pedro Gonnet committed
527
#ifdef WITH_MPI
528
          c->mpi.tag = -1;
529
530
531
532
533
534
535
536
          c->mpi.hydro.recv_xv = NULL;
          c->mpi.hydro.recv_rho = NULL;
          c->mpi.hydro.recv_gradient = NULL;
          c->mpi.hydro.send_xv = NULL;
          c->mpi.hydro.send_rho = NULL;
          c->mpi.hydro.send_gradient = NULL;
          c->mpi.grav.recv = NULL;
          c->mpi.grav.send = NULL;
Pedro Gonnet's avatar
Pedro Gonnet committed
537
#endif  // WITH_MPI
538
          if (s->with_self_gravity) c->grav.multipole = &s->multipoles_top[cid];
539
#ifdef SWIFT_DEBUG_CHECKS
Matthieu Schaller's avatar
Matthieu Schaller committed
540
541
          c->cellID = -last_cell_id;
          last_cell_id++;
542
#endif
Pedro Gonnet's avatar
Pedro Gonnet committed
543
        }
544
545

    /* Be verbose about the change. */
546
547
548
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
549

550
#ifdef WITH_MPI
551
552
553
554
555
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
556
557
558
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
559

Matthieu Schaller's avatar
Matthieu Schaller committed
560
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
561
562
563
564

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
565
#if defined(HAVE_PARMETIS) || defined(HAVE_METIS)
566
567
568
569
570
571
572
573
574
575
576
577
578
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
579

580
581
      /* Finished with these. */
      free(oldnodeIDs);
Peter W. Draper's avatar
Peter W. Draper committed
582
583

    } else if (no_regrid && s->e != NULL) {
Peter W. Draper's avatar
Peter W. Draper committed
584
585
586
587
588
589
590
591
592
593
594
595
596
      /* If we have created the top-levels cells and not done an initial
       * partition (can happen when restarting), then the top-level cells
       * are not assigned to a node, we must do that and then associate the
       * particles with the cells. Note requires that
       * partition_store_celllist() was called once before, or just before
       * dumping the restart files.*/
      partition_restore_celllist(s, s->e->reparttype);

      /* Now re-distribute the particles, should just add to cells? */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
597
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
598
#endif /* WITH_MPI */
599
600
601
602

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

603
  }      /* re-build upper-level cells? */
604
  else { /* Otherwise, just clean up the cells. */
605
606

    /* Free the old cells, if they were allocated. */
607
    space_free_cells(s);
608
  }
609
610
611
612

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
613
}
614

615
616
617
618
619
620
/**
 * @brief Allocate memory for the extra particles used for on-the-fly creation.
 *
 * This rarely actually allocates memory. Most of the time, we convert
 * pre-allocated memory into extra particles.
 *
621
622
 * This function also sets the extra particles' location to their top-level
 * cells. They can then be sorted into their correct memory position later on.
623
624
625
626
 *
 * @param s The current #space.
 * @param verbose Are we talkative?
 */
627
628
629
630
void space_allocate_extras(struct space *s, int verbose) {

  const int local_nodeID = s->e->nodeID;

631
632
633
634
635
  /* Anything to do here? (Abort if we don't want extras)*/
  if (space_extra_parts == 0 && space_extra_gparts == 0 &&
      space_extra_sparts == 0)
    return;

636
637
638
639
640
641
  /* The top-level cells */
  const struct cell *cells = s->cells_top;
  const double half_cell_width[3] = {0.5 * cells[0].width[0],
                                     0.5 * cells[0].width[1],
                                     0.5 * cells[0].width[2]};

642
  /* The current number of particles (including spare ones) */
643
644
645
646
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
  size_t nr_sparts = s->nr_sparts;

647
648
649
650
651
  /* The current number of actual particles */
  size_t nr_actual_parts = nr_parts - s->nr_extra_parts;
  size_t nr_actual_gparts = nr_gparts - s->nr_extra_gparts;
  size_t nr_actual_sparts = nr_sparts - s->nr_extra_sparts;

652
653
654
655
656
657
658
659
660
  /* The number of particles we allocated memory for (MPI overhead) */
  size_t size_parts = s->size_parts;
  size_t size_gparts = s->size_gparts;
  size_t size_sparts = s->size_sparts;

  int local_cells = 0;
  for (int i = 0; i < s->nr_cells; ++i)
    if (s->cells_top[i].nodeID == local_nodeID) local_cells++;

661
662
  /* Number of extra particles we want for each type */
  const size_t expected_num_extra_parts = local_cells * space_extra_parts;
663
664
  const size_t expected_num_extra_gparts = local_cells * space_extra_gparts;
  const size_t expected_num_extra_sparts = local_cells * space_extra_sparts;
665

666
667
668
  if (verbose) {
    message("Currently have %zd/%zd/%zd real particles.", nr_actual_parts,
            nr_actual_gparts, nr_actual_sparts);
669
    message("Currently have %zd/%zd/%zd spaces for extra particles.",
670
            s->nr_extra_parts, s->nr_extra_gparts, s->nr_extra_sparts);
671
    message("Requesting space for future %zd/%zd/%zd part/gpart/sparts.",
672
673
674
            expected_num_extra_parts, expected_num_extra_gparts,
            expected_num_extra_sparts);
  }
675

676
677
678
679
680
681
682
  if (expected_num_extra_parts < s->nr_extra_parts)
    error("Reduction in top-level cells number not handled.");
  if (expected_num_extra_gparts < s->nr_extra_gparts)
    error("Reduction in top-level cells number not handled.");
  if (expected_num_extra_sparts < s->nr_extra_sparts)
    error("Reduction in top-level cells number not handled.");

683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
  /* Do we have enough space for the extra gparts (i.e. we haven't used up any)
   * ? */
  if (nr_gparts + expected_num_extra_gparts > size_gparts) {

    /* Ok... need to put some more in the game */

    /* Do we need to reallocate? */
    if (nr_actual_gparts + expected_num_extra_gparts > size_gparts) {

      size_gparts = (nr_actual_gparts + expected_num_extra_gparts) *
                    engine_redistribute_alloc_margin;

      if (verbose)
        message("Re-allocating gparts array from %zd to %zd", s->size_gparts,
                size_gparts);

      /* Create more space for parts */
      struct gpart *gparts_new = NULL;
      if (posix_memalign((void **)&gparts_new, gpart_align,
                         sizeof(struct gpart) * size_gparts) != 0)
        error("Failed to allocate new gpart data");
      const ptrdiff_t delta = gparts_new - s->gparts;
      memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->size_gparts);
      free(s->gparts);
      s->gparts = gparts_new;

      /* Update the counter */
      s->size_gparts = size_gparts;

      /* We now need to reset all the part and spart pointers */
      for (size_t i = 0; i < nr_parts; ++i) {
        if (s->parts[i].time_bin != time_bin_not_created)
          s->parts[i].gpart += delta;
      }
      for (size_t i = 0; i < nr_sparts; ++i) {
        if (s->sparts[i].time_bin != time_bin_not_created)
          s->sparts[i].gpart += delta;
      }
    }

    /* Turn some of the allocated spares into particles we can use */
    for (size_t i = nr_gparts; i < nr_actual_gparts + expected_num_extra_gparts;
         ++i) {
      bzero(&s->gparts[i], sizeof(struct gpart));
      s->gparts[i].time_bin = time_bin_not_created;
      s->gparts[i].type = swift_type_dark_matter;
      s->gparts[i].id_or_neg_offset = -1;
    }

      /* Put the spare particles in their correct cell */
#ifdef WITH_MPI
    error("Need to do this correctly over MPI for only the local cells.");
#endif
    int count_in_cell = 0, current_cell = 0;
    size_t count_extra_gparts = 0;
    for (size_t i = 0; i < nr_actual_gparts + expected_num_extra_gparts; ++i) {

#ifdef SWIFT_DEBUG_CHECKS
      if (current_cell == s->nr_cells)
        error("Cell counter beyond the maximal nr. cells.");
#endif

      if (s->gparts[i].time_bin == time_bin_not_created) {

        /* We want the extra particles to be at the centre of their cell */
        s->gparts[i].x[0] = cells[current_cell].loc[0] + half_cell_width[0];
        s->gparts[i].x[1] = cells[current_cell].loc[1] + half_cell_width[1];
        s->gparts[i].x[2] = cells[current_cell].loc[2] + half_cell_width[2];
        ++count_in_cell;
        count_extra_gparts++;
      }

      /* Once we have reached the number of extra gpart per cell, we move to the
       * next */
      if (count_in_cell == space_extra_gparts) {
        ++current_cell;
        count_in_cell = 0;
      }
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (count_extra_gparts != expected_num_extra_gparts)
      error("Constructed the wrong number of extra gparts (%zd vs. %zd)",
            count_extra_gparts, expected_num_extra_gparts);
#endif

    /* Update the counters */
    s->nr_gparts = nr_actual_gparts + expected_num_extra_gparts;
    s->nr_extra_gparts = expected_num_extra_gparts;
  }

774
775
  /* Do we have enough space for the extra parts (i.e. we haven't used up any) ?
   */
776
777
  if (expected_num_extra_parts > s->nr_extra_parts) {

778
779
    /* Ok... need to put some more in the game */

780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
    /* Do we need to reallocate? */
    if (nr_actual_parts + expected_num_extra_parts > size_parts) {

      size_parts = (nr_actual_parts + expected_num_extra_parts) *
                   engine_redistribute_alloc_margin;

      if (verbose)
        message("Re-allocating parts array from %zd to %zd", s->size_parts,
                size_parts);

      /* Create more space for parts */
      struct part *parts_new = NULL;
      if (posix_memalign((void **)&parts_new, part_align,
                         sizeof(struct part) * size_parts) != 0)
        error("Failed to allocate new part data");
      memcpy(parts_new, s->parts, sizeof(struct part) * s->size_parts);
      free(s->parts);
      s->parts = parts_new;

      /* Same for xparts */
      struct xpart *xparts_new = NULL;
      if (posix_memalign((void **)&xparts_new, xpart_align,
                         sizeof(struct xpart) * size_parts) != 0)
        error("Failed to allocate new xpart data");
      memcpy(xparts_new, s->xparts, sizeof(struct xpart) * s->size_parts);
      free(s->xparts);
      s->xparts = xparts_new;

      /* Update the counter */
      s->size_parts = size_parts;
    }

812
    /* Turn some of the allocated spares into particles we can use */
813
814
815
816
817
    for (size_t i = nr_parts; i < nr_actual_parts + expected_num_extra_parts;
         ++i) {
      bzero(&s->parts[i], sizeof(struct part));
      bzero(&s->xparts[i], sizeof(struct xpart));
      s->parts[i].time_bin = time_bin_not_created;
818
      s->parts[i].id = -1;
819
820
    }

821
822
823
824
825
826
827
      /* Put the spare particles in their correct cell */
#ifdef WITH_MPI
    error("Need to do this correctly over MPI for only the local cells.");
#endif
    int count_in_cell = 0, current_cell = 0;
    size_t count_extra_parts = 0;
    for (size_t i = 0; i < nr_actual_parts + expected_num_extra_parts; ++i) {
828
829
830
831
832
833

#ifdef SWIFT_DEBUG_CHECKS
      if (current_cell == s->nr_cells)
        error("Cell counter beyond the maximal nr. cells.");
#endif

834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
      if (s->parts[i].time_bin == time_bin_not_created) {

        /* We want the extra particles to be at the centre of their cell */
        s->parts[i].x[0] = cells[current_cell].loc[0] + half_cell_width[0];
        s->parts[i].x[1] = cells[current_cell].loc[1] + half_cell_width[1];
        s->parts[i].x[2] = cells[current_cell].loc[2] + half_cell_width[2];
        ++count_in_cell;
        count_extra_parts++;
      }

      /* Once we have reached the number of extra part per cell, we move to the
       * next */
      if (count_in_cell == space_extra_parts) {
        ++current_cell;
        count_in_cell = 0;
      }
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (count_extra_parts != expected_num_extra_parts)
      error("Constructed the wrong number of extra parts (%zd vs. %zd)",
            count_extra_parts, expected_num_extra_parts);
#endif

858
859
860
    /* Update the counters */
    s->nr_parts = nr_actual_parts + expected_num_extra_parts;
    s->nr_extra_parts = expected_num_extra_parts;
861
  }
862

863
864
  /* Do we have enough space for the extra sparts (i.e. we haven't used up any)
   * ? */
865
  if (nr_actual_sparts + expected_num_extra_sparts > nr_sparts) {
866

867
868
    /* Ok... need to put some more in the game */

869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
    /* Do we need to reallocate? */
    if (nr_actual_sparts + expected_num_extra_sparts > size_sparts) {

      size_sparts = (nr_actual_sparts + expected_num_extra_sparts) *
                    engine_redistribute_alloc_margin;

      if (verbose)
        message("Re-allocating sparts array from %zd to %zd", s->size_sparts,
                size_sparts);

      /* Create more space for parts */
      struct spart *sparts_new = NULL;
      if (posix_memalign((void **)&sparts_new, spart_align,
                         sizeof(struct spart) * size_sparts) != 0)
        error("Failed to allocate new spart data");
      memcpy(sparts_new, s->sparts, sizeof(struct spart) * s->size_sparts);
      free(s->sparts);
      s->sparts = sparts_new;

      /* Update the counter */
      s->size_sparts = size_sparts;
    }

    /* Turn some of the allocated spares into particles we can use */
    for (size_t i = nr_sparts; i < nr_actual_sparts + expected_num_extra_sparts;
         ++i) {
      bzero(&s->sparts[i], sizeof(struct spart));
      s->sparts[i].time_bin = time_bin_not_created;
897
      s->sparts[i].id = -42;
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
    }

      /* Put the spare particles in their correct cell */
#ifdef WITH_MPI
    error("Need to do this correctly over MPI for only the local cells.");
#endif
    int count_in_cell = 0, current_cell = 0;
    size_t count_extra_sparts = 0;
    for (size_t i = 0; i < nr_actual_sparts + expected_num_extra_sparts; ++i) {

#ifdef SWIFT_DEBUG_CHECKS
      if (current_cell == s->nr_cells)
        error("Cell counter beyond the maximal nr. cells.");
#endif

      if (s->sparts[i].time_bin == time_bin_not_created) {

        /* We want the extra particles to be at the centre of their cell */
        s->sparts[i].x[0] = cells[current_cell].loc[0] + half_cell_width[0];
        s->sparts[i].x[1] = cells[current_cell].loc[1] + half_cell_width[1];
        s->sparts[i].x[2] = cells[current_cell].loc[2] + half_cell_width[2];
        ++count_in_cell;
        count_extra_sparts++;
      }

      /* Once we have reached the number of extra spart per cell, we move to the
       * next */
      if (count_in_cell == space_extra_sparts) {
        ++current_cell;
        count_in_cell = 0;
      }
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (count_extra_sparts != expected_num_extra_sparts)
      error("Constructed the wrong number of extra sparts (%zd vs. %zd)",
            count_extra_sparts, expected_num_extra_sparts);
#endif

    /* Update the counters */
    s->nr_sparts = nr_actual_sparts + expected_num_extra_sparts;
    s->nr_extra_sparts = expected_num_extra_sparts;
940
  }
941
942
943
944
945
946
947

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the links are correct */
  if ((nr_gparts > 0 && nr_parts > 0) || (nr_gparts > 0 && nr_sparts > 0))
    part_verify_links(s->parts, s->gparts, s->sparts, nr_parts, nr_gparts,
                      nr_sparts, verbose);
#endif
948
949
}

950
951
952
953
/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
 * @param repartitioned Whether a repartition has just taken place.
 * @param verbose Whether to print messages to stdout.
 */
957
void space_rebuild(struct space *s, int repartitioned, int verbose) {
958

Matthieu Schaller's avatar
Matthieu Schaller committed
959
  const ticks tic = getticks();
960

961
962
/* Be verbose about this. */
#ifdef SWIFT_DEBUG_CHECKS
Pedro Gonnet's avatar
Pedro Gonnet committed
963
  if (s->e->nodeID == 0 || verbose) message("(re)building space");
964
965
  fflush(stdout);
#endif
966
967

  /* Re-grid if necessary, or just re-set the cell data. */
968
  space_regrid(s, verbose);
969

970
971
972
  /* Allocate extra space for particles that will be created */
  space_allocate_extras(s, verbose);

973
974
  struct cell *cells_top = s->cells_top;
  const integertime_t ti_current = (s->e != NULL) ? s->e->ti_current : 0;
975
  const int local_nodeID = s->e->nodeID;
976
977

  /* The current number of particles */
Pedro Gonnet's avatar
Pedro Gonnet committed
978
979
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
980
  size_t nr_sparts = s->nr_sparts;
981
982
983
984
985
986

  /* The number of particles we allocated memory for */
  size_t size_parts = s->size_parts;
  size_t size_gparts = s->size_gparts;
  size_t size_sparts = s->size_sparts;

987
  /* Counter for the number of inhibited particles found on the node */
988
989
990
991
  size_t count_inhibited_parts = 0;
  size_t count_inhibited_gparts = 0;
  size_t count_inhibited_sparts = 0;

992
  /* Counter for the number of extra particles found on the node */
993
994
995
  size_t count_extra_parts = 0;
  size_t count_extra_gparts = 0;
  size_t count_extra_sparts = 0;
996
997
998
999
1000

  /* Number of particles we expect to have after strays exchange */
  const size_t h_index_size = size_parts + space_expected_max_nr_strays;
  const size_t g_index_size = size_gparts + space_expected_max_nr_strays;
  const size_t s_index_size = size_sparts + space_expected_max_nr_strays;
For faster browsing, not all history is shown. View entire blame