/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

/* This object's header. */
#include "space.h"

/* Local headers. */
#include "atomic.h"
#include "const.h"
#include "cooling.h"
#include "engine.h"
#include "error.h"
#include "gravity.h"
#include "hydro.h"
#include "kernel_hydro.h"
#include "lock.h"
#include "minmax.h"
#include "runner.h"
#include "threadpool.h"
#include "tools.h"

/* Split size. */
int space_splitsize = space_splitsize_default;
int space_subsize = space_subsize_default;
int space_maxsize = space_maxsize_default;
int space_maxcount = space_maxcount_default;

/* Map shift vector to sortlist. */
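/* Opposite shift vectors share the same interaction axis, so the 27 possible
   shifts map onto only 13 distinct sort lists. */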
const int sortlistID[27] = {
    /* ( -1 , -1 , -1 ) */ 0,
    /* ( -1 , -1 ,  0 ) */ 1,
    /* ( -1 , -1 ,  1 ) */ 2,
    /* ( -1 ,  0 , -1 ) */ 3,
    /* ( -1 ,  0 ,  0 ) */ 4,
    /* ( -1 ,  0 ,  1 ) */ 5,
    /* ( -1 ,  1 , -1 ) */ 6,
    /* ( -1 ,  1 ,  0 ) */ 7,
    /* ( -1 ,  1 ,  1 ) */ 8,
    /* (  0 , -1 , -1 ) */ 9,
    /* (  0 , -1 ,  0 ) */ 10,
    /* (  0 , -1 ,  1 ) */ 11,
    /* (  0 ,  0 , -1 ) */ 12,
    /* (  0 ,  0 ,  0 ) */ 0,
    /* (  0 ,  0 ,  1 ) */ 12,
    /* (  0 ,  1 , -1 ) */ 11,
    /* (  0 ,  1 ,  0 ) */ 10,
    /* (  0 ,  1 ,  1 ) */ 9,
    /* (  1 , -1 , -1 ) */ 8,
    /* (  1 , -1 ,  0 ) */ 7,
    /* (  1 , -1 ,  1 ) */ 6,
    /* (  1 ,  0 , -1 ) */ 5,
    /* (  1 ,  0 ,  0 ) */ 4,
    /* (  1 ,  0 ,  1 ) */ 3,
    /* (  1 ,  1 , -1 ) */ 2,
    /* (  1 ,  1 ,  0 ) */ 1,
    /* (  1 ,  1 ,  1 ) */ 0};

/**
 * @brief Interval stack necessary for parallel particle sorting.
 */
struct qstack {
  volatile ptrdiff_t i, j;
  volatile int min, max;
  volatile int ready;
};

/**
 * @brief Parallel particle-sorting stack
 */
struct parallel_sort {
  struct part *parts;
  struct gpart *gparts;
  struct xpart *xparts;
  int *ind;
  struct qstack *stack;
  unsigned int stack_size;
  volatile unsigned int first, last, waiting;
};

/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The #space.
 * @param ci Pointer to the first #cell.
 * @param cj Pointer to the second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID. The shift vector is set, and ci and cj may be swapped.
 */
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
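  /* Each component of dx contributes one base-3 digit: 0 if negative,
     1 if zero, 2 if positive, giving a value in [0, 26]. */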
  int sid = 0;
  for (int k = 0; k < 3; k++)
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
    struct cell *temp = *ci;
    *ci = *cj;
    *cj = temp;
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}

/**
 * @brief Recursively dismantle a cell tree.
 *
 * @param s The #space.
 * @param c The #cell to recycle.
 */
void space_rebuild_recycle(struct space *s, struct cell *c) {

  if (c->split)
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL) {
        space_rebuild_recycle(s, c->progeny[k]);
        space_recycle(s, c->progeny[k]);
        c->progeny[k] = NULL;
      }
}

/**
 * @brief Re-build the top-level cell grid.
 *
 * @param s The #space.
 * @param cell_max Maximum cell edge length.
 * @param verbose Print messages to stdout or not.
 */
void space_regrid(struct space *s, double cell_max, int verbose) {

  const size_t nr_parts = s->nr_parts;
  const ticks tic = getticks();
  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;

  /* Run through the cells and get the current h_max. */
  // tic = getticks();
  float h_max = s->cell_min / kernel_gamma / space_stretch;
  if (nr_parts > 0) {
    if (s->cells_top != NULL) {
      for (int k = 0; k < s->nr_cells; k++) {
        if (s->cells_top[k].nodeID == engine_rank &&
            s->cells_top[k].h_max > h_max) {
          h_max = s->cells_top[k].h_max;
        }
      }
    } else {
      for (size_t k = 0; k < nr_parts; k++) {
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
      }
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
      error("Failed to aggregate the rebuild flag across nodes.");
    h_max = buff;
  }
#endif
  if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);

  /* Get the new putative cell dimensions. */
  const int cdim[3] = {
      floor(s->dim[0] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
      floor(s->dim[1] / fmax(h_max * kernel_gamma * space_stretch, cell_max)),
      floor(s->dim[2] / fmax(h_max * kernel_gamma * space_stretch, cell_max))};

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
        "is switched on.\nThis error is often caused by any of the "
        "followings:\n"
        " - too few particles to generate a sensible grid,\n"
        " - the initial value of 'SPH:max_smoothing_length' is too large,\n"
        " - the (minimal) time-step is too large leading to particles with "
        "predicted smoothing lengths too large for the box size,\n"
        " - particle with velocities so large that they move by more than two "
        "box sizes per time-step.\n");

  /* Check if we have enough cells for gravity. */
  if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
    error(
        "Must have at least 8 cells in each spatial dimension when gravity "
        "is switched on.");

/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
#ifdef WITH_MPI
  double oldwidth[3];
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
        }
      }
    }
  }

#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
      cdim[2] < s->cdim[2]) {

    /* Free the old cells, if they were allocated. */
    if (s->cells_top != NULL) {
      for (int k = 0; k < s->nr_cells; k++) {
        space_rebuild_recycle(s, &s->cells_top[k]);
        if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
      }
      free(s->cells_top);
      s->maxdepth = 0;
    }

    /* Set the new cell dimensions only if smaller. */
    for (int k = 0; k < 3; k++) {
      s->cdim[k] = cdim[k];
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
    }
    const float dmin = min(s->width[0], min(s->width[1], s->width[2]));

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
    if (posix_memalign((void *)&s->cells_top, cell_align,
                       s->nr_cells * sizeof(struct cell)) != 0)
      error("Failed to allocate cells.");
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
    for (int k = 0; k < s->nr_cells; k++)
      if (lock_init(&s->cells_top[k].lock) != 0)
        error("Failed to init spinlock.");

    /* Set the cell location and sizes. */
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
          struct cell *restrict c = &s->cells_top[cell_getid(cdim, i, j, k)];
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
          c->super = c;
          c->gsuper = c;
          c->ti_old = ti_current;
          lock_init(&c->lock);
        }

    /* Be verbose about the change. */
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
    fflush(stdout);

#ifdef WITH_MPI
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");

      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
#ifdef HAVE_METIS
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);

      /* Finished with these. */
      free(oldnodeIDs);
    }
#endif

    // message( "rebuilding upper-level cells took %.3f %s." ,
    // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());

  } /* re-build upper-level cells? */

  else { /* Otherwise, just clean up the cells. */

    /* Free the old cells, if they were allocated. */
    for (int k = 0; k < s->nr_cells; k++) {
      space_rebuild_recycle(s, &s->cells_top[k]);
      s->cells_top[k].sorts = NULL;
      s->cells_top[k].nr_tasks = 0;
      s->cells_top[k].nr_density = 0;
      s->cells_top[k].nr_gradient = 0;
      s->cells_top[k].nr_force = 0;
      s->cells_top[k].density = NULL;
      s->cells_top[k].gradient = NULL;
      s->cells_top[k].force = NULL;
      s->cells_top[k].dx_max = 0.0f;
      s->cells_top[k].sorted = 0;
      s->cells_top[k].count = 0;
      s->cells_top[k].gcount = 0;
      s->cells_top[k].init = NULL;
      s->cells_top[k].extra_ghost = NULL;
      s->cells_top[k].ghost = NULL;
      s->cells_top[k].kick = NULL;
      s->cells_top[k].super = &s->cells_top[k];
      s->cells_top[k].gsuper = &s->cells_top[k];
    }
    s->maxdepth = 0;
  }

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
 * @param cell_max Maximal cell size.
 * @param verbose Print messages to stdout or not
 *
 */
void space_rebuild(struct space *s, double cell_max, int verbose) {

  const ticks tic = getticks();

  /* Be verbose about this. */
  // message("re)building space..."); fflush(stdout);

  /* Re-grid if necessary, or just re-set the cell data. */
  space_regrid(s, cell_max, verbose);

  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
  struct cell *restrict cells_top = s->cells_top;
  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;

  /* Run through the particles and get their cell index. */
  const size_t ind_size = s->size_parts;
  int *ind;
  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
    error("Failed to allocate temporary particle indices.");
  space_parts_get_cell_index(s, ind, cells_top);

  /* Run through the gravity particles and get their cell index. */
  const size_t gind_size = s->size_gparts;
  int *gind;
  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
    error("Failed to allocate temporary g-particle indices.");
  space_gparts_get_cell_index(s, gind, cells_top);

#ifdef WITH_MPI

  /* Move non-local parts to the end of the list. */
  const int local_nodeID = s->e->nodeID;
  for (size_t k = 0; k < nr_parts;) {
    if (cells_top[ind[k]].nodeID != local_nodeID) {
      cells_top[ind[k]].count -= 1;
      nr_parts -= 1;
      const struct part tp = s->parts[k];
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
      if (s->parts[k].gpart != NULL) {
        s->parts[k].gpart->id_or_neg_offset = -k;
      }
      if (s->parts[nr_parts].gpart != NULL) {
        s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
      }
      const struct xpart txp = s->xparts[k];
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
      const int t = ind[k];
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
    } else {
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }

#ifdef SWIFT_DEBUG_CHECKS
  /* Check that all parts are in the correct places. */
  for (size_t k = 0; k < nr_parts; k++) {
    if (cells_top[ind[k]].nodeID != local_nodeID) {
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
    if (cells_top[ind[k]].nodeID == local_nodeID) {
      error("Failed to remove local parts from send list");
    }
  }
#endif

  /* Move non-local gparts to the end of the list. */
  for (size_t k = 0; k < nr_gparts;) {
    if (cells_top[gind[k]].nodeID != local_nodeID) {
      cells_top[gind[k]].gcount -= 1;
      nr_gparts -= 1;
      const struct gpart tp = s->gparts[k];
      s->gparts[k] = s->gparts[nr_gparts];
      s->gparts[nr_gparts] = tp;
      if (s->gparts[k].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      }
      if (s->gparts[nr_gparts].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
      }
      const int t = gind[k];
      gind[k] = gind[nr_gparts];
      gind[nr_gparts] = t;
    } else {
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }

#ifdef SWIFT_DEBUG_CHECKS
  /* Check that all gparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_gparts; k++) {
    if (cells_top[gind[k]].nodeID != local_nodeID) {
      error("Failed to move all non-local gparts to send list");
    }
  }
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
    if (cells_top[gind[k]].nodeID == local_nodeID) {
      error("Failed to remove local gparts from send list");
    }
  }
#endif

  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);

  /* Set the new particle counts. */
  s->nr_parts = nr_parts + nr_parts_exchanged;
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;

  /* Re-allocate the index array if needed. */
  if (s->nr_parts > ind_size) {
    int *ind_new;
    if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
      error("Failed to allocate temporary particle indices.");
    memcpy(ind_new, ind, sizeof(int) * nr_parts);
    free(ind);
    ind = ind_new;
  }

  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

  /* Assign each particle to its cell. */
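  /* These are the strays that have just been received from other nodes. */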
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
    const struct part *const p = &s->parts[k];
    ind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
    cells_top[ind[k]].count += 1;
#ifdef SWIFT_DEBUG_CHECKS
    if (cells_top[ind[k]].nodeID != local_nodeID)
      error("Received part that does not belong to me (nodeID=%i).",
            cells_top[ind[k]].nodeID);
#endif
  }
  nr_parts = s->nr_parts;

#endif /* WITH_MPI */

  /* Sort the parts according to their cells. */
  space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);

  /* Re-link the gparts. */
  if (nr_parts > 0 && nr_gparts > 0) part_relink_gparts(s->parts, nr_parts, 0);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify space_sort_struct. */
  for (size_t k = 1; k < nr_parts; k++) {
    if (ind[k - 1] > ind[k]) {
      error("Sort failed!");
    } else if (ind[k] != cell_getid(cdim, s->parts[k].x[0] * ih[0],
                                    s->parts[k].x[1] * ih[1],
                                    s->parts[k].x[2] * ih[2])) {
      error("Incorrect indices!");
    }
  }
#endif

  /* We no longer need the indices as of here. */
  free(ind);

#ifdef WITH_MPI

  /* Re-allocate the index array if needed. */
  if (s->nr_gparts > gind_size) {
    int *gind_new;
    if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
      error("Failed to allocate temporary g-particle indices.");
    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
    free(gind);
    gind = gind_new;
  }

  /* Assign each particle to its cell. */
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
    const struct gpart *const p = &s->gparts[k];
    gind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
    cells_top[gind[k]].gcount += 1;

#ifdef SWIFT_DEBUG_CHECKS
    if (cells_top[gind[k]].nodeID != s->e->nodeID)
      error("Received g-part that does not belong to me (nodeID=%i).",
            cells_top[gind[k]].nodeID);
#endif
  }
  nr_gparts = s->nr_gparts;

#endif

  /* Sort the gparts according to their cells. */
  space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);

  /* Re-link the parts. */
  if (nr_parts > 0 && nr_gparts > 0)
    part_relink_parts(s->gparts, nr_gparts, s->parts);

  /* We no longer need the indices as of here. */
  free(gind);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the links are correct */
  for (size_t k = 0; k < nr_gparts; ++k) {

    if (s->gparts[k].id_or_neg_offset < 0) {

      const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset];

      if (part->gpart != &s->gparts[k]) error("Linking problem !");

      if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] ||
          s->gparts[k].x[2] != part->x[2])
        error("Linked particles are not at the same position !");
    }
  }
  for (size_t k = 0; k < nr_parts; ++k) {

    if (s->parts[k].gpart != NULL &&
        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
      error("Linking problem !");
    }
  }
#endif

  /* Hook the cells up to the parts. */
  // tic = getticks();
  struct part *finger = s->parts;
  struct xpart *xfinger = s->xparts;
  struct gpart *gfinger = s->gparts;
  for (int k = 0; k < s->nr_cells; k++) {
    struct cell *restrict c = &cells_top[k];
    c->ti_old = ti_current;
    c->parts = finger;
    c->xparts = xfinger;
    c->gparts = gfinger;
    finger = &finger[c->count];
    xfinger = &xfinger[c->count];
    gfinger = &gfinger[c->gcount];
  }
  // message( "hooking up cells took %.3f %s." ,
  // clocks_from_ticks(getticks() - tic), clocks_getunit());

  /* At this point, we have the upper-level cells, old or new. Now make
     sure that the parts in each cell are ok. */
  space_split(s, cells_top, s->nr_cells, verbose);

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Split particles between cells of a hierarchy
 *
 * This is done in parallel using threads in the #threadpool.
 *
 * @param s The #space.
 * @param cells The cell hierarchy.
 * @param nr_cells The number of cells.
 * @param verbose Are we talkative ?
 */
void space_split(struct space *s, struct cell *cells, int nr_cells,
                 int verbose) {

  const ticks tic = getticks();

  threadpool_map(&s->e->threadpool, space_split_mapper, cells, nr_cells,
                 sizeof(struct cell), 1, s);

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Runs through the top-level cells and checks whether tasks associated
 * with them can be split. If not, try to sanitize the cells.
 *
 * @param s The #space to act upon.
 */
void space_sanitize(struct space *s) {

  for (int k = 0; k < s->nr_cells; k++) {

    struct cell *c = &s->cells_top[k];
    const double min_width = c->dmin;

    /* Do we have a problem ? */
    if (c->h_max * kernel_gamma * space_stretch > min_width * 0.5 &&
        c->count > space_maxcount) {

      /* Ok, clean-up the mess */
      cell_sanitize(c);
    }
  }
}

/**
 * @brief Computes the cell index of all the particles and updates the cell count.
 *
 * @param s The #space.
 * @param ind The array of indices to fill.
 * @param cells The array of #cell to update.
 */
void space_parts_get_cell_index(struct space *s, int *ind, struct cell *cells) {

  const size_t nr_parts = s->nr_parts;
  struct part *parts = s->parts;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

  for (size_t k = 0; k < nr_parts; k++) {

    /* Get the particle */
    struct part *restrict p = &parts[k];

    /* Put it back into the simulation volume */
    for (int j = 0; j < 3; j++)
      if (p->x[j] < 0.0)
        p->x[j] += dim[j];
      else if (p->x[j] >= dim[j])
        p->x[j] -= dim[j];

    /* Get its cell index */
    const int index =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
    ind[k] = index;

    /* Tell the cell it has a new member */
    cells[index].count++;
  }
}

/**
 * @brief Computes the cell index of all the g-particles and updates the cell gcount.
 *
 * @param s The #space.
 * @param gind The array of indices to fill.
 * @param cells The array of #cell to update.
 */
void space_gparts_get_cell_index(struct space *s, int *gind,
                                 struct cell *cells) {

  const size_t nr_gparts = s->nr_gparts;
  struct gpart *gparts = s->gparts;
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};

  for (size_t k = 0; k < nr_gparts; k++) {

    /* Get the particle */
    struct gpart *restrict gp = &gparts[k];

    /* Put it back into the simulation volume */
    for (int j = 0; j < 3; j++)
      if (gp->x[j] < 0.0)
        gp->x[j] += dim[j];
      else if (gp->x[j] >= dim[j])
        gp->x[j] -= dim[j];

    /* Get its cell index */
    const int index =
        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
    gind[k] = index;

    /* Tell the cell it has a new member */
    cells[index].gcount++;
  }
}

/**
 * @brief Sort the particles and condensed particles according to the given
 * indices.
 *
 * @param s The #space.
 * @param ind The indices with respect to which the parts are sorted.
 * @param N The number of parts.
 * @param min Lowest index.
 * @param max Highest index.
 * @param verbose Are we talkative ?
 */
void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                      int verbose) {

  const ticks tic = getticks();

  /* Populate a parallel_sort structure with the input data */
  struct parallel_sort sort_struct;
  sort_struct.parts = s->parts;
  sort_struct.xparts = s->xparts;
  sort_struct.ind = ind;
  sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
  if ((sort_struct.stack =
           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
    error("Failed to allocate sorting stack.");
  for (unsigned int i = 0; i < sort_struct.stack_size; i++)
    sort_struct.stack[i].ready = 0;

  /* Add the first interval. */
  sort_struct.stack[0].i = 0;
  sort_struct.stack[0].j = N - 1;
  sort_struct.stack[0].min = min;
  sort_struct.stack[0].max = max;
  sort_struct.stack[0].ready = 1;
  sort_struct.first = 0;
  sort_struct.last = 1;
  sort_struct.waiting = 1;

  /* Launch the sorting tasks with a stride of zero such that the same
     map data is passed to each thread. */
  threadpool_map(&s->e->threadpool, space_parts_sort_mapper, &sort_struct,
                 s->e->threadpool.num_threads, 0, 1, NULL);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify space_sort_struct. */
  for (size_t i = 1; i < N; i++)
    if (ind[i - 1] > ind[i])
      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
            ind[i - 1], i, ind[i], min, max);
  message("Sorting succeeded.");
#endif

  /* Clean up. */
  free(sort_struct.stack);

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

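/**
 * @brief #threadpool mapper function used by space_parts_sort().
 *
 * Each thread repeatedly pops an interval off the shared stack, performs one
 * QuickSort partitioning pass on it, queues the larger sub-interval for the
 * other threads and continues with the smaller one.
 *
 * @param map_data Pointer to the shared #parallel_sort structure.
 * @param num_elements Unused.
 * @param extra_data Unused.
 */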
void space_parts_sort_mapper(void *map_data, int num_elements,
                             void *extra_data) {

  /* Unpack the mapping data. */
  struct parallel_sort *sort_struct = (struct parallel_sort *)map_data;

  /* Pointers to the sorting data. */
  int *ind = sort_struct->ind;
  struct part *parts = sort_struct->parts;
  struct xpart *xparts = sort_struct->xparts;

  /* Main loop. */
  while (sort_struct->waiting) {

    /* Grab an interval off the queue. */
    int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size;

    /* Wait for the entry to be ready, or for the sorting do be done. */
    while (!sort_struct->stack[qid].ready)
      if (!sort_struct->waiting) return;

    /* Get the stack entry. */
    ptrdiff_t i = sort_struct->stack[qid].i;
    ptrdiff_t j = sort_struct->stack[qid].j;
    int min = sort_struct->stack[qid].min;
    int max = sort_struct->stack[qid].max;
    sort_struct->stack[qid].ready = 0;

    /* Loop over sub-intervals. */
    while (1) {

      /* Bring beer. */
      const int pivot = (min + max) / 2;
      /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
              i, j, min, max, pivot); */

      /* One pass of QuickSort's partitioning. */
      ptrdiff_t ii = i;
      ptrdiff_t jj = j;
      while (ii < jj) {
        while (ii <= j && ind[ii] <= pivot) ii++;
        while (jj >= i && ind[jj] > pivot) jj--;
        if (ii < jj) {
          size_t temp_i = ind[ii];
          ind[ii] = ind[jj];
          ind[jj] = temp_i;
          struct part temp_p = parts[ii];
          parts[ii] = parts[jj];
          parts[jj] = temp_p;
          struct xpart temp_xp = xparts[ii];
          xparts[ii] = xparts[jj];
          xparts[jj] = temp_xp;
        }
      }

#ifdef SWIFT_DEBUG_CHECKS
      /* Verify space_sort_struct. */
      for (int k = i; k <= jj; k++)
        if (ind[k] > pivot) {
          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
                  k, ind[k], pivot, i, j);
          error("Partition failed (<=pivot).");
        }
      for (int k = jj + 1; k <= j; k++)
        if (ind[k] <= pivot) {
          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
                  k, ind[k], pivot, i, j);
          error("Partition failed (>pivot).");
        }
#endif

      /* Split-off largest interval. */
      if (jj - i > j - jj + 1) {

        /* Recurse on the left? */
        if (jj > i && pivot > min) {
          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
          while (sort_struct->stack[qid].ready)
            ;
          sort_struct->stack[qid].i = i;
          sort_struct->stack[qid].j = jj;
          sort_struct->stack[qid].min = min;
          sort_struct->stack[qid].max = pivot;
          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
            error("Qstack overflow.");
          sort_struct->stack[qid].ready = 1;
        }

        /* Recurse on the right? */
        if (jj + 1 < j && pivot + 1 < max) {
          i = jj + 1;
          min = pivot + 1;
        } else
          break;

      } else {

        /* Recurse on the right? */
        if (pivot + 1 < max) {
          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
          while (sort_struct->stack[qid].ready)
            ;
          sort_struct->stack[qid].i = jj + 1;
          sort_struct->stack[qid].j = j;
          sort_struct->stack[qid].min = pivot + 1;
          sort_struct->stack[qid].max = max;
          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
            error("Qstack overflow.");
          sort_struct->stack[qid].ready = 1;
        }
