space.c 50 KB
Newer Older
1
/*******************************************************************************
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <math.h>
31
#include <stdlib.h>
32
#include <string.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
33

34
35
/* MPI headers. */
#ifdef WITH_MPI
36
#include <mpi.h>
37
38
#endif

39
40
41
/* This object's header. */
#include "space.h"

Pedro Gonnet's avatar
Pedro Gonnet committed
42
/* Local headers. */
43
#include "atomic.h"
44
#include "const.h"
45
#include "engine.h"
46
#include "error.h"
47
48
#include "gravity.h"
#include "hydro.h"
49
#include "kernel_hydro.h"
50
#include "lock.h"
51
#include "minmax.h"
52
#include "runner.h"
53
#include "threadpool.h"
54
#include "tools.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
55
56
57

/* Split size. */
int space_splitsize = space_splitsize_default;
58
int space_subsize = space_subsize_default;
59
int space_maxsize = space_maxsize_default;
Pedro Gonnet's avatar
Pedro Gonnet committed
60
61
62

/* Map shift vector to sortlist. */
const int sortlistID[27] = {
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
    /* ( -1 , -1 , -1 ) */ 0,
    /* ( -1 , -1 ,  0 ) */ 1,
    /* ( -1 , -1 ,  1 ) */ 2,
    /* ( -1 ,  0 , -1 ) */ 3,
    /* ( -1 ,  0 ,  0 ) */ 4,
    /* ( -1 ,  0 ,  1 ) */ 5,
    /* ( -1 ,  1 , -1 ) */ 6,
    /* ( -1 ,  1 ,  0 ) */ 7,
    /* ( -1 ,  1 ,  1 ) */ 8,
    /* (  0 , -1 , -1 ) */ 9,
    /* (  0 , -1 ,  0 ) */ 10,
    /* (  0 , -1 ,  1 ) */ 11,
    /* (  0 ,  0 , -1 ) */ 12,
    /* (  0 ,  0 ,  0 ) */ 0,
    /* (  0 ,  0 ,  1 ) */ 12,
    /* (  0 ,  1 , -1 ) */ 11,
    /* (  0 ,  1 ,  0 ) */ 10,
    /* (  0 ,  1 ,  1 ) */ 9,
    /* (  1 , -1 , -1 ) */ 8,
    /* (  1 , -1 ,  0 ) */ 7,
    /* (  1 , -1 ,  1 ) */ 6,
    /* (  1 ,  0 , -1 ) */ 5,
    /* (  1 ,  0 ,  0 ) */ 4,
    /* (  1 ,  0 ,  1 ) */ 3,
    /* (  1 ,  1 , -1 ) */ 2,
    /* (  1 ,  1 ,  0 ) */ 1,
    /* (  1 ,  1 ,  1 ) */ 0};

91
92
93
94
95
96
97
98
99
100
101
/**
 * @brief Get the shift-id of the given pair of cells, swapping them
 *      if need be.
 *
 * @param s The space
 * @param ci Pointer to first #cell.
 * @param cj Pointer second #cell.
 * @param shift Vector from ci to cj.
 *
 * @return The shift ID and set shift, may or may not swap ci and cj.
 */
102
103
104
105
int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                 double *shift) {

  /* Get the relative distance between the pairs, wrapping. */
106
107
108
  const int periodic = s->periodic;
  double dx[3];
  for (int k = 0; k < 3; k++) {
109
110
111
112
113
114
115
116
117
118
119
    dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
    if (periodic && dx[k] < -s->dim[k] / 2)
      shift[k] = s->dim[k];
    else if (periodic && dx[k] > s->dim[k] / 2)
      shift[k] = -s->dim[k];
    else
      shift[k] = 0.0;
    dx[k] += shift[k];
  }

  /* Get the sorting index. */
120
  int sid = 0;
121
  for (int k = 0; k < 3; k++)
122
123
124
125
    sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));

  /* Switch the cells around? */
  if (runner_flip[sid]) {
126
    struct cell *temp = *ci;
127
128
    *ci = *cj;
    *cj = temp;
129
    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
130
131
132
133
134
135
  }
  sid = sortlistID[sid];

  /* Return the sort ID. */
  return sid;
}
136

137
/**
138
 * @brief Recursively dismantle a cell tree.
139
140
 *
 */
141
142
143
void space_rebuild_recycle(struct space *s, struct cell *c) {

  if (c->split)
144
    for (int k = 0; k < 8; k++)
145
146
147
148
149
150
151
      if (c->progeny[k] != NULL) {
        space_rebuild_recycle(s, c->progeny[k]);
        space_recycle(s, c->progeny[k]);
        c->progeny[k] = NULL;
      }
}

152
/**
153
 * @brief Re-build the cell grid.
154
 *
155
156
 * @param s The #space.
 * @param cell_max Maximum cell edge length.
157
 * @param verbose Print messages to stdout or not.
158
 */
159
void space_regrid(struct space *s, double cell_max, int verbose) {
160

161
  const size_t nr_parts = s->nr_parts;
162
  struct cell *restrict c;
163
  ticks tic = getticks();
164
  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
165

166
  /* Run through the cells and get the current h_max. */
167
  // tic = getticks();
168
  float h_max = s->cell_min / kernel_gamma / space_stretch;
169
  if (nr_parts > 0) {
170
    if (s->cells_top != NULL) {
Tom Theuns's avatar
Tom Theuns committed
171
      for (int k = 0; k < s->nr_cells; k++) {
172
173
174
        if (s->cells_top[k].nodeID == engine_rank &&
            s->cells_top[k].h_max > h_max) {
          h_max = s->cells_top[k].h_max;
175
        }
176
177
      }
    } else {
178
      for (size_t k = 0; k < nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
179
        if (s->parts[k].h > h_max) h_max = s->parts[k].h;
180
      }
181
182
183
184
185
186
187
188
189
190
    }
  }

/* If we are running in parallel, make sure everybody agrees on
   how large the largest cell should be. */
#ifdef WITH_MPI
  {
    float buff;
    if (MPI_Allreduce(&h_max, &buff, 1, MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD) !=
        MPI_SUCCESS)
191
      error("Failed to aggregate the rebuild flag across nodes.");
192
193
194
    h_max = buff;
  }
#endif
195
  if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);
196
197

  /* Get the new putative cell dimensions. */
198
  int cdim[3];
199
  for (int k = 0; k < 3; k++)
200
201
202
203
204
205
206
207
208
    cdim[k] =
        floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max));

  /* Check if we have enough cells for periodicity. */
  if (s->periodic && (cdim[0] < 3 || cdim[1] < 3 || cdim[2] < 3))
    error(
        "Must have at least 3 cells in each spatial dimension when periodicity "
        "is switched on.");

209
210
211
212
213
214
  /* Check if we have enough cells for gravity. */
  if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
    error(
        "Must have at least 8 cells in each spatial dimension when gravity "
        "is switched on.");

215
216
217
/* In MPI-Land, changing the top-level cell size requires that the
 * global partition is recomputed and the particles redistributed.
 * Be prepared to do that. */
218
#ifdef WITH_MPI
Matthieu Schaller's avatar
Matthieu Schaller committed
219
  double oldwidth[3];
220
221
222
223
224
225
226
227
  double oldcdim[3];
  int *oldnodeIDs = NULL;
  if (cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2]) {

    /* Capture state of current space. */
    oldcdim[0] = s->cdim[0];
    oldcdim[1] = s->cdim[1];
    oldcdim[2] = s->cdim[2];
228
229
230
    oldwidth[0] = s->width[0];
    oldwidth[1] = s->width[1];
    oldwidth[2] = s->width[2];
231
232
233
234
235
236
237
238
239

    if ((oldnodeIDs = (int *)malloc(sizeof(int) * s->nr_cells)) == NULL)
      error("Failed to allocate temporary nodeIDs.");

    int cid = 0;
    for (int i = 0; i < s->cdim[0]; i++) {
      for (int j = 0; j < s->cdim[1]; j++) {
        for (int k = 0; k < s->cdim[2]; k++) {
          cid = cell_getid(oldcdim, i, j, k);
240
          oldnodeIDs[cid] = s->cells_top[cid].nodeID;
241
242
243
244
245
        }
      }
    }
  }

246
247
248
249
#endif

  /* Do we need to re-build the upper-level cells? */
  // tic = getticks();
250
  if (s->cells_top == NULL || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] ||
251
252
253
      cdim[2] < s->cdim[2]) {

    /* Free the old cells, if they were allocated. */
254
    if (s->cells_top != NULL) {
255
      for (int k = 0; k < s->nr_cells; k++) {
256
257
        space_rebuild_recycle(s, &s->cells_top[k]);
        if (s->cells_top[k].sort != NULL) free(s->cells_top[k].sort);
258
      }
259
      free(s->cells_top);
260
261
262
263
      s->maxdepth = 0;
    }

    /* Set the new cell dimensions only if smaller. */
264
    for (int k = 0; k < 3; k++) {
265
      s->cdim[k] = cdim[k];
266
267
      s->width[k] = s->dim[k] / cdim[k];
      s->iwidth[k] = 1.0 / s->width[k];
268
    }
269
    const float dmin = fminf(s->width[0], fminf(s->width[1], s->width[2]));
270
271
272

    /* Allocate the highest level of cells. */
    s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
273
    if (posix_memalign((void *)&s->cells_top, cell_align,
274
275
                       s->nr_cells * sizeof(struct cell)) != 0)
      error("Failed to allocate cells.");
276
    bzero(s->cells_top, s->nr_cells * sizeof(struct cell));
277
    for (int k = 0; k < s->nr_cells; k++)
278
279
      if (lock_init(&s->cells_top[k].lock) != 0)
        error("Failed to init spinlock.");
280
281

    /* Set the cell location and sizes. */
282
283
284
    for (int i = 0; i < cdim[0]; i++)
      for (int j = 0; j < cdim[1]; j++)
        for (int k = 0; k < cdim[2]; k++) {
285
          c = &s->cells_top[cell_getid(cdim, i, j, k)];
286
287
288
289
290
291
          c->loc[0] = i * s->width[0];
          c->loc[1] = j * s->width[1];
          c->loc[2] = k * s->width[2];
          c->width[0] = s->width[0];
          c->width[1] = s->width[1];
          c->width[2] = s->width[2];
292
293
294
295
296
          c->dmin = dmin;
          c->depth = 0;
          c->count = 0;
          c->gcount = 0;
          c->super = c;
297
          c->ti_old = ti_current;
298
          lock_init(&c->lock);
Pedro Gonnet's avatar
Pedro Gonnet committed
299
        }
300
301

    /* Be verbose about the change. */
302
303
304
    if (verbose)
      message("set cell dimensions to [ %i %i %i ].", cdim[0], cdim[1],
              cdim[2]);
305
306
    fflush(stdout);

307
#ifdef WITH_MPI
308
309
310
311
312
    if (oldnodeIDs != NULL) {
      /* We have changed the top-level cell dimension, so need to redistribute
       * cells around the nodes. We repartition using the old space node
       * positions as a grid to resample. */
      if (s->e->nodeID == 0)
313
314
315
        message(
            "basic cell dimensions have increased - recalculating the "
            "global partition.");
316

Matthieu Schaller's avatar
Matthieu Schaller committed
317
      if (!partition_space_to_space(oldwidth, oldcdim, oldnodeIDs, s)) {
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335

        /* Failed, try another technique that requires no settings. */
        message("Failed to get a new partition, trying less optimal method");
        struct partition initial_partition;
#ifdef HAVE_METIS
        initial_partition.type = INITPART_METIS_NOWEIGHT;
#else
        initial_partition.type = INITPART_VECTORIZE;
#endif
        partition_initial_partition(&initial_partition, s->e->nodeID,
                                    s->e->nr_nodes, s);
      }

      /* Re-distribute the particles to their new nodes. */
      engine_redistribute(s->e);

      /* Make the proxies. */
      engine_makeproxies(s->e);
336

337
338
      /* Finished with these. */
      free(oldnodeIDs);
339
340
    }
#endif
341
  } /* re-build upper-level cells? */
342
343
  // message( "rebuilding upper-level cells took %.3f %s." ,
  // clocks_from_ticks(double)(getticks() - tic), clocks_getunit());
344
345
346
347
348

  /* Otherwise, just clean up the cells. */
  else {

    /* Free the old cells, if they were allocated. */
349
    for (int k = 0; k < s->nr_cells; k++) {
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
      space_rebuild_recycle(s, &s->cells_top[k]);
      s->cells_top[k].sorts = NULL;
      s->cells_top[k].nr_tasks = 0;
      s->cells_top[k].nr_density = 0;
      s->cells_top[k].nr_gradient = 0;
      s->cells_top[k].nr_force = 0;
      s->cells_top[k].density = NULL;
      s->cells_top[k].gradient = NULL;
      s->cells_top[k].force = NULL;
      s->cells_top[k].dx_max = 0.0f;
      s->cells_top[k].sorted = 0;
      s->cells_top[k].count = 0;
      s->cells_top[k].gcount = 0;
      s->cells_top[k].init = NULL;
      s->cells_top[k].extra_ghost = NULL;
      s->cells_top[k].ghost = NULL;
      s->cells_top[k].kick = NULL;
      s->cells_top[k].super = &s->cells_top[k];
368
    }
369
370
    s->maxdepth = 0;
  }
371
372
373
374

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
375
}
376
377
378
379
380
381

/**
 * @brief Re-build the cells as well as the tasks.
 *
 * @param s The #space in which to update the cells.
 * @param cell_max Maximal cell size.
382
 * @param verbose Print messages to stdout or not
383
384
 *
 */
385
void space_rebuild(struct space *s, double cell_max, int verbose) {
386

Matthieu Schaller's avatar
Matthieu Schaller committed
387
  const ticks tic = getticks();
388
389

  /* Be verbose about this. */
390
  // message("re)building space..."); fflush(stdout);
391
392

  /* Re-grid if necessary, or just re-set the cell data. */
393
  space_regrid(s, cell_max, verbose);
394

Pedro Gonnet's avatar
Pedro Gonnet committed
395
396
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
397
  struct cell *restrict cells_top = s->cells_top;
398
  const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
399

400
  const double ih[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
Matthieu Schaller's avatar
Matthieu Schaller committed
401
402
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
403
404
405
406

  /* Run through the particles and get their cell index. */
  // tic = getticks();
  const size_t ind_size = s->size_parts;
407
408
  int *ind;
  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
409
    error("Failed to allocate temporary particle indices.");
Pedro Gonnet's avatar
Pedro Gonnet committed
410
  for (size_t k = 0; k < nr_parts; k++) {
411
412
    struct part *restrict p = &s->parts[k];
    for (int j = 0; j < 3; j++)
413
414
415
416
      if (p->x[j] < 0.0)
        p->x[j] += dim[j];
      else if (p->x[j] >= dim[j])
        p->x[j] -= dim[j];
417
    ind[k] =
418
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
419
    cells_top[ind[k]].count++;
420
  }
Pedro Gonnet's avatar
Pedro Gonnet committed
421
422
  // message( "getting particle indices took %.3f %s." ,
  // clocks_from_ticks(getticks() - tic), clocks_getunit()):
423

424
425
426
427
428
429
  /* Run through the gravity particles and get their cell index. */
  // tic = getticks();
  const size_t gind_size = s->size_gparts;
  int *gind;
  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
    error("Failed to allocate temporary g-particle indices.");
430
  for (size_t k = 0; k < nr_gparts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
431
    struct gpart *restrict gp = &s->gparts[k];
432
433
434
435
436
437
438
    for (int j = 0; j < 3; j++)
      if (gp->x[j] < 0.0)
        gp->x[j] += dim[j];
      else if (gp->x[j] >= dim[j])
        gp->x[j] -= dim[j];
    gind[k] =
        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
439
    cells_top[gind[k]].gcount++;
440
  }
Matthieu Schaller's avatar
Matthieu Schaller committed
441
// message( "getting particle indices took %.3f %s." ,
442
// clocks_from_ticks(getticks() - tic), clocks_getunit());
443
444

#ifdef WITH_MPI
445

446
  /* Move non-local parts to the end of the list. */
447
  const int local_nodeID = s->e->nodeID;
448
  for (size_t k = 0; k < nr_parts;) {
449
450
    if (cells_top[ind[k]].nodeID != local_nodeID) {
      cells_top[ind[k]].count -= 1;
451
      nr_parts -= 1;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
452
      const struct part tp = s->parts[k];
453
454
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
455
      if (s->parts[k].gpart != NULL) {
456
        s->parts[k].gpart->id_or_neg_offset = -k;
457
458
      }
      if (s->parts[nr_parts].gpart != NULL) {
459
        s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
460
      }
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
461
      const struct xpart txp = s->xparts[k];
462
463
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
464
      const int t = ind[k];
465
466
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
467
    } else {
468
469
470
471
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
472

473
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
474
  /* Check that all parts are in the correct places. */
475
  for (size_t k = 0; k < nr_parts; k++) {
476
    if (cells_top[ind[k]].nodeID != local_nodeID) {
477
478
479
480
      error("Failed to move all non-local parts to send list");
    }
  }
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
481
    if (cells_top[ind[k]].nodeID == local_nodeID) {
482
      error("Failed to remove local parts from send list");
483
    }
484
485
  }
#endif
486

487
  /* Move non-local gparts to the end of the list. */
488
  for (size_t k = 0; k < nr_gparts;) {
489
490
    if (cells_top[gind[k]].nodeID != local_nodeID) {
      cells_top[gind[k]].gcount -= 1;
491
      nr_gparts -= 1;
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
492
      const struct gpart tp = s->gparts[k];
493
494
      s->gparts[k] = s->gparts[nr_gparts];
      s->gparts[nr_gparts] = tp;
495
496
      if (s->gparts[k].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
497
      }
498
499
500
      if (s->gparts[nr_gparts].id_or_neg_offset <= 0) {
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
501
      }
Matthieu Schaller's avatar
Bug fix    
Matthieu Schaller committed
502
503
504
      const int t = gind[k];
      gind[k] = gind[nr_gparts];
      gind[nr_gparts] = t;
Matthieu Schaller's avatar
Matthieu Schaller committed
505
    } else {
506
507
508
509
      /* Increment when not exchanging otherwise we need to retest "k".*/
      k++;
    }
  }
510

511
#ifdef SWIFT_DEBUG_CHECKS
512
513
  /* Check that all gparts are in the correct place (untested). */
  for (size_t k = 0; k < nr_gparts; k++) {
514
    if (cells_top[gind[k]].nodeID != local_nodeID) {
515
516
517
518
      error("Failed to move all non-local gparts to send list");
    }
  }
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
519
    if (cells_top[gind[k]].nodeID == local_nodeID) {
520
      error("Failed to remove local gparts from send list");
521
    }
522
523
  }
#endif
524

525
526
  /* Exchange the strays, note that this potentially re-allocates
     the parts arrays. */
527
  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
528
  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
Pedro Gonnet's avatar
Pedro Gonnet committed
529
530
531
532
  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);

  /* Set the new particle counts. */
533
  s->nr_parts = nr_parts + nr_parts_exchanged;
534
  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
535
536

  /* Re-allocate the index array if needed.. */
537
  if (s->nr_parts > ind_size) {
538
539
    int *ind_new;
    if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
540
      error("Failed to allocate temporary particle indices.");
541
    memcpy(ind_new, ind, sizeof(int) * nr_parts);
542
543
    free(ind);
    ind = ind_new;
544
545
546
  }

  /* Assign each particle to its cell. */
Pedro Gonnet's avatar
Pedro Gonnet committed
547
  for (size_t k = nr_parts; k < s->nr_parts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
548
    const struct part *const p = &s->parts[k];
549
    ind[k] =
550
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
551
    cells_top[ind[k]].count += 1;
552
#ifdef SWIFT_DEBUG_CHECKS
553
    if (cells_top[ind[k]].nodeID != local_nodeID)
554
      error("Received part that does not belong to me (nodeID=%i).",
555
            cells_top[ind[k]].nodeID);
556
#endif
557
  }
558
  nr_parts = s->nr_parts;
559
560

#endif /* WITH_MPI */
561
562

  /* Sort the parts according to their cells. */
563
  space_parts_sort(s, ind, nr_parts, 0, s->nr_cells - 1, verbose);
564
565

  /* Re-link the gparts. */
566
  part_relink_gparts(s->parts, nr_parts, 0);
567

568
#ifdef SWIFT_DEBUG_CHECKS
569
  /* Verify space_sort_struct. */
570
571
572
573
574
575
576
577
578
579
  for (size_t k = 1; k < nr_parts; k++) {
    if (ind[k - 1] > ind[k]) {
      error("Sort failed!");
    } else if (ind[k] != cell_getid(cdim, s->parts[k].x[0] * ih[0],
                                    s->parts[k].x[1] * ih[1],
                                    s->parts[k].x[2] * ih[2])) {
      error("Incorrect indices!");
    }
  }
#endif
580
581

  /* We no longer need the indices as of here. */
582
  free(ind);
583

584
585
586
587
#ifdef WITH_MPI

  /* Re-allocate the index array if needed.. */
  if (s->nr_gparts > gind_size) {
588
589
    int *gind_new;
    if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
590
      error("Failed to allocate temporary g-particle indices.");
591
    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
592
593
594
595
596
    free(gind);
    gind = gind_new;
  }

  /* Assign each particle to its cell. */
597
  for (size_t k = nr_gparts; k < s->nr_gparts; k++) {
Matthieu Schaller's avatar
Matthieu Schaller committed
598
    const struct gpart *const p = &s->gparts[k];
599
600
    gind[k] =
        cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
601
602
603
604
605
606
607
    cells_top[gind[k]].gcount += 1;

#ifdef SWIFT_DEBUG_CHECKS
    if (cells_top[ind[k]].nodeID != s->e->nodeID)
      error("Received part that does not belong to me (nodeID=%i).",
            cells_top[ind[k]].nodeID);
#endif
608
609
610
611
  }
  nr_gparts = s->nr_gparts;

#endif
612
613

  /* Sort the parts according to their cells. */
Matthieu Schaller's avatar
Matthieu Schaller committed
614
  space_gparts_sort(s, gind, nr_gparts, 0, s->nr_cells - 1, verbose);
615
616

  /* Re-link the parts. */
617
  part_relink_parts(s->gparts, nr_gparts, s->parts);
618
619

  /* We no longer need the indices as of here. */
620
  free(gind);
621

622
#ifdef SWIFT_DEBUG_CHECKS
623
624
625
  /* Verify that the links are correct */
  for (size_t k = 0; k < nr_gparts; ++k) {

626
    if (s->gparts[k].id_or_neg_offset < 0) {
627

628
      const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset];
629

630
631
632
633
      if (part->gpart != &s->gparts[k]) error("Linking problem !");

      if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] ||
          s->gparts[k].x[2] != part->x[2])
634
635
636
637
638
        error("Linked particles are not at the same position !");
    }
  }
  for (size_t k = 0; k < nr_parts; ++k) {

639
    if (s->parts[k].gpart != NULL &&
640
        s->parts[k].gpart->id_or_neg_offset != -(ptrdiff_t)k) {
641
      error("Linking problem !");
642
643
    }
  }
644
#endif
645

646
647
  /* Hook the cells up to the parts. */
  // tic = getticks();
648
649
650
  struct part *finger = s->parts;
  struct xpart *xfinger = s->xparts;
  struct gpart *gfinger = s->gparts;
651
  for (int k = 0; k < s->nr_cells; k++) {
652
    struct cell *restrict c = &cells_top[k];
653
    c->ti_old = ti_current;
654
655
656
657
658
659
660
    c->parts = finger;
    c->xparts = xfinger;
    c->gparts = gfinger;
    finger = &finger[c->count];
    xfinger = &xfinger[c->count];
    gfinger = &gfinger[c->gcount];
  }
661
  // message( "hooking up cells took %.3f %s." ,
Matthieu Schaller's avatar
Matthieu Schaller committed
662
  // clocks_from_ticks(getticks() - tic), clocks_getunit());
663
664
665

  /* At this point, we have the upper-level cells, old or new. Now make
     sure that the parts in each cell are ok. */
666
  space_split(s, cells_top, verbose);
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
}

/**
 * @brief Split particles between cells of a hierarchy
 *
 * @param s The #space.
 * @param cells The cell hierarchy
 * @param verbose Are we talkative ?
 */
void space_split(struct space *s, struct cell *cells, int verbose) {

Matthieu Schaller's avatar
Matthieu Schaller committed
682
  const ticks tic = getticks();
683

684
  threadpool_map(&s->e->threadpool, space_split_mapper, cells, s->nr_cells,
685
                 sizeof(struct cell), 1, s);
686

687
688
689
  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
690
}
691

692
/**
693
694
 * @brief Sort the particles and condensed particles according to the given
 *indices.
695
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
696
 * @param s The #space.
697
698
699
700
 * @param ind The indices with respect to which the parts are sorted.
 * @param N The number of parts
 * @param min Lowest index.
 * @param max highest index.
701
 * @param verbose Are we talkative ?
702
 */
703
void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
704
705
                      int verbose) {

Matthieu Schaller's avatar
Matthieu Schaller committed
706
  const ticks tic = getticks();
707

708
709
710
711
712
713
  /* Populate a parallel_sort structure with the input data */
  struct parallel_sort sort_struct;
  sort_struct.parts = s->parts;
  sort_struct.xparts = s->xparts;
  sort_struct.ind = ind;
  sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
714
715
  if ((sort_struct.stack =
           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
716
    error("Failed to allocate sorting stack.");
Matthieu Schaller's avatar
Matthieu Schaller committed
717
  for (unsigned int i = 0; i < sort_struct.stack_size; i++)
718
    sort_struct.stack[i].ready = 0;
719

720
  /* Add the first interval. */
721
722
723
724
725
726
727
728
729
730
731
  sort_struct.stack[0].i = 0;
  sort_struct.stack[0].j = N - 1;
  sort_struct.stack[0].min = min;
  sort_struct.stack[0].max = max;
  sort_struct.stack[0].ready = 1;
  sort_struct.first = 0;
  sort_struct.last = 1;
  sort_struct.waiting = 1;

  /* Launch the sorting tasks with a stride of zero such that the same
     map data is passed to each thread. */
732
733
  threadpool_map(&s->e->threadpool, space_parts_sort_mapper, &sort_struct,
                 s->e->threadpool.num_threads, 0, 1, NULL);
734

735
#ifdef SWIFT_DEBUG_CHECKS
736
  /* Verify space_sort_struct. */
737
  for (size_t i = 1; i < N; i++)
738
    if (ind[i - 1] > ind[i])
739
      error("Sorting failed (ind[%zu]=%i,ind[%zu]=%i), min=%i, max=%i.", i - 1,
740
741
742
            ind[i - 1], i, ind[i], min, max);
  message("Sorting succeeded.");
#endif
743

744
  /* Clean up. */
745
  free(sort_struct.stack);
746
747
748
749

  if (verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
750
}
751

752
753
void space_parts_sort_mapper(void *map_data, int num_elements,
                             void *extra_data) {
754
755
756

  /* Unpack the mapping data. */
  struct parallel_sort *sort_struct = (struct parallel_sort *)map_data;
757

758
  /* Pointers to the sorting data. */
759
760
761
  int *ind = sort_struct->ind;
  struct part *parts = sort_struct->parts;
  struct xpart *xparts = sort_struct->xparts;
762

763
  /* Main loop. */
764
  while (sort_struct->waiting) {
765

766
    /* Grab an interval off the queue. */
767
    int qid = atomic_inc(&sort_struct->first) % sort_struct->stack_size;
768

769
    /* Wait for the entry to be ready, or for the sorting do be done. */
770
771
    while (!sort_struct->stack[qid].ready)
      if (!sort_struct->waiting) return;
772

773
    /* Get the stack entry. */
774
775
776
777
778
    ptrdiff_t i = sort_struct->stack[qid].i;
    ptrdiff_t j = sort_struct->stack[qid].j;
    int min = sort_struct->stack[qid].min;
    int max = sort_struct->stack[qid].max;
    sort_struct->stack[qid].ready = 0;
779

780
781
    /* Loop over sub-intervals. */
    while (1) {
782

783
      /* Bring beer. */
784
      const int pivot = (min + max) / 2;
785
786
      /* message("Working on interval [%i,%i] with min=%i, max=%i, pivot=%i.",
              i, j, min, max, pivot); */
787
788

      /* One pass of QuickSort's partitioning. */
789
790
      ptrdiff_t ii = i;
      ptrdiff_t jj = j;
791
792
793
794
      while (ii < jj) {
        while (ii <= j && ind[ii] <= pivot) ii++;
        while (jj >= i && ind[jj] > pivot) jj--;
        if (ii < jj) {
795
          size_t temp_i = ind[ii];
796
797
          ind[ii] = ind[jj];
          ind[jj] = temp_i;
798
          struct part temp_p = parts[ii];
799
800
          parts[ii] = parts[jj];
          parts[jj] = temp_p;
801
          struct xpart temp_xp = xparts[ii];
802
803
804
805
          xparts[ii] = xparts[jj];
          xparts[jj] = temp_xp;
        }
      }
806

807
#ifdef SWIFT_DEBUG_CHECKS
808
      /* Verify space_sort_struct. */
809
      for (int k = i; k <= jj; k++)
810
        if (ind[k] > pivot) {
811
812
          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
                  k, ind[k], pivot, i, j);
813
814
815
816
          error("Partition failed (<=pivot).");
        }
      for (int k = jj + 1; k <= j; k++)
        if (ind[k] <= pivot) {
817
818
          message("sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%li, j=%li.",
                  k, ind[k], pivot, i, j);
819
          error("Partition failed (>pivot).");
820
821
        }
#endif
822
823
824
825
826
827

      /* Split-off largest interval. */
      if (jj - i > j - jj + 1) {

        /* Recurse on the left? */
        if (jj > i && pivot > min) {
828
          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
829
          while (sort_struct->stack[qid].ready)
830
            ;
831
832
833
834
          sort_struct->stack[qid].i = i;
          sort_struct->stack[qid].j = jj;
          sort_struct->stack[qid].min = min;
          sort_struct->stack[qid].max = pivot;
835
          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
836
            error("Qstack overflow.");
837
          sort_struct->stack[qid].ready = 1;
838
        }
839

840
841
842
843
844
845
846
847
848
849
        /* Recurse on the right? */
        if (jj + 1 < j && pivot + 1 < max) {
          i = jj + 1;
          min = pivot + 1;
        } else
          break;

      } else {

        /* Recurse on the right? */
850
        if (pivot + 1 < max) {
851
          qid = atomic_inc(&sort_struct->last) % sort_struct->stack_size;
852
          while (sort_struct->stack[qid].ready)
853
            ;
854
855
856
857
          sort_struct->stack[qid].i = jj + 1;
          sort_struct->stack[qid].j = j;
          sort_struct->stack[qid].min = pivot + 1;
          sort_struct->stack[qid].max = max;
858
          if (atomic_inc(&sort_struct->waiting) >= sort_struct->stack_size)
859
            error("Qstack overflow.");
860
          sort_struct->stack[qid].ready = 1;
861
        }
862

863
864
865
866
867
868
869
        /* Recurse on the left? */
        if (jj > i && pivot > min) {
          j = jj;
          max = pivot;
        } else
          break;
      }
870

871
872
    } /* loop over sub-intervals. */

873
    atomic_dec(&sort_struct->waiting);
874
875

  } /* main loop. */
876
877
}

878
879
880
881
882
/**
 * @brief Sort the g-particles and condensed particles according to the given
 *indices.
 *
 * @param s The #space.
Matthieu Schaller's avatar
Matthieu Schaller committed
883
884
 * @param ind The indices with respect to which the gparts are sorted.
 * @param N The number of gparts
885
886
887
888
 * @param min Lowest index.
 * @param max highest index.
 * @param verbose Are we talkative ?
 */
Matthieu Schaller's avatar
Matthieu Schaller committed
889
void space_gparts_sort(struct space *s, int *ind, size_t N, int min, int max,
890
891
                       int verbose) {

Matthieu Schaller's avatar
Matthieu Schaller committed
892
  const ticks tic = getticks();
893

894
895
896
897
898
  /*Populate a global parallel_sort structure with the input data */
  struct parallel_sort sort_struct;
  sort_struct.gparts = s->gparts;
  sort_struct.ind = ind;
  sort_struct.stack_size = 2 * (max - min + 1) + 10 + s->e->nr_threads;
899
900
  if ((sort_struct.stack =
           malloc(sizeof(struct qstack) * sort_struct.stack_size)) == NULL)
901
    error("Failed to allocate sorting stack.");
Matthieu Schaller's avatar
Matthieu Schaller committed
902
  for (unsigned int i = 0; i < sort_struct.stack_size; i++)
903
    sort_struct.stack[i].ready = 0;
904
905

  /* Add the first interval. */
906
907
908
909
910
911
912
913
914
915
916
  sort_struct.stack[0].i = 0;
  sort_struct.stack[0].j = N - 1;
  sort_struct.stack[0].min = min;
  sort_struct.stack[0].max = max;
  sort_struct.stack[0].ready = 1;
  sort_struct.first = 0;
  sort_struct.last = 1;
  sort_struct.waiting = 1;

  /* Launch the sorting tasks with a stride of zero such that the same
     map data is passed to each thread. */
917
918
  threadpool_map(&s->e->threadpool, space_gparts_sort_mapper,</