engine.c 193 KB
Newer Older
Pedro Gonnet's avatar
Pedro Gonnet committed
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
Peter W. Draper's avatar
Peter W. Draper committed
5
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
6
7
8
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
9
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
10
11
12
13
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
14
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
15
16
17
18
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
19
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
20
21
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30
31
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
32
#include <stdbool.h>
33
34
35
36
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
37

38
39
/* MPI headers. */
#ifdef WITH_MPI
40
#include <mpi.h>
41
42
#endif

Angus Lepper's avatar
Angus Lepper committed
43
44
#ifdef HAVE_LIBNUMA
#include <numa.h>
45
46
#endif

47
48
49
50
51
/* Load the profiler header, if needed. */
#ifdef WITH_PROFILER
#include <gperftools/profiler.h>
#endif

52
/* This object's header. */
Pedro Gonnet's avatar
Pedro Gonnet committed
53
#include "engine.h"
54
55

/* Local headers. */
56
#include "active.h"
57
#include "atomic.h"
58
#include "cell.h"
59
#include "chemistry.h"
60
#include "clocks.h"
61
#include "cooling.h"
62
#include "cosmology.h"
63
64
#include "cycle.h"
#include "debug.h"
65
#include "entropy_floor.h"
66
#include "equation_of_state.h"
67
#include "error.h"
68
#include "gravity.h"
69
#include "gravity_cache.h"
70
#include "hydro.h"
lhausamm's avatar
lhausamm committed
71
#include "logger.h"
lhausamm's avatar
lhausamm committed
72
#include "logger_io.h"
73
#include "map.h"
74
#include "memswap.h"
75
#include "memuse.h"
76
#include "minmax.h"
77
#include "outputlist.h"
78
#include "parallel_io.h"
79
#include "part.h"
80
#include "partition.h"
James Willis's avatar
James Willis committed
81
#include "profiler.h"
82
#include "proxy.h"
83
#include "restart.h"
84
#include "runner.h"
85
86
#include "serial_io.h"
#include "single_io.h"
87
#include "sort_part.h"
88
#include "star_formation.h"
Loic Hausammann's avatar
Loic Hausammann committed
89
#include "stars_io.h"
90
#include "statistics.h"
91
#include "timers.h"
92
#include "tools.h"
93
#include "units.h"
94
#include "velociraptor_interface.h"
Matthieu Schaller's avatar
Matthieu Schaller committed
95
#include "version.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
96

97
98
99
/* Particle cache size. */
#define CACHE_SIZE 512

100
101
102
103
104
const char *engine_policy_names[] = {"none",
                                     "rand",
                                     "steal",
                                     "keep",
                                     "block",
105
                                     "cpu tight",
106
                                     "mpi",
107
                                     "numa affinity",
108
                                     "hydro",
109
110
111
112
113
                                     "self gravity",
                                     "external gravity",
                                     "cosmological integration",
                                     "drift everything",
                                     "reconstruct multi-poles",
114
                                     "temperature",
115
                                     "cooling",
James Willis's avatar
James Willis committed
116
                                     "stars",
Loic Hausammann's avatar
Loic Hausammann committed
117
                                     "structure finding",
118
                                     "star formation",
119
120
                                     "feedback",
                                     "time-step limiter"};
Pedro Gonnet's avatar
Pedro Gonnet committed
121

122
123
124
/** The rank of the engine as a global variable (for messages). */
int engine_rank;

125
/** The current step of the engine as a global variable (for messages). */
126
int engine_current_step;
127

128
129
130
131
132
/**
 * @brief Data collected from the cells at the end of a time-step
 */
struct end_of_step_data {

133
134
  size_t updated, g_updated, s_updated;
  size_t inhibited, g_inhibited, s_inhibited;
135
136
  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
137
  integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max;
138
139
140
  struct engine *e;
};

141
142
143
144
/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
145
 * @param l A pointer to the #link, will be modified atomically.
146
147
148
149
 * @param t The #task.
 *
 * @return The new #link pointer.
 */
150
void engine_addlink(struct engine *e, struct link **l, struct task *t) {
151

152
153
154
155
156
157
#ifdef SWIFT_DEBUG_CHECKS
  if (t == NULL) {
    error("Trying to link NULL task.");
  }
#endif

158
  /* Get the next free link. */
159
  const size_t ind = atomic_inc(&e->nr_links);
160
  if (ind >= e->size_links) {
161
162
163
    error(
        "Link table overflow. Increase the value of "
        "`Scheduler:links_per_tasks`.");
164
165
  }
  struct link *res = &e->links[ind];
166

167
  /* Set it atomically. */
168
  res->t = t;
169
  res->next = atomic_swap(l, res);
170
}
171

172
#ifdef WITH_MPI
173
/**
Peter W. Draper's avatar
Peter W. Draper committed
174
175
 * Do the exchange of one type of particles with all the other nodes.
 *
176
 * @param label a label for the memory allocations of this particle type.
Peter W. Draper's avatar
Peter W. Draper committed
177
178
179
180
181
182
183
184
185
186
187
188
189
 * @param counts 2D array with the counts of particles to exchange with
 *               each other node.
 * @param parts the particle data to exchange
 * @param new_nr_parts the number of particles this node will have after all
 *                     exchanges have completed.
 * @param sizeofparts sizeof the particle struct.
 * @param alignsize the memory alignment required for this particle type.
 * @param mpi_type the MPI_Datatype for these particles.
 * @param nr_nodes the number of nodes to exchange with.
 * @param nodeID the id of this node.
 *
 * @result new particle data constructed from all the exchanges with the
 *         given alignment.
190
 */
191
static void *engine_do_redistribute(const char *label, int *counts, char *parts,
192
193
                                    size_t new_nr_parts, size_t sizeofparts,
                                    size_t alignsize, MPI_Datatype mpi_type,
194
                                    int nr_nodes, int nodeID) {
195
196

  /* Allocate a new particle array with some extra margin */
197
  char *parts_new = NULL;
198
199
  if (swift_memalign(
          label, (void **)&parts_new, alignsize,
200
          sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
201
202
203
204
    error("Failed to allocate new particle data.");

  /* Prepare MPI requests for the asynchronous communications */
  MPI_Request *reqs;
205
206
  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
      NULL)
207
208
    error("Failed to allocate MPI request list.");

209
  /* Only send and receive only "chunk" particles per request. So we need to
210
211
212
   * loop as many times as necessary here. Make 2Gb/sizeofparts so we only
   * send 2Gb packets. */
  const int chunk = INT_MAX / sizeofparts;
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
  int sent = 0;
  int recvd = 0;

  int activenodes = 1;
  while (activenodes) {

    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;

    /* Emit the sends and recvs for the data. */
    size_t offset_send = sent;
    size_t offset_recv = recvd;
    activenodes = 0;

    for (int k = 0; k < nr_nodes; k++) {

      /* Indices in the count arrays of the node of interest */
      const int ind_send = nodeID * nr_nodes + k;
      const int ind_recv = k * nr_nodes + nodeID;

      /* Are we sending any data this loop? */
      int sending = counts[ind_send] - sent;
      if (sending > 0) {
        activenodes++;
236
        if (sending > chunk) sending = chunk;
237
238
239
240

        /* If the send and receive is local then just copy. */
        if (k == nodeID) {
          int receiving = counts[ind_recv] - recvd;
241
          if (receiving > chunk) receiving = chunk;
242
          memcpy(&parts_new[offset_recv * sizeofparts],
243
                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
244
245
        } else {
          /* Otherwise send it. */
246
247
248
          int res =
              MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k,
                        ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]);
249
250
251
252
          if (res != MPI_SUCCESS)
            mpi_error(res, "Failed to isend parts to node %i.", k);
        }
      }
253

254
      /* If we're sending to this node, then move past it to next. */
255
      if (counts[ind_send] > 0) offset_send += counts[ind_send];
256

257
258
259
260
261
262
      /* Are we receiving any data from this node? Note already done if coming
       * from this node. */
      if (k != nodeID) {
        int receiving = counts[ind_recv] - recvd;
        if (receiving > 0) {
          activenodes++;
263
          if (receiving > chunk) receiving = chunk;
264
265
266
267
268
269
          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
                              &reqs[2 * k + 1]);
          if (res != MPI_SUCCESS)
            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
        }
270
271
      }

272
      /* If we're receiving from this node, then move past it to next. */
273
      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
274
275
    }

276
277
278
279
280
281
282
    /* Wait for all the sends and recvs to tumble in. */
    MPI_Status stats[2 * nr_nodes];
    int res;
    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
      for (int k = 0; k < 2 * nr_nodes; k++) {
        char buff[MPI_MAX_ERROR_STRING];
        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
283
284
        message("request from source %i, tag %i has error '%s'.",
                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
285
286
      }
      error("Failed during waitall for part data.");
287
    }
288
289
290
291

    /* Move to next chunks. */
    sent += chunk;
    recvd += chunk;
292
293
294
295
296
297
298
299
300
301
  }

  /* Free temps. */
  free(reqs);

  /* And return new memory. */
  return parts_new;
}
#endif

302
#ifdef WITH_MPI /* redist_mapper */
303

304
/* Support for engine_redistribute threadpool dest mappers. */
305
struct redist_mapper_data {
306
  int *counts;
307
308
309
310
311
  int *dest;
  int nodeID;
  int nr_nodes;
  struct cell *cells;
  struct space *s;
312
313
314
  void *base;
};

315
316
317
/* Generic function for accumulating counts for TYPE parts. Note
 * we use a local counts array to avoid the atomic_add in the parts
 * loop. */
Peter W. Draper's avatar
Peter W. Draper committed
318
#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE)                              \
319
320
321
  engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \
                                         void *extra_data) {               \
    struct TYPE *parts = (struct TYPE *)map_data;                          \
322
323
    struct redist_mapper_data *mydata =                                    \
        (struct redist_mapper_data *)extra_data;                           \
324
325
326
327
    struct space *s = mydata->s;                                           \
    int *dest =                                                            \
        mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base);   \
    int *lcounts = NULL;                                                   \
328
329
    if ((lcounts = (int *)calloc(                                          \
             sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL)   \
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
      error("Failed to allocate counts thread-specific buffer");           \
    for (int k = 0; k < num_elements; k++) {                               \
      for (int j = 0; j < 3; j++) {                                        \
        if (parts[k].x[j] < 0.0)                                           \
          parts[k].x[j] += s->dim[j];                                      \
        else if (parts[k].x[j] >= s->dim[j])                               \
          parts[k].x[j] -= s->dim[j];                                      \
      }                                                                    \
      const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0],    \
                                 parts[k].x[1] * s->iwidth[1],             \
                                 parts[k].x[2] * s->iwidth[2]);            \
      dest[k] = s->cells_top[cid].nodeID;                                  \
      size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k];            \
      lcounts[ind] += 1;                                                   \
    }                                                                      \
    for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++)        \
      atomic_add(&mydata->counts[k], lcounts[k]);                          \
    free(lcounts);                                                         \
  }
349

350
351
/**
 * @brief Accumulate the counts of particles per cell.
352
 * Threadpool helper for accumulating the counts of particles per cell.
353
 *
354
 * part version.
355
 */
Peter W. Draper's avatar
Peter W. Draper committed
356
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);
357

358
359
/**
 * @brief Accumulate the counts of star particles per cell.
360
 * Threadpool helper for accumulating the counts of particles per cell.
361
 *
362
 * spart version.
363
 */
Peter W. Draper's avatar
Peter W. Draper committed
364
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart);
365

366
367
/**
 * @brief Accumulate the counts of gravity particles per cell.
368
 * Threadpool helper for accumulating the counts of particles per cell.
369
 *
370
 * gpart version.
371
 */
Peter W. Draper's avatar
Peter W. Draper committed
372
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart);
373

374
#endif /* redist_mapper_data */
375

376
#ifdef WITH_MPI /* savelink_mapper_data */
377
378

/* Support for saving the linkage between gparts and parts/sparts. */
379
struct savelink_mapper_data {
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
  int nr_nodes;
  int *counts;
  void *parts;
  int nodeID;
};

/**
 * @brief Save the offset of each gravity partner of a part or spart.
 *
 * The offset is from the start of the sorted particles to be sent to a node.
 * This is possible as parts without gravity partners have a positive id.
 * These offsets are used to restore the pointers on the receiving node.
 *
 * CHECKS should be eliminated as dead code when optimizing.
 */
Peter W. Draper's avatar
Peter W. Draper committed
395
#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS)                      \
Peter W. Draper's avatar
Peter W. Draper committed
396
397
398
  engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \
                                             void *extra_data) {               \
    int *nodes = (int *)map_data;                                              \
399
400
    struct savelink_mapper_data *mydata =                                      \
        (struct savelink_mapper_data *)extra_data;                             \
Peter W. Draper's avatar
Peter W. Draper committed
401
402
403
404
405
406
407
    int nodeID = mydata->nodeID;                                               \
    int nr_nodes = mydata->nr_nodes;                                           \
    int *counts = mydata->counts;                                              \
    struct TYPE *parts = (struct TYPE *)mydata->parts;                         \
                                                                               \
    for (int j = 0; j < num_elements; j++) {                                   \
      int node = nodes[j];                                                     \
408
      int count = 0;                                                           \
Peter W. Draper's avatar
Peter W. Draper committed
409
410
411
      size_t offset = 0;                                                       \
      for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i];  \
                                                                               \
412
      for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) {             \
Peter W. Draper's avatar
Peter W. Draper committed
413
414
        if (parts[k + offset].gpart != NULL) {                                 \
          if (CHECKS)                                                          \
415
            if (parts[k + offset].gpart->id_or_neg_offset > 0)                 \
Peter W. Draper's avatar
Peter W. Draper committed
416
417
418
419
420
421
              error("Trying to link a partnerless " #TYPE "!");                \
          parts[k + offset].gpart->id_or_neg_offset = -count;                  \
          count++;                                                             \
        }                                                                      \
      }                                                                        \
    }                                                                          \
422
423
424
425
426
427
428
  }

/**
 * @brief Save position of part-gpart links.
 * Threadpool helper for accumulating the counts of particles per cell.
 */
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
429
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1);
430
#else
Peter W. Draper's avatar
Peter W. Draper committed
431
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0);
432
433
434
435
436
437
438
#endif

/**
 * @brief Save position of spart-gpart links.
 * Threadpool helper for accumulating the counts of particles per cell.
 */
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
439
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1);
440
#else
Peter W. Draper's avatar
Peter W. Draper committed
441
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0);
442
443
#endif

444
#endif /* savelink_mapper_data */
445

446
#ifdef WITH_MPI /* relink_mapper_data */
447
448

/* Support for relinking parts, gparts and sparts after moving between nodes. */
449
struct relink_mapper_data {
450
451
452
453
  int nodeID;
  int nr_nodes;
  int *counts;
  int *s_counts;
454
455
456
457
458
459
460
461
462
  int *g_counts;
  struct space *s;
};

/**
 * @brief Restore the part/gpart and spart/gpart links for a list of nodes.
 *
 * @param map_data address of nodes to process.
 * @param num_elements the number nodes to process.
463
464
 * @param extra_data additional data defining the context (a
 * relink_mapper_data).
465
466
467
468
469
 */
static void engine_redistribute_relink_mapper(void *map_data, int num_elements,
                                              void *extra_data) {

  int *nodes = (int *)map_data;
470
  struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data;
471
472
473
474

  int nodeID = mydata->nodeID;
  int nr_nodes = mydata->nr_nodes;
  int *counts = mydata->counts;
475
  int *g_counts = mydata->g_counts;
476
477
  int *s_counts = mydata->s_counts;
  struct space *s = mydata->s;
478
479
480
481

  for (int i = 0; i < num_elements; i++) {

    int node = nodes[i];
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496

    /* Get offsets to correct parts of the counts arrays for this node. */
    size_t offset_parts = 0;
    size_t offset_gparts = 0;
    size_t offset_sparts = 0;
    for (int n = 0; n < node; n++) {
      int ind_recv = n * nr_nodes + nodeID;
      offset_parts += counts[ind_recv];
      offset_gparts += g_counts[ind_recv];
      offset_sparts += s_counts[ind_recv];
    }

    /* Number of gparts sent from this node. */
    int ind_recv = node * nr_nodes + nodeID;
    const size_t count_gparts = g_counts[ind_recv];
497
498

    /* Loop over the gparts received from this node */
499
    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) {
500
501
502
503
504

      /* Does this gpart have a gas partner ? */
      if (s->gparts[k].type == swift_type_gas) {

        const ptrdiff_t partner_index =
505
            offset_parts - s->gparts[k].id_or_neg_offset;
506
507
508
509
510
511
512

        /* Re-link */
        s->gparts[k].id_or_neg_offset = -partner_index;
        s->parts[partner_index].gpart = &s->gparts[k];
      }

      /* Does this gpart have a star partner ? */
513
      else if (s->gparts[k].type == swift_type_stars) {
514
515

        const ptrdiff_t partner_index =
516
            offset_sparts - s->gparts[k].id_or_neg_offset;
517
518
519
520
521
522
523
524
525

        /* Re-link */
        s->gparts[k].id_or_neg_offset = -partner_index;
        s->sparts[partner_index].gpart = &s->gparts[k];
      }
    }
  }
}

526
#endif /* relink_mapper_data */
527

528
/**
529
 * @brief Redistribute the particles amongst the nodes according
530
531
 *      to their cell's node IDs.
 *
532
533
534
535
 * The strategy here is as follows:
 * 1) Each node counts the number of particles it has to send to each other
 * node.
 * 2) The number of particles of each type is then exchanged.
536
537
538
539
 * 3) The particles to send are placed in a temporary buffer in which the
 * part-gpart links are preserved.
 * 4) Each node allocates enough space for the new particles.
 * 5) (Asynchronous) communications are issued to transfer the data.
540
541
 *
 *
542
543
 * @param e The #engine.
 */
544
void engine_redistribute(struct engine *e) {
545

546
#ifdef WITH_MPI
547

548
549
  const int nr_nodes = e->nr_nodes;
  const int nodeID = e->nodeID;
550
  struct space *s = e->s;
551
  struct cell *cells = s->cells_top;
552
  const int nr_cells = s->nr_cells;
553
  struct xpart *xparts = s->xparts;
554
555
  struct part *parts = s->parts;
  struct gpart *gparts = s->gparts;
556
  struct spart *sparts = s->sparts;
557
  ticks tic = getticks();
558

559
560
561
562
563
564
  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
  size_t nr_sparts = s->nr_sparts;

  /* Start by moving inhibited particles to the end of the arrays */
  for (size_t k = 0; k < nr_parts; /* void */) {
565
566
    if (parts[k].time_bin == time_bin_inhibited ||
        parts[k].time_bin == time_bin_not_created) {
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
      nr_parts -= 1;

      /* Swap the particle */
      memswap(&parts[k], &parts[nr_parts], sizeof(struct part));

      /* Swap the xpart */
      memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart));

      /* Swap the link with the gpart */
      if (parts[k].gpart != NULL) {
        parts[k].gpart->id_or_neg_offset = -k;
      }
      if (parts[nr_parts].gpart != NULL) {
        parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
      }
    } else {
      k++;
    }
  }

  /* Now move inhibited star particles to the end of the arrays */
  for (size_t k = 0; k < nr_sparts; /* void */) {
589
590
    if (sparts[k].time_bin == time_bin_inhibited ||
        sparts[k].time_bin == time_bin_not_created) {
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
      nr_sparts -= 1;

      /* Swap the particle */
      memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart));

      /* Swap the link with the gpart */
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
      if (s->sparts[nr_sparts].gpart != NULL) {
        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
      }
    } else {
      k++;
    }
  }

  /* Finally do the same with the gravity particles */
  for (size_t k = 0; k < nr_gparts; /* void */) {
610
611
    if (gparts[k].time_bin == time_bin_inhibited ||
        gparts[k].time_bin == time_bin_not_created) {
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
      nr_gparts -= 1;

      /* Swap the particle */
      memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart));

      /* Swap the link with part/spart */
      if (s->gparts[k].type == swift_type_gas) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      } else if (s->gparts[k].type == swift_type_stars) {
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      }
      if (s->gparts[nr_gparts].type == swift_type_gas) {
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
      } else if (s->gparts[nr_gparts].type == swift_type_stars) {
        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
      }
    } else {
      k++;
    }
  }

  /* Now we are ready to deal with real particles and can start the exchange. */

637
  /* Allocate temporary arrays to store the counts of particles to be sent
638
639
   * and the destination of each particle */
  int *counts;
640
  if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
641
    error("Failed to allocate counts temporary buffer.");
642

643
  int *dest;
644
  if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL)
645
646
    error("Failed to allocate dest temporary buffer.");

647
648
649
650
651
  /* Simple index of node IDs, used for mappers over nodes. */
  int *nodes = NULL;
  if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL)
    error("Failed to allocate nodes temporary buffer.");
  for (int k = 0; k < nr_nodes; k++) nodes[k] = k;
652

653
  /* Get destination of each particle */
654
  struct redist_mapper_data redist_data;
655
656
657
  redist_data.s = s;
  redist_data.nodeID = nodeID;
  redist_data.nr_nodes = nr_nodes;
658

659
660
661
  redist_data.counts = counts;
  redist_data.dest = dest;
  redist_data.base = (void *)parts;
662

663
  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts,
664
                 nr_parts, sizeof(struct part), 0, &redist_data);
665
666

  /* Sort the particles according to their cell index. */
667
  if (nr_parts > 0)
668
669
    space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes],
                     nr_nodes, 0);
670

671
672
#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the part have been sorted correctly. */
673
  for (size_t k = 0; k < nr_parts; k++) {
674
675
    const struct part *p = &s->parts[k];

676
677
678
679
680
681
    if (p->time_bin == time_bin_inhibited)
      error("Inhibited particle found after sorting!");

    if (p->time_bin == time_bin_not_created)
      error("Inhibited particle found after sorting!");

682
    /* New cell index */
683
    const int new_cid =
684
685
686
687
        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
                   p->x[2] * s->iwidth[2]);

    /* New cell of this part */
688
689
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;
690

691
692
    if (dest[k] != new_node)
      error("part's new node index not matching sorted index.");
693
694
695
696
697

    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
      error("part not sorted into the right top-level cell!");
698
699
700
  }
#endif

701
702
  /* We will need to re-link the gpart partners of parts, so save their
   * relative positions in the sent lists. */
703
  if (nr_parts > 0 && nr_gparts > 0) {
704

705
    struct savelink_mapper_data savelink_data;
706
707
708
709
710
711
    savelink_data.nr_nodes = nr_nodes;
    savelink_data.counts = counts;
    savelink_data.parts = (void *)parts;
    savelink_data.nodeID = nodeID;
    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part,
                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
712
  }
713
  swift_free("dest", dest);
714

715
  /* Get destination of each s-particle */
716
  int *s_counts;
717
  if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
718
719
720
    error("Failed to allocate s_counts temporary buffer.");

  int *s_dest;
721
  if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL)
722
723
    error("Failed to allocate s_dest temporary buffer.");

724
725
726
  redist_data.counts = s_counts;
  redist_data.dest = s_dest;
  redist_data.base = (void *)sparts;
727

728
  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts,
729
                 nr_sparts, sizeof(struct spart), 0, &redist_data);
730
731

  /* Sort the particles according to their cell index. */
732
  if (nr_sparts > 0)
733
734
    space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes,
                      0);
735

736
737
#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the spart have been sorted correctly. */
738
  for (size_t k = 0; k < nr_sparts; k++) {
739
740
    const struct spart *sp = &s->sparts[k];

741
742
743
744
745
746
    if (sp->time_bin == time_bin_inhibited)
      error("Inhibited particle found after sorting!");

    if (sp->time_bin == time_bin_not_created)
      error("Inhibited particle found after sorting!");

747
    /* New cell index */
748
    const int new_cid =
749
750
751
752
        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
                   sp->x[2] * s->iwidth[2]);

    /* New cell of this spart */
753
754
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;
755

756
757
    if (s_dest[k] != new_node)
      error("spart's new node index not matching sorted index.");
758
759
760
761
762

    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
      error("spart not sorted into the right top-level cell!");
763
764
765
  }
#endif

766
  /* We need to re-link the gpart partners of sparts. */
767
  if (nr_sparts > 0) {
768

769
    struct savelink_mapper_data savelink_data;
770
771
772
773
774
775
    savelink_data.nr_nodes = nr_nodes;
    savelink_data.counts = s_counts;
    savelink_data.parts = (void *)sparts;
    savelink_data.nodeID = nodeID;
    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart,
                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
776
  }
777
  swift_free("s_dest", s_dest);
778

779
  /* Get destination of each g-particle */
780
  int *g_counts;
781
  if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
782
783
784
    error("Failed to allocate g_gcount temporary buffer.");

  int *g_dest;
785
  if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL)
786
787
    error("Failed to allocate g_dest temporary buffer.");

788
789
790
  redist_data.counts = g_counts;
  redist_data.dest = g_dest;
  redist_data.base = (void *)gparts;
791

792
  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts,
793
                 nr_gparts, sizeof(struct gpart), 0, &redist_data);
794
795

  /* Sort the gparticles according to their cell index. */
796
  if (nr_gparts > 0)
797
798
    space_gparts_sort(s->gparts, s->parts, s->sparts, g_dest,
                      &g_counts[nodeID * nr_nodes], nr_nodes);
799

800
801
#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the gpart have been sorted correctly. */
802
  for (size_t k = 0; k < nr_gparts; k++) {
803
804
    const struct gpart *gp = &s->gparts[k];

805
806
807
808
809
810
    if (gp->time_bin == time_bin_inhibited)
      error("Inhibited particle found after sorting!");

    if (gp->time_bin == time_bin_not_created)
      error("Inhibited particle found after sorting!");

811
    /* New cell index */
812
    const int new_cid =
813
814
815
816
        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
                   gp->x[2] * s->iwidth[2]);

    /* New cell of this gpart */
817
818
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;
819

820
    if (g_dest[k] != new_node)
821
822
      error("gpart's new node index not matching sorted index (%d != %d).",
            g_dest[k], new_node);
823
824
825
826
827

    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
      error("gpart not sorted into the right top-level cell!");
828
829
830
  }
#endif

831
  swift_free("g_dest", g_dest);
832

833
834
835
836
837
  /* Get all the counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                    MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce particle transfer counts.");

838
  /* Get all the s_counts from all the nodes. */
839
840
841
842
843
844
845
846
847
  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce gparticle transfer counts.");

  /* Get all the g_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce sparticle transfer counts.");

Peter W. Draper's avatar
Peter W. Draper committed
848
  /* Report how many particles will be moved. */
849
850
  if (e->verbose) {
    if (e->nodeID == 0) {
851
852
      size_t total = 0, g_total = 0, s_total = 0;
      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0;
853
      for (int p = 0, r = 0; p < nr_nodes; p++) {
854
        for (int n = 0; n < nr_nodes; n++) {
855
          total += counts[r];
856
857
          g_total += g_counts[r];
          s_total += s_counts[r];
858
          if (p == n) {
859
860
861
862
            unmoved += counts[r];
            g_unmoved += g_counts[r];
            s_unmoved += s_counts[r];
          }
863
864
865
          r++;
        }
      }
Matthieu Schaller's avatar
Matthieu Schaller committed
866
867
868
869
870
871
872
873
874
875
876
      if (total > 0)
        message("%ld of %ld (%.2f%%) of particles moved", total - unmoved,
                total, 100.0 * (double)(total - unmoved) / (double)total);
      if (g_total > 0)
        message("%ld of %ld (%.2f%%) of g-particles moved", g_total - g_unmoved,
                g_total,
                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
      if (s_total > 0)
        message("%ld of %ld (%.2f%%) of s-particles moved", s_total - s_unmoved,
                s_total,
                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
877
    }
878
879
  }

Peter W. Draper's avatar
Peter W. Draper committed
880
881
882
  /* Now each node knows how many parts, sparts and gparts will be transferred
   * to every other node.
   * Get the new numbers of particles for this node. */
883
  size_t nr_parts_new = 0, nr_gparts_new = 0, nr_sparts_new = 0;
884
  for (int k = 0; k < nr_nodes; k++)
885
    nr_parts_new += counts[k * nr_nodes + nodeID];
886
  for (int k = 0; k < nr_nodes; k++)
887
888
889
    nr_gparts_new += g_counts[k * nr_nodes + nodeID];
  for (int k = 0; k < nr_nodes; k++)
    nr_sparts_new += s_counts[k * nr_nodes + nodeID];
890

Peter W. Draper's avatar
Peter W. Draper committed
891
892
893
894
  /* Now exchange the particles, type by type to keep the memory required
   * under control. */

  /* SPH particles. */
895
  void *new_parts = engine_do_redistribute(
896
897
      "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part),
      part_align, part_mpi_type, nr_nodes, nodeID);
898
  swift_free("parts", s->parts);
899
  s->parts = (struct part *)new_parts;
900
901
  s->nr_parts = nr_parts_new;
  s->size_parts = engine_redistribute_alloc_margin * nr_parts_new;
902

Peter W. Draper's avatar
Peter W. Draper committed
903
  /* Extra SPH particle properties. */
904
905
906
  new_parts = engine_do_redistribute(
      "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart),
      xpart_align, xpart_mpi_type, nr_nodes, nodeID);
907
  swift_free("xparts", s->xparts);
908
  s->xparts = (struct xpart *)new_parts;
909

Peter W. Draper's avatar
Peter W. Draper committed
910
  /* Gravity particles. */
911
912
913
  new_parts = engine_do_redistribute(
      "gparts", g_counts, (char *)s->gparts, nr_gparts_new,
      sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID);
914
  swift_free("gparts", s->gparts);
915
  s->gparts = (struct gpart *)new_parts;
916
917
  s->nr_gparts = nr_gparts_new;
  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new;
918

Peter W. Draper's avatar
Peter W. Draper committed
919
  /* Star particles. */
920
921
922
  new_parts = engine_do_redistribute(
      "sparts", s_counts, (char *)s->sparts, nr_sparts_new,
      sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID);
923
  swift_free("sparts", s->sparts);
924
  s->sparts = (struct spart *)new_parts;
925
926
  s->nr_sparts = nr_sparts_new;
  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new;
927

928
929
930
  /* All particles have now arrived. Time for some final operations on the
     stuff we just received */

931
932
933
  /* Restore the part<->gpart and spart<->gpart links.
   * Generate indices and counts for threadpool tasks. Note we process a node
   * at a time. */
934
  struct relink_mapper_data relink_data;
935
  relink_data.s = s;
936
937
938
939
940
  relink_data.counts = counts;
  relink_data.g_counts = g_counts;
  relink_data.s_counts = s_counts;
  relink_data.nodeID = nodeID;
  relink_data.nr_nodes = nr_nodes;
941

942
943
  threadpool_map(&e->threadpool, engine_redistribute_relink_mapper, nodes,
                 nr_nodes, sizeof(int), 1, &relink_data);
944
  free(nodes);
945

946
  /* Clean up the counts now we are done. */
947
948
949
950
  free(counts);
  free(g_counts);
  free(s_counts);

951
#ifdef SWIFT_DEBUG_CHECKS
952
  /* Verify that all parts are in the right place. */
953
  for (size_t k = 0; k < nr_parts_new; k++) {
954
955
956
    const int cid = cell_getid(s->cdim, s->parts[k].x[0] * s->iwidth[0],
                               s->parts[k].x[1] * s->iwidth[1],
                               s->parts[k].x[2] * s->iwidth[2]);
957
    if (cells[cid].nodeID != nodeID)
958
      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
959
960
            cells[cid].nodeID);
  }