/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif

/* This object's header. */
#include "engine.h"

/* Local headers. */
#include "active.h"
#include "atomic.h"
#include "black_holes_properties.h"
#include "cell.h"
#include "chemistry.h"
#include "clocks.h"
#include "cooling.h"
#include "cosmology.h"
#include "cycle.h"
#include "debug.h"
#include "entropy_floor.h"
#include "equation_of_state.h"
#include "error.h"
#include "feedback.h"
#include "gravity.h"
#include "gravity_cache.h"
#include "hydro.h"
#include "logger.h"
#include "logger_io.h"
#include "map.h"
#include "memuse.h"
#include "minmax.h"
#include "outputlist.h"
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "restart.h"
#include "runner.h"
#include "serial_io.h"
#include "single_io.h"
#include "sort_part.h"
#include "star_formation.h"
#include "star_formation_logger.h"
#include "stars_io.h"
#include "statistics.h"
#include "timers.h"
#include "tools.h"
#include "units.h"
#include "velociraptor_interface.h"
#include "version.h"

/* Particle cache size. */
#define CACHE_SIZE 512

const char *engine_policy_names[] = {"none",
                                     "rand",
                                     "steal",
                                     "keep",
                                     "block",
                                     "cpu tight",
                                     "mpi",
                                     "numa affinity",
                                     "hydro",
                                     "self gravity",
                                     "external gravity",
                                     "cosmological integration",
                                     "drift everything",
                                     "reconstruct multi-poles",
                                     "temperature",
                                     "cooling",
                                     "stars",
                                     "structure finding",
                                     "star formation",
                                     "feedback",
                                     "black holes",
                                     "fof search",
                                     "time-step limiter",
                                     "time-step sync"};

/** The rank of the engine as a global variable (for messages). */
int engine_rank;

/** The current step of the engine as a global variable (for messages). */
int engine_current_step;

extern int engine_max_parts_per_ghost;
extern int engine_max_sparts_per_ghost;

/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
 * @param l A pointer to the #link, will be modified atomically.
 * @param t The #task.
 *
 * @return The new #link pointer.
 */
void engine_addlink(struct engine *e, struct link **l, struct task *t) {

#ifdef SWIFT_DEBUG_CHECKS
  if (t == NULL) {
    error("Trying to link NULL task.");
  }
#endif

  /* Get the next free link. */
  const size_t ind = atomic_inc(&e->nr_links);
  if (ind >= e->size_links) {
    error(
        "Link table overflow. Increase the value of "
        "`Scheduler:links_per_tasks`.");
  }
  struct link *res = &e->links[ind];

  /* Set it atomically. */
  res->t = t;
  res->next = atomic_swap(l, res);
}

/**
 * @brief Repartition the cells amongst the nodes.
 *
 * @param e The #engine.
 */
void engine_repartition(struct engine *e) {

#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Be verbose about this. */
  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
  fflush(stdout);

  /* Check that all cells have been drifted to the current time */
  space_check_drift_point(e->s, e->ti_current, /*check_multipoles=*/0);
#endif

  /* Clear the repartition flag. */
  e->forcerepart = 0;

  /* Nothing to do if only using a single node. Also avoids METIS
   * bug that doesn't handle this case well. */
  if (e->nr_nodes == 1) return;

  /* Generate the fixed costs include file. */
  if (e->step > 3 && e->reparttype->trigger <= 1.f) {
    task_dump_stats("partition_fixed_costs.h", e, /* header = */ 1,
                    /* allranks = */ 1);
  }

  /* Do the repartitioning. */
  partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
                        e->sched.tasks, e->sched.nr_tasks);

  /* Partitioning requires copies of the particles, so we need to reduce the
   * memory in use to the minimum; we can free the sorting indices and the
   * tasks as these will be regenerated at the next rebuild. Also the foreign
   * particle arrays can go as these will be regenerated in proxy exchange. */

  /* Sorting indices. */
  if (e->s->cells_top != NULL) space_free_cells(e->s);

  /* Task arrays. */
  scheduler_free_tasks(&e->sched);

  /* Foreign parts. */
  space_free_foreign_parts(e->s);

  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of
     how many particles go from where to where, then re-allocating
     the parts array, and emitting the sends and receives.
     Finally, the space, tasks, and proxies need to be rebuilt. */

  /* Redistribute the particles between the nodes. */
  engine_redistribute(e);

  /* Make the proxies. */
  engine_makeproxies(e);

  /* Tell the engine it should re-build whenever possible */
  e->forcerebuild = 1;

  /* Flag that a repartition has taken place */
  e->step_props |= engine_step_prop_repartition;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  if (e->reparttype->type != REPART_NONE)
    error("SWIFT was not compiled with MPI and METIS or ParMETIS support.");

  /* Clear the repartition flag. */
  e->forcerepart = 0;
#endif
}

/**
 * @brief Decide whether to trigger a repartition of the cells amongst the
 * nodes.
 *
 * @param e The #engine.
 */
void engine_repartition_trigger(struct engine *e) {

#ifdef WITH_MPI

  const ticks tic = getticks();

  /* Do nothing if there have not been enough steps since the last repartition
   * as we don't want to repeat this too often or immediately after a
   * repartition step. Also nothing to do when no repartitioning is requested. */
  if (e->step - e->last_repartition >= 2 &&
      e->reparttype->type != REPART_NONE) {

    /* If we have fixed costs available and this is step 2 or we are forcing
     * repartitioning then we do a fixed costs one now. */
    if (e->reparttype->trigger > 1 ||
        (e->step == 2 && e->reparttype->use_fixed_costs)) {

      if (e->reparttype->trigger > 1) {
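        /* A trigger value greater than one is interpreted as a fixed
         * repartition period in steps rather than an imbalance fraction. */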
        if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1;
      } else {
        e->forcerepart = 1;
      }
      e->reparttype->use_ticks = 0;

    } else {

      /* It is only worth checking the CPU loads when we have processed a
       * significant number of all particles as we require all tasks to have
       * timings. */
      if ((e->updates > 1 &&
           e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
          (e->g_updates > 1 &&
           e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {

        /* Should we use the task timings or fixed costs? */
        if (e->reparttype->use_fixed_costs > 1) {
          e->reparttype->use_ticks = 0;
        } else {
          e->reparttype->use_ticks = 1;
        }

        /* Get CPU time used since the last call to this function. */
        double elapsed_cputime =
            clocks_get_cputime_used() - e->cputime_last_step;

        /* Gather the elapsed CPU times from all ranks for the last step. */
        double elapsed_cputimes[e->nr_nodes];
        MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
                   MPI_DOUBLE, 0, MPI_COMM_WORLD);
        if (e->nodeID == 0) {

          /* Get the range and mean of cputimes. */
          double mintime = elapsed_cputimes[0];
          double maxtime = elapsed_cputimes[0];
          double sum = elapsed_cputimes[0];
          for (int k = 1; k < e->nr_nodes; k++) {
            if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
            if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
            sum += elapsed_cputimes[k];
          }
          double mean = sum / (double)e->nr_nodes;

          /* Are we out of balance? */
          double abs_trigger = fabs(e->reparttype->trigger);
          if (((maxtime - mintime) / mean) > abs_trigger) {
            if (e->verbose)
              message("trigger fraction %.3f > %.3f will repartition",
                      (maxtime - mintime) / mean, abs_trigger);
            e->forcerepart = 1;
          } else {
            if (e->verbose)
              message("trigger fraction %.3f <= %.3f will not repartition",
                      (maxtime - mintime) / mean, abs_trigger);
          }
        }
      }

      /* All nodes do this together. */
      MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }

    /* Remember we did this. */
    if (e->forcerepart) e->last_repartition = e->step;
  }

  /* We always reset CPU time for next check, unless it will not be used. */
  if (e->reparttype->type != REPART_NONE)
    e->cputime_last_step = clocks_get_cputime_used();

  if (e->verbose)
    message("took %.3f %s", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#endif
}

/**
 * @brief Exchange cell structures with other nodes.
 *
 * @param e The #engine.
 */
void engine_exchange_cells(struct engine *e) {

#ifdef WITH_MPI

  const int with_gravity = e->policy & engine_policy_self_gravity;
  const ticks tic = getticks();

  /* Exchange the cell structure with neighbouring ranks. */
  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity);

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
 * @brief Exchange straying particles with other nodes.
 *
 * @param e The #engine.
 * @param offset_parts The index in the parts array as of which the foreign
 *        parts reside (i.e. the current number of local #part).
 * @param ind_part The foreign #cell ID of each part.
 * @param Npart The number of stray parts, contains the number of parts
 *        received on return.
 * @param offset_gparts The index in the gparts array as of which the foreign
 *        parts reside (i.e. the current number of local #gpart).
 * @param ind_gpart The foreign #cell ID of each gpart.
 * @param Ngpart The number of stray gparts, contains the number of gparts
 *        received on return.
 * @param offset_sparts The index in the sparts array as of which the foreign
 *        parts reside (i.e. the current number of local #spart).
 * @param ind_spart The foreign #cell ID of each spart.
 * @param Nspart The number of stray sparts, contains the number of sparts
 *        received on return.
 * @param offset_bparts The index in the bparts array as of which the foreign
 *        parts reside (i.e. the current number of local #bpart).
 * @param ind_bpart The foreign #cell ID of each bpart.
 * @param Nbpart The number of stray bparts, contains the number of bparts
 *        received on return.
 *
 * Note that this function does not mess up the linkage between parts and
 * gparts, i.e. the received particles have correct linkage.
 */
void engine_exchange_strays(struct engine *e, const size_t offset_parts,
                            const int *ind_part, size_t *Npart,
                            const size_t offset_gparts, const int *ind_gpart,
                            size_t *Ngpart, const size_t offset_sparts,
                            const int *ind_spart, size_t *Nspart,
                            const size_t offset_bparts, const int *ind_bpart,
                            size_t *Nbpart) {

#ifdef WITH_MPI

  struct space *s = e->s;
  ticks tic = getticks();

  /* Re-set the proxies. */
  for (int k = 0; k < e->nr_proxies; k++) {
    e->proxies[k].nr_parts_out = 0;
    e->proxies[k].nr_gparts_out = 0;
    e->proxies[k].nr_sparts_out = 0;
    e->proxies[k].nr_bparts_out = 0;
  }

  /* Put the parts into the corresponding proxies. */
  for (size_t k = 0; k < *Npart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_part[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_part[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->parts[offset_parts + k].id,
          s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1],
          s->parts[offset_parts + k].x[2]);
    }

    /* Re-link the associated gpart with the buffer offset of the part. */
    if (s->parts[offset_parts + k].gpart != NULL) {
      s->parts[offset_parts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_parts_out;
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->parts[offset_parts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the part and xpart into the proxy. */
    proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k],
                     &s->xparts[offset_parts + k], 1);
  }

  /* Put the sparts into the corresponding proxies. */
  for (size_t k = 0; k < *Nspart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_spart[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_spart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->sparts[offset_sparts + k].id,
          s->sparts[offset_sparts + k].x[0], s->sparts[offset_sparts + k].x[1],
          s->sparts[offset_sparts + k].x[2]);
    }

    /* Re-link the associated gpart with the buffer offset of the spart. */
    if (s->sparts[offset_sparts + k].gpart != NULL) {
      s->sparts[offset_sparts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_sparts_out;
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->sparts[offset_sparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the spart into the proxy */
    proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1);
  }

  /* Put the bparts into the corresponding proxies. */
  for (size_t k = 0; k < *Nbpart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_bpart[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_bpart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->bparts[offset_bparts + k].id,
          s->bparts[offset_bparts + k].x[0], s->bparts[offset_bparts + k].x[1],
          s->bparts[offset_bparts + k].x[2]);
    }

    /* Re-link the associated gpart with the buffer offset of the bpart. */
    if (s->bparts[offset_bparts + k].gpart != NULL) {
      s->bparts[offset_bparts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_bparts_out;
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->bparts[offset_bparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the bpart into the proxy */
    proxy_bparts_load(&e->proxies[pid], &s->bparts[offset_bparts + k], 1);
  }

  /* Put the gparts into the corresponding proxies. */
  for (size_t k = 0; k < *Ngpart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_gpart[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_gpart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lli, x=[%e,%e,%e].",
          node_id, s->gparts[offset_gparts + k].id_or_neg_offset,
          s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1],
          s->gparts[offset_gparts + k].x[2]);
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->gparts[offset_gparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the gpart into the proxy */
    proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
  }

  /* Launch the proxies. */
  MPI_Request reqs_in[5 * engine_maxproxies];
  MPI_Request reqs_out[5 * engine_maxproxies];
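  /* Sized for up to five data requests per proxy (parts, xparts, gparts,
   * sparts and bparts); the first nr_proxies slots also carry the
   * particle-count requests exchanged just below. */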
  for (int k = 0; k < e->nr_proxies; k++) {
    proxy_parts_exchange_first(&e->proxies[k]);
    reqs_in[k] = e->proxies[k].req_parts_count_in;
    reqs_out[k] = e->proxies[k].req_parts_count_out;
  }

  /* Wait for each count to come in and start the recv. */
  for (int k = 0; k < e->nr_proxies; k++) {
    int pid = MPI_UNDEFINED;
    if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) !=
            MPI_SUCCESS ||
        pid == MPI_UNDEFINED)
      error("MPI_Waitany failed.");
    // message( "request from proxy %i has arrived." , pid );
    proxy_parts_exchange_second(&e->proxies[pid]);
  }

  /* Wait for all the sends to have finished too. */
  if (MPI_Waitall(e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
    error("MPI_Waitall on sends failed.");

  /* Count the total number of incoming particles and make sure we have
     enough space to accommodate them. */
  int count_parts_in = 0;
  int count_gparts_in = 0;
  int count_sparts_in = 0;
  int count_bparts_in = 0;
  for (int k = 0; k < e->nr_proxies; k++) {
    count_parts_in += e->proxies[k].nr_parts_in;
    count_gparts_in += e->proxies[k].nr_gparts_in;
    count_sparts_in += e->proxies[k].nr_sparts_in;
    count_bparts_in += e->proxies[k].nr_bparts_in;
  }
  if (e->verbose) {
    message(
        "sent out %zu/%zu/%zu/%zu parts/gparts/sparts/bparts, got %i/%i/%i/%i "
        "back.",
        *Npart, *Ngpart, *Nspart, *Nbpart, count_parts_in, count_gparts_in,
        count_sparts_in, count_bparts_in);
  }

  /* Reallocate the particle arrays if necessary */
  if (offset_parts + count_parts_in > s->size_parts) {
    s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow;
    struct part *parts_new = NULL;
    struct xpart *xparts_new = NULL;
    if (swift_memalign("parts", (void **)&parts_new, part_align,
                       sizeof(struct part) * s->size_parts) != 0 ||
        swift_memalign("xparts", (void **)&xparts_new, xpart_align,
                       sizeof(struct xpart) * s->size_parts) != 0)
      error("Failed to allocate new part data.");
    memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts);
    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts);
    swift_free("parts", s->parts);
    swift_free("xparts", s->xparts);
    s->parts = parts_new;
    s->xparts = xparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_parts; k++) {
      if (s->parts[k].gpart != NULL) {
        s->parts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }

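  /* Same for the star particles. */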
  if (offset_sparts + count_sparts_in > s->size_sparts) {
    s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow;
    struct spart *sparts_new = NULL;
    if (swift_memalign("sparts", (void **)&sparts_new, spart_align,
                       sizeof(struct spart) * s->size_sparts) != 0)
      error("Failed to allocate new spart data.");
    memcpy(sparts_new, s->sparts, sizeof(struct spart) * offset_sparts);
    swift_free("sparts", s->sparts);
    s->sparts = sparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_sparts; k++) {
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }

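  /* Same for the black hole particles. */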
  if (offset_bparts + count_bparts_in > s->size_bparts) {
    s->size_bparts = (offset_bparts + count_bparts_in) * engine_parts_size_grow;
    struct bpart *bparts_new = NULL;
    if (swift_memalign("bparts", (void **)&bparts_new, bpart_align,
                       sizeof(struct bpart) * s->size_bparts) != 0)
      error("Failed to allocate new bpart data.");
    memcpy(bparts_new, s->bparts, sizeof(struct bpart) * offset_bparts);
    swift_free("bparts", s->bparts);
    s->bparts = bparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_bparts; k++) {
      if (s->bparts[k].gpart != NULL) {
        s->bparts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }

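  /* Same for the gravity particles. */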
  if (offset_gparts + count_gparts_in > s->size_gparts) {
    s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow;
    struct gpart *gparts_new = NULL;
    if (swift_memalign("gparts", (void **)&gparts_new, gpart_align,
                       sizeof(struct gpart) * s->size_gparts) != 0)
      error("Failed to allocate new gpart data.");
    memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts);
    swift_free("gparts", s->gparts);
    s->gparts = gparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_gparts; k++) {
      if (s->gparts[k].type == swift_type_gas) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      } else if (s->gparts[k].type == swift_type_stars) {
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      } else if (s->gparts[k].type == swift_type_black_hole) {
        s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      }
    }
  }

  /* Collect the requests for the particle data from the proxies. */
  int nr_in = 0, nr_out = 0;
  for (int k = 0; k < e->nr_proxies; k++) {
    if (e->proxies[k].nr_parts_in > 0) {
      reqs_in[5 * k] = e->proxies[k].req_parts_in;
      reqs_in[5 * k + 1] = e->proxies[k].req_xparts_in;
      nr_in += 2;
    } else {
      reqs_in[5 * k] = reqs_in[5 * k + 1] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_gparts_in > 0) {
      reqs_in[5 * k + 2] = e->proxies[k].req_gparts_in;
      nr_in += 1;
    } else {
      reqs_in[5 * k + 2] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_sparts_in > 0) {
      reqs_in[5 * k + 3] = e->proxies[k].req_sparts_in;
      nr_in += 1;
    } else {
      reqs_in[5 * k + 3] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_bparts_in > 0) {
      reqs_in[5 * k + 4] = e->proxies[k].req_bparts_in;
      nr_in += 1;
    } else {
      reqs_in[5 * k + 4] = MPI_REQUEST_NULL;
    }

    if (e->proxies[k].nr_parts_out > 0) {
      reqs_out[5 * k] = e->proxies[k].req_parts_out;
      reqs_out[5 * k + 1] = e->proxies[k].req_xparts_out;
      nr_out += 2;
    } else {
      reqs_out[5 * k] = reqs_out[5 * k + 1] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_gparts_out > 0) {
      reqs_out[5 * k + 2] = e->proxies[k].req_gparts_out;
      nr_out += 1;
    } else {
      reqs_out[5 * k + 2] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_sparts_out > 0) {
      reqs_out[5 * k + 3] = e->proxies[k].req_sparts_out;
      nr_out += 1;
    } else {
      reqs_out[5 * k + 3] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_bparts_out > 0) {
      reqs_out[5 * k + 4] = e->proxies[k].req_bparts_out;
      nr_out += 1;
    } else {
      reqs_out[5 * k + 4] = MPI_REQUEST_NULL;
    }
  }

  /* Wait for each part array to come in and collect the new
     parts from the proxies. */
  int count_parts = 0, count_gparts = 0, count_sparts = 0, count_bparts = 0;
  for (int k = 0; k < nr_in; k++) {
    int err, pid;
    if ((err = MPI_Waitany(5 * e->nr_proxies, reqs_in, &pid,
                           MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
      char buff[MPI_MAX_ERROR_STRING];
      int res;
      MPI_Error_string(err, buff, &res);
      error("MPI_Waitany failed (%s).", buff);
    }
    if (pid == MPI_UNDEFINED) break;
    // message( "request from proxy %i has arrived." , pid / 5 );
    pid = 5 * (pid / 5);
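    /* pid now indexes the first of the five request slots of that proxy. */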

    /* If all the requests for a given proxy have arrived... */
    if (reqs_in[pid + 0] == MPI_REQUEST_NULL &&
        reqs_in[pid + 1] == MPI_REQUEST_NULL &&
        reqs_in[pid + 2] == MPI_REQUEST_NULL &&
        reqs_in[pid + 3] == MPI_REQUEST_NULL &&
        reqs_in[pid + 4] == MPI_REQUEST_NULL) {

      /* Copy the particle data to the part/xpart/gpart arrays. */
      struct proxy *prox = &e->proxies[pid / 5];
      memcpy(&s->parts[offset_parts + count_parts], prox->parts_in,
             sizeof(struct part) * prox->nr_parts_in);
      memcpy(&s->xparts[offset_parts + count_parts], prox->xparts_in,
             sizeof(struct xpart) * prox->nr_parts_in);
      memcpy(&s->gparts[offset_gparts + count_gparts], prox->gparts_in,
             sizeof(struct gpart) * prox->nr_gparts_in);
      memcpy(&s->sparts[offset_sparts + count_sparts], prox->sparts_in,
             sizeof(struct spart) * prox->nr_sparts_in);
      memcpy(&s->bparts[offset_bparts + count_bparts], prox->bparts_in,
             sizeof(struct bpart) * prox->nr_bparts_in);

      /* for (int k = offset; k < offset + count; k++)
         message(
            "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.",
            s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1],
            s->parts[k].x[2], s->parts[k].h, p->nodeID); */

      /* Re-link the gparts. */
      for (int kk = 0; kk < prox->nr_gparts_in; kk++) {
        struct gpart *gp = &s->gparts[offset_gparts + count_gparts + kk];

        if (gp->type == swift_type_gas) {
          struct part *p =
              &s->parts[offset_parts + count_parts - gp->id_or_neg_offset];
          gp->id_or_neg_offset = s->parts - p;
          p->gpart = gp;
        } else if (gp->type == swift_type_stars) {
          struct spart *sp =
              &s->sparts[offset_sparts + count_sparts - gp->id_or_neg_offset];
          gp->id_or_neg_offset = s->sparts - sp;
          sp->gpart = gp;
        } else if (gp->type == swift_type_black_hole) {
          struct bpart *bp =
              &s->bparts[offset_bparts + count_bparts - gp->id_or_neg_offset];
          gp->id_or_neg_offset = s->bparts - bp;
          bp->gpart = gp;
        }
      }

      /* Advance the counters. */
      count_parts += prox->nr_parts_in;
      count_gparts += prox->nr_gparts_in;
      count_sparts += prox->nr_sparts_in;
      count_bparts += prox->nr_bparts_in;
    }
  }

  /* Wait for all the sends to have finished too. */
  if (nr_out > 0)
    if (MPI_Waitall(5 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
        MPI_SUCCESS)
      error("MPI_Waitall on sends failed.");

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());

  /* Return the number of harvested parts. */
  *Npart = count_parts;
  *Ngpart = count_gparts;
  *Nspart = count_sparts;
  *Nbpart = count_bparts;

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
 * @brief Exchanges the top-level multipoles between all the nodes
 * such that every node has a multipole for each top-level cell.
 *
 * @param e The #engine.
 */
void engine_exchange_top_multipoles(struct engine *e) {

#ifdef WITH_MPI

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  for (int i = 0; i < e->s->nr_cells; ++i) {
    const struct gravity_tensors *m = &e->s->multipoles_top[i];
    if (e->s->cells_top[i].nodeID == engine_rank) {
      if (m->m_pole.M_000 > 0.) {
        if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
          error("Invalid multipole position in X");
        if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
          error("Invalid multipole position in Y");
        if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
          error("Invalid multipole position in Z");
      }
    } else {