/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif

/* This object's header. */
#include "engine.h"

/* Local headers. */
#include "active.h"
#include "atomic.h"
#include "black_holes_properties.h"
#include "cell.h"
#include "chemistry.h"
#include "clocks.h"
#include "cooling.h"
#include "cosmology.h"
#include "cycle.h"
#include "debug.h"
#include "entropy_floor.h"
#include "equation_of_state.h"
#include "error.h"
#include "feedback.h"
#include "gravity.h"
#include "gravity_cache.h"
#include "hydro.h"
#include "logger.h"
#include "logger_io.h"
#include "map.h"
#include "memuse.h"
#include "minmax.h"
#include "outputlist.h"
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "restart.h"
#include "runner.h"
#include "serial_io.h"
#include "single_io.h"
#include "sort_part.h"
#include "star_formation.h"
#include "star_formation_logger.h"
#include "stars_io.h"
#include "statistics.h"
#include "timers.h"
#include "tools.h"
#include "units.h"
#include "velociraptor_interface.h"
#include "version.h"

/* Particle cache size. */
#define CACHE_SIZE 512

const char *engine_policy_names[] = {"none",
                                     "rand",
                                     "steal",
                                     "keep",
                                     "block",
                                     "cpu tight",
                                     "mpi",
                                     "numa affinity",
                                     "hydro",
                                     "self gravity",
                                     "external gravity",
                                     "cosmological integration",
                                     "drift everything",
                                     "reconstruct multi-poles",
                                     "temperature",
                                     "cooling",
                                     "stars",
                                     "structure finding",
                                     "star formation",
                                     "feedback",
                                     "black holes",
                                     "fof search",
                                     "time-step limiter",
                                     "time-step sync"};

/** The rank of the engine as a global variable (for messages). */
int engine_rank;

/** The current step of the engine as a global variable (for messages). */
int engine_current_step;

extern int engine_max_parts_per_ghost;
extern int engine_max_sparts_per_ghost;

/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
 * @param l A pointer to the #link, will be modified atomically.
 * @param t The #task.
 */
void engine_addlink(struct engine *e, struct link **l, struct task *t) {

#ifdef SWIFT_DEBUG_CHECKS
  if (t == NULL) {
    error("Trying to link NULL task.");
  }
#endif

  /* Get the next free link. */
  const size_t ind = atomic_inc(&e->nr_links);
  if (ind >= e->size_links) {
    error(
        "Link table overflow. Increase the value of "
        "`Scheduler:links_per_tasks`.");
  }
  struct link *res = &e->links[ind];

  /* Set it atomically. */
  res->t = t;
  res->next = atomic_swap(l, res);
}

/**
 * @brief Repartition the cells amongst the nodes.
 *
 * @param e The #engine.
 */
void engine_repartition(struct engine *e) {

#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Be verbose about this. */
  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
  fflush(stdout);

  /* Check that all cells have been drifted to the current time */
  space_check_drift_point(e->s, e->ti_current, /*check_multipoles=*/0);
#endif

  /* Clear the repartition flag. */
  e->forcerepart = 0;

  /* Nothing to do if only using a single node. Also avoids METIS
   * bug that doesn't handle this case well. */
  if (e->nr_nodes == 1) return;

  /* Generate the fixed costs include file. */
  if (e->step > 3 && e->reparttype->trigger <= 1.f) {
    task_dump_stats("partition_fixed_costs.h", e, /* header = */ 1,
                    /* allranks = */ 1);
  }

  /* Do the repartitioning. */
  partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
                        e->sched.tasks, e->sched.nr_tasks);

  /* Partitioning requires copies of the particles, so we need to reduce the
   * memory in use to the minimum; we can free the sorting indices and the
   * tasks as these will be regenerated at the next rebuild. Also the foreign
   * particle arrays can go as these will be regenerated in proxy exchange. */

  /* Sorting indices. */
  if (e->s->cells_top != NULL) space_free_cells(e->s);

  /* Task arrays. */
  scheduler_free_tasks(&e->sched);

  /* Foreign parts. */
  space_free_foreign_parts(e->s);

  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of
     how many particles go from where to where, then re-allocating
     the parts array, and emitting the sends and receives.
     Finally, the space, tasks, and proxies need to be rebuilt. */

  /* Redistribute the particles between the nodes. */
  engine_redistribute(e);

  /* Make the proxies. */
  engine_makeproxies(e);

  /* Tell the engine it should re-build whenever possible */
  e->forcerebuild = 1;

  /* Flag that a repartition has taken place */
  e->step_props |= engine_step_prop_repartition;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  if (e->reparttype->type != REPART_NONE)
    error("SWIFT was not compiled with MPI and METIS or ParMETIS support.");

  /* Clear the repartition flag. */
  e->forcerepart = 0;
#endif
}

/**
 * @brief Decide whether to trigger a repartition of the cells amongst the
 * nodes.
 *
 * @param e The #engine.
 */
void engine_repartition_trigger(struct engine *e) {

#ifdef WITH_MPI

  const ticks tic = getticks();

  /* Do nothing if there have not been enough steps since the last repartition
   * as we don't want to repeat this too often or immediately after a
   * repartition step. Also nothing to do when repartitioning is not
   * requested. */
  if (e->step - e->last_repartition >= 2 &&
      e->reparttype->type != REPART_NONE) {

    /* If we have fixed costs available and this is step 2 or we are forcing
     * repartitioning then we do a fixed costs one now. */
    if (e->reparttype->trigger > 1 ||
        (e->step == 2 && e->reparttype->use_fixed_costs)) {

      if (e->reparttype->trigger > 1) {
        if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1;
      } else {
        e->forcerepart = 1;
      }
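      /* Fixed costs will be used, so the task tick timings are not needed. */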
      e->reparttype->use_ticks = 0;

    } else {

      /* It is only worth checking the CPU loads when we have processed a
       * significant number of all particles as we require all tasks to have
       * timings. */
      if ((e->updates > 1 &&
           e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
          (e->g_updates > 1 &&
           e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {

        /* Should we use the task timings or the fixed costs? */
        if (e->reparttype->use_fixed_costs > 1) {
          e->reparttype->use_ticks = 0;
        } else {
          e->reparttype->use_ticks = 1;
        }

        /* Get CPU time used since the last call to this function. */
        double elapsed_cputime =
            clocks_get_cputime_used() - e->cputime_last_step;

        /* Gather the elapsed CPU times from all ranks for the last step. */
        double elapsed_cputimes[e->nr_nodes];
        MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
                   MPI_DOUBLE, 0, MPI_COMM_WORLD);
        if (e->nodeID == 0) {

          /* Get the range and mean of cputimes. */
          double mintime = elapsed_cputimes[0];
          double maxtime = elapsed_cputimes[0];
          double sum = elapsed_cputimes[0];
          for (int k = 1; k < e->nr_nodes; k++) {
            if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
            if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
            sum += elapsed_cputimes[k];
          }
          double mean = sum / (double)e->nr_nodes;

          /* Are we out of balance? */
          double abs_trigger = fabs(e->reparttype->trigger);
          if (((maxtime - mintime) / mean) > abs_trigger) {
            if (e->verbose)
              message("trigger fraction %.3f > %.3f will repartition",
                      (maxtime - mintime) / mean, abs_trigger);
            e->forcerepart = 1;
          } else {
            if (e->verbose)
              message("trigger fraction %.3f <= %.3f will not repartition",
                      (maxtime - mintime) / mean, abs_trigger);
          }
        }
      }

      /* All nodes do this together. */
      MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
    }

    /* Remember we did this. */
    if (e->forcerepart) e->last_repartition = e->step;
  }

  /* We always reset CPU time for next check, unless it will not be used. */
  if (e->reparttype->type != REPART_NONE)
    e->cputime_last_step = clocks_get_cputime_used();

  if (e->verbose)
    message("took %.3f %s", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#endif
}

/**
 * @brief Exchange cell structures with other nodes.
 *
 * @param e The #engine.
 */
void engine_exchange_cells(struct engine *e) {

#ifdef WITH_MPI

  const int with_gravity = e->policy & engine_policy_self_gravity;
  const ticks tic = getticks();

  /* Exchange the cell structure with neighbouring ranks. */
  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity);

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
 * @brief Exchange straying particles with other nodes.
 *
 * @param e The #engine.
 * @param offset_parts The index in the parts array as of which the foreign
 *        parts reside (i.e. the current number of local #part).
 * @param ind_part The foreign #cell ID of each part.
 * @param Npart The number of stray parts, contains the number of parts
 *        received on return.
 * @param offset_gparts The index in the gparts array as of which the foreign
 *        parts reside (i.e. the current number of local #gpart).
 * @param ind_gpart The foreign #cell ID of each gpart.
 * @param Ngpart The number of stray gparts, contains the number of gparts
 *        received on return.
 * @param offset_sparts The index in the sparts array as of which the foreign
 *        parts reside (i.e. the current number of local #spart).
 * @param ind_spart The foreign #cell ID of each spart.
 * @param Nspart The number of stray sparts, contains the number of sparts
 *        received on return.
 * @param offset_bparts The index in the bparts array as of which the foreign
 *        parts reside (i.e. the current number of local #bpart).
 * @param ind_bpart The foreign #cell ID of each bpart.
 * @param Nbpart The number of stray bparts, contains the number of bparts
 *        received on return.
 *
 * Note that this function does not mess up the linkage between parts and
 * gparts, i.e. the received particles have correct linkage.
 */
void engine_exchange_strays(struct engine *e, const size_t offset_parts,
                            const int *ind_part, size_t *Npart,
                            const size_t offset_gparts, const int *ind_gpart,
                            size_t *Ngpart, const size_t offset_sparts,
                            const int *ind_spart, size_t *Nspart,
                            const size_t offset_bparts, const int *ind_bpart,
                            size_t *Nbpart) {

#ifdef WITH_MPI

  struct space *s = e->s;
  ticks tic = getticks();

  /* Re-set the proxies. */
  for (int k = 0; k < e->nr_proxies; k++) {
    e->proxies[k].nr_parts_out = 0;
    e->proxies[k].nr_gparts_out = 0;
    e->proxies[k].nr_sparts_out = 0;
    e->proxies[k].nr_bparts_out = 0;
  }

  /* Put the parts into the corresponding proxies. */
  for (size_t k = 0; k < *Npart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_part[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_part[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->parts[offset_parts + k].id,
          s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1],
          s->parts[offset_parts + k].x[2]);
    }

    /* Re-link the associated gpart with the buffer offset of the part. */
    if (s->parts[offset_parts + k].gpart != NULL) {
      s->parts[offset_parts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_parts_out;
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->parts[offset_parts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the part and xpart into the proxy. */
    proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k],
                     &s->xparts[offset_parts + k], 1);
  }

  /* Put the sparts into the corresponding proxies. */
  for (size_t k = 0; k < *Nspart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_spart[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_spart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->sparts[offset_sparts + k].id,
          s->sparts[offset_sparts + k].x[0], s->sparts[offset_sparts + k].x[1],
          s->sparts[offset_sparts + k].x[2]);
    }

    /* Re-link the associated gpart with the buffer offset of the spart. */
    if (s->sparts[offset_sparts + k].gpart != NULL) {
      s->sparts[offset_sparts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_sparts_out;
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->sparts[offset_sparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the spart into the proxy */
    proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1);
  }

  /* Put the bparts into the corresponding proxies. */
  for (size_t k = 0; k < *Nbpart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_bpart[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_bpart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->bparts[offset_bparts + k].id,
          s->bparts[offset_bparts + k].x[0], s->bparts[offset_bparts + k].x[1],
          s->bparts[offset_bparts + k].x[2]);
    }

    /* Re-link the associated gpart with the buffer offset of the bpart. */
    if (s->bparts[offset_bparts + k].gpart != NULL) {
      s->bparts[offset_bparts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_bparts_out;
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->bparts[offset_bparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the bpart into the proxy */
    proxy_bparts_load(&e->proxies[pid], &s->bparts[offset_bparts + k], 1);
  }

  /* Put the gparts into the corresponding proxies. */
  for (size_t k = 0; k < *Ngpart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_gpart[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_gpart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lli, x=[%e,%e,%e].",
          node_id, s->gparts[offset_gparts + k].id_or_neg_offset,
          s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1],
          s->gparts[offset_gparts + k].x[2]);
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->gparts[offset_gparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the gpart into the proxy */
    proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
  }

  /* Launch the proxies. */
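  /* Each proxy owns five request slots (parts, xparts, gparts, sparts and
   * bparts), hence the factor of 5 in the request array sizes and indices
   * below. */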
  MPI_Request reqs_in[5 * engine_maxproxies];
  MPI_Request reqs_out[5 * engine_maxproxies];
  for (int k = 0; k < e->nr_proxies; k++) {
    proxy_parts_exchange_first(&e->proxies[k]);
    reqs_in[k] = e->proxies[k].req_parts_count_in;
    reqs_out[k] = e->proxies[k].req_parts_count_out;
  }

  /* Wait for each count to come in and start the recv. */
  for (int k = 0; k < e->nr_proxies; k++) {
    int pid = MPI_UNDEFINED;
    if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) !=
            MPI_SUCCESS ||
        pid == MPI_UNDEFINED)
      error("MPI_Waitany failed.");
    // message( "request from proxy %i has arrived." , pid );
    proxy_parts_exchange_second(&e->proxies[pid]);
  }

  /* Wait for all the sends to have finished too. */
  if (MPI_Waitall(e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
    error("MPI_Waitall on sends failed.");

  /* Count the total number of incoming particles and make sure we have
     enough space to accommodate them. */
  int count_parts_in = 0;
  int count_gparts_in = 0;
  int count_sparts_in = 0;
  int count_bparts_in = 0;
  for (int k = 0; k < e->nr_proxies; k++) {
    count_parts_in += e->proxies[k].nr_parts_in;
    count_gparts_in += e->proxies[k].nr_gparts_in;
    count_sparts_in += e->proxies[k].nr_sparts_in;
    count_bparts_in += e->proxies[k].nr_bparts_in;
  }
  if (e->verbose) {
    message(
        "sent out %zu/%zu/%zu/%zu parts/gparts/sparts/bparts, got %i/%i/%i/%i "
        "back.",
        *Npart, *Ngpart, *Nspart, *Nbpart, count_parts_in, count_gparts_in,
        count_sparts_in, count_bparts_in);
  }

  /* Reallocate the particle arrays if necessary */
  if (offset_parts + count_parts_in > s->size_parts) {
    s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow;
    struct part *parts_new = NULL;
    struct xpart *xparts_new = NULL;
    if (swift_memalign("parts", (void **)&parts_new, part_align,
                       sizeof(struct part) * s->size_parts) != 0 ||
        swift_memalign("xparts", (void **)&xparts_new, xpart_align,
                       sizeof(struct xpart) * s->size_parts) != 0)
      error("Failed to allocate new part data.");
    memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts);
    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts);
    swift_free("parts", s->parts);
    swift_free("xparts", s->xparts);
    s->parts = parts_new;
    s->xparts = xparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_parts; k++) {
      if (s->parts[k].gpart != NULL) {
        s->parts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }

  if (offset_sparts + count_sparts_in > s->size_sparts) {
    s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow;
    struct spart *sparts_new = NULL;
    if (swift_memalign("sparts", (void **)&sparts_new, spart_align,
                       sizeof(struct spart) * s->size_sparts) != 0)
      error("Failed to allocate new spart data.");
    memcpy(sparts_new, s->sparts, sizeof(struct spart) * offset_sparts);
    swift_free("sparts", s->sparts);
    s->sparts = sparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_sparts; k++) {
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }

  if (offset_bparts + count_bparts_in > s->size_bparts) {
    s->size_bparts = (offset_bparts + count_bparts_in) * engine_parts_size_grow;
    struct bpart *bparts_new = NULL;
    if (swift_memalign("bparts", (void **)&bparts_new, bpart_align,
                       sizeof(struct bpart) * s->size_bparts) != 0)
      error("Failed to allocate new bpart data.");
    memcpy(bparts_new, s->bparts, sizeof(struct bpart) * offset_bparts);
    swift_free("bparts", s->bparts);
    s->bparts = bparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_bparts; k++) {
      if (s->bparts[k].gpart != NULL) {
        s->bparts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }

  if (offset_gparts + count_gparts_in > s->size_gparts) {
    s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow;
    struct gpart *gparts_new = NULL;
    if (swift_memalign("gparts", (void **)&gparts_new, gpart_align,
                       sizeof(struct gpart) * s->size_gparts) != 0)
      error("Failed to allocate new gpart data.");
    memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts);
    swift_free("gparts", s->gparts);
    s->gparts = gparts_new;

    /* Reset the links */
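    /* Each gpart stores minus the index of its companion particle in
     * id_or_neg_offset; use it to restore the back-pointers now that the
     * gpart array has moved. */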
    for (size_t k = 0; k < offset_gparts; k++) {
      if (s->gparts[k].type == swift_type_gas) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      } else if (s->gparts[k].type == swift_type_stars) {
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      } else if (s->gparts[k].type == swift_type_black_hole) {
        s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      }
    }
  }

  /* Collect the requests for the particle data from the proxies. */
  int nr_in = 0, nr_out = 0;
  for (int k = 0; k < e->nr_proxies; k++) {
    if (e->proxies[k].nr_parts_in > 0) {
      reqs_in[5 * k] = e->proxies[k].req_parts_in;
      reqs_in[5 * k + 1] = e->proxies[k].req_xparts_in;
      nr_in += 2;
    } else {
      reqs_in[5 * k] = reqs_in[5 * k + 1] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_gparts_in > 0) {
      reqs_in[5 * k + 2] = e->proxies[k].req_gparts_in;
      nr_in += 1;
    } else {
      reqs_in[5 * k + 2] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_sparts_in > 0) {
      reqs_in[5 * k + 3] = e->proxies[k].req_sparts_in;
      nr_in += 1;
    } else {
      reqs_in[5 * k + 3] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_bparts_in > 0) {
      reqs_in[5 * k + 4] = e->proxies[k].req_bparts_in;
      nr_in += 1;
    } else {
      reqs_in[5 * k + 4] = MPI_REQUEST_NULL;
    }

    if (e->proxies[k].nr_parts_out > 0) {
      reqs_out[5 * k] = e->proxies[k].req_parts_out;
      reqs_out[5 * k + 1] = e->proxies[k].req_xparts_out;
      nr_out += 2;
    } else {
      reqs_out[5 * k] = reqs_out[5 * k + 1] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_gparts_out > 0) {
      reqs_out[5 * k + 2] = e->proxies[k].req_gparts_out;
      nr_out += 1;
    } else {
      reqs_out[5 * k + 2] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_sparts_out > 0) {
      reqs_out[5 * k + 3] = e->proxies[k].req_sparts_out;
      nr_out += 1;
    } else {
      reqs_out[5 * k + 3] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_bparts_out > 0) {
      reqs_out[5 * k + 4] = e->proxies[k].req_bparts_out;
      nr_out += 1;
    } else {
      reqs_out[5 * k + 4] = MPI_REQUEST_NULL;
    }
  }

  /* Wait for each part array to come in and collect the new
     parts from the proxies. */
  int count_parts = 0, count_gparts = 0, count_sparts = 0, count_bparts = 0;
  for (int k = 0; k < nr_in; k++) {
    int err, pid;
    if ((err = MPI_Waitany(5 * e->nr_proxies, reqs_in, &pid,
                           MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
      char buff[MPI_MAX_ERROR_STRING];
      int res;
      MPI_Error_string(err, buff, &res);
      error("MPI_Waitany failed (%s).", buff);
    }
    if (pid == MPI_UNDEFINED) break;
    // message( "request from proxy %i has arrived." , pid / 5 );
    pid = 5 * (pid / 5);

    /* If all the requests for a given proxy have arrived... */
    if (reqs_in[pid + 0] == MPI_REQUEST_NULL &&
        reqs_in[pid + 1] == MPI_REQUEST_NULL &&
        reqs_in[pid + 2] == MPI_REQUEST_NULL &&
        reqs_in[pid + 3] == MPI_REQUEST_NULL &&
        reqs_in[pid + 4] == MPI_REQUEST_NULL) {
      /* Copy the particle data to the part/xpart/gpart arrays. */
      struct proxy *prox = &e->proxies[pid / 5];
      memcpy(&s->parts[offset_parts + count_parts], prox->parts_in,
             sizeof(struct part) * prox->nr_parts_in);
      memcpy(&s->xparts[offset_parts + count_parts], prox->xparts_in,
             sizeof(struct xpart) * prox->nr_parts_in);
      memcpy(&s->gparts[offset_gparts + count_gparts], prox->gparts_in,
             sizeof(struct gpart) * prox->nr_gparts_in);
      memcpy(&s->sparts[offset_sparts + count_sparts], prox->sparts_in,
             sizeof(struct spart) * prox->nr_sparts_in);
      memcpy(&s->bparts[offset_bparts + count_bparts], prox->bparts_in,
             sizeof(struct bpart) * prox->nr_bparts_in);
      /* for (int k = offset; k < offset + count; k++)
         message(
            "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.",
            s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1],
            s->parts[k].x[2], s->parts[k].h, p->nodeID); */

      /* Re-link the gparts. */
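      /* Incoming gparts carry minus the index of their companion within the
       * proxy buffer; convert this into an offset into the global arrays and
       * restore the pointers in both directions. */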
      for (int kk = 0; kk < prox->nr_gparts_in; kk++) {
        struct gpart *gp = &s->gparts[offset_gparts + count_gparts + kk];

        if (gp->type == swift_type_gas) {
          struct part *p =
              &s->parts[offset_parts + count_parts - gp->id_or_neg_offset];
          gp->id_or_neg_offset = s->parts - p;
          p->gpart = gp;
        } else if (gp->type == swift_type_stars) {
          struct spart *sp =
              &s->sparts[offset_sparts + count_sparts - gp->id_or_neg_offset];
          gp->id_or_neg_offset = s->sparts - sp;
          sp->gpart = gp;
        } else if (gp->type == swift_type_black_hole) {
          struct bpart *bp =
              &s->bparts[offset_bparts + count_bparts - gp->id_or_neg_offset];
          gp->id_or_neg_offset = s->bparts - bp;
          bp->gpart = gp;
        }
      }

      /* Advance the counters. */
      count_parts += prox->nr_parts_in;
      count_gparts += prox->nr_gparts_in;
      count_sparts += prox->nr_sparts_in;
      count_bparts += prox->nr_bparts_in;
    }
  }

  /* Wait for all the sends to have finished too. */
  if (nr_out > 0)
    if (MPI_Waitall(5 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
        MPI_SUCCESS)
      error("MPI_Waitall on sends failed.");

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());

  /* Return the number of harvested parts. */
  *Npart = count_parts;
  *Ngpart = count_gparts;
  *Nspart = count_sparts;
  *Nbpart = count_bparts;

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
 * @brief Exchanges the top-level multipoles between all the nodes
 * such that every node has a multipole for each top-level cell.
 *
 * @param e The #engine.
 */
void engine_exchange_top_multipoles(struct engine *e) {

#ifdef WITH_MPI

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
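  /* Sanity-check the centre of mass of every local top-level multipole
   * before the exchange. */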
  for (int i = 0; i < e->s->nr_cells; ++i) {
    const struct gravity_tensors *m = &e->s->multipoles_top[i];
    if (e->s->cells_top[i].nodeID == engine_rank) {
      if (m->m_pole.M_000 > 0.) {
        if (m->CoM[0] < 0. || m->CoM[0] > e->s->dim[0])
          error("Invalid multipole position in X");
        if (m->CoM[1] < 0. || m->CoM[1] > e->s->dim[1])
          error("Invalid multipole position in Y");
        if (m->CoM[2] < 0. || m->CoM[2] > e->s->dim[2])
          error("Invalid multipole position in Z");
      }
    } else {