engine.c 172 KB
Newer Older
Pedro Gonnet's avatar
Pedro Gonnet committed
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
Peter W. Draper's avatar
Peter W. Draper committed
5
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
6
7
8
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
9
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
10
11
12
13
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
14
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
15
16
17
18
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
19
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
20
21
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30
31
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
32
#include <stdbool.h>
33
34
35
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
36
#include <sys/stat.h>
37
#include <sys/types.h>
38
#include <unistd.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
39

40
41
/* MPI headers. */
#ifdef WITH_MPI
42

43
#include <mpi.h>
44
45
#endif

Angus Lepper's avatar
Angus Lepper committed
46
47
#ifdef HAVE_LIBNUMA
#include <numa.h>
48
49
#endif

50
/* This object's header. */
Pedro Gonnet's avatar
Pedro Gonnet committed
51
#include "engine.h"
52
53

/* Local headers. */
54
#include "active.h"
55
#include "atomic.h"
56
#include "black_holes_properties.h"
57
#include "cell.h"
58
#include "chemistry.h"
59
#include "clocks.h"
60
#include "cooling.h"
61
#include "cosmology.h"
62
63
#include "cycle.h"
#include "debug.h"
64
#include "entropy_floor.h"
65
#include "equation_of_state.h"
66
#include "error.h"
Alexei Borissov's avatar
Alexei Borissov committed
67
#include "feedback.h"
Matthieu Schaller's avatar
Matthieu Schaller committed
68
#include "fof.h"
69
#include "gravity.h"
70
#include "gravity_cache.h"
71
#include "hydro.h"
lhausamm's avatar
lhausamm committed
72
#include "logger.h"
lhausamm's avatar
lhausamm committed
73
#include "logger_io.h"
74
#include "map.h"
75
#include "memuse.h"
76
#include "minmax.h"
77
#include "mpiuse.h"
78
#include "outputlist.h"
79
#include "parallel_io.h"
80
#include "part.h"
81
#include "partition.h"
James Willis's avatar
James Willis committed
82
#include "profiler.h"
83
#include "proxy.h"
84
#include "restart.h"
85
#include "runner.h"
86
87
#include "serial_io.h"
#include "single_io.h"
88
#include "sort_part.h"
89
#include "star_formation.h"
90
#include "star_formation_logger.h"
Loic Hausammann's avatar
Loic Hausammann committed
91
#include "stars_io.h"
92
#include "statistics.h"
93
#include "timers.h"
94
#include "tools.h"
95
#include "units.h"
96
#include "velociraptor_interface.h"
97
#include "version.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
98

99
100
101
/* Particle cache size. */
#define CACHE_SIZE 512

102
103
104
105
106
const char *engine_policy_names[] = {"none",
                                     "rand",
                                     "steal",
                                     "keep",
                                     "block",
107
                                     "cpu tight",
108
                                     "mpi",
109
                                     "numa affinity",
110
                                     "hydro",
111
112
113
114
115
                                     "self gravity",
                                     "external gravity",
                                     "cosmological integration",
                                     "drift everything",
                                     "reconstruct multi-poles",
116
                                     "temperature",
117
                                     "cooling",
James Willis's avatar
James Willis committed
118
                                     "stars",
119
                                     "structure finding",
120
                                     "star formation",
121
                                     "feedback",
122
                                     "black holes",
123
                                     "fof search",
124
                                     "time-step limiter",
Loic Hausammann's avatar
Loic Hausammann committed
125
                                     "time-step sync",
Loic Hausammann's avatar
Format    
Loic Hausammann committed
126
                                     "logger"};
Pedro Gonnet's avatar
Pedro Gonnet committed
127

128
129
130
/** The rank of the engine as a global variable (for messages). */
int engine_rank;

131
/** The current step of the engine as a global variable (for messages). */
132
int engine_current_step;
133

134
135
136
extern int engine_max_parts_per_ghost;
extern int engine_max_sparts_per_ghost;

137
138
139
140
/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
141
 * @param l A pointer to the #link, will be modified atomically.
142
143
144
145
 * @param t The #task.
 *
 * @return The new #link pointer.
 */
146
void engine_addlink(struct engine *e, struct link **l, struct task *t) {
147

148
149
150
151
152
153
#ifdef SWIFT_DEBUG_CHECKS
  if (t == NULL) {
    error("Trying to link NULL task.");
  }
#endif

154
  /* Get the next free link. */
155
  const size_t ind = atomic_inc(&e->nr_links);
156
  if (ind >= e->size_links) {
157
158
159
    error(
        "Link table overflow. Increase the value of "
        "`Scheduler:links_per_tasks`.");
160
161
  }
  struct link *res = &e->links[ind];
162

163
  /* Set it atomically. */
164
  res->t = t;
165
  res->next = atomic_swap(l, res);
166
}
167

168
/**
169
 * @brief Repartition the cells amongst the nodes.
170
171
172
 *
 * @param e The #engine.
 */
173
void engine_repartition(struct engine *e) {
174

175
#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
176

177
178
  ticks tic = getticks();

179
#ifdef SWIFT_DEBUG_CHECKS
180
181
182
183
  /* Be verbose about this. */
  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
  fflush(stdout);

184
  /* Check that all cells have been drifted to the current time */
185
  space_check_drift_point(e->s, e->ti_current, /*check_multipoles=*/0);
186
187
#endif

188
  /* Clear the repartition flag. */
189
  e->forcerepart = 0;
190

191
  /* Nothing to do if only using a single node. Also avoids METIS
192
   * bug that doesn't handle this case well. */
193
  if (e->nr_nodes == 1) return;
194

195
196
  /* Generate the fixed costs include file. */
  if (e->step > 3 && e->reparttype->trigger <= 1.f) {
Peter W. Draper's avatar
Peter W. Draper committed
197
198
    task_dump_stats("partition_fixed_costs.h", e, /* header = */ 1,
                    /* allranks = */ 1);
199
200
  }

201
  /* Do the repartitioning. */
202
  partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
203
                        e->sched.tasks, e->sched.nr_tasks);
204

205
206
  /* Partitioning requires copies of the particles, so we need to reduce the
   * memory in use to the minimum, we can free the sorting indices and the
207
208
   * tasks as these will be regenerated at the next rebuild. Also the foreign
   * particle arrays can go as these will be regenerated in proxy exchange. */
209
210

  /* Sorting indices. */
211
  if (e->s->cells_top != NULL) space_free_cells(e->s);
212

213
214
  /* Task arrays. */
  scheduler_free_tasks(&e->sched);
215

216
217
218
  /* Foreign parts. (no need to nullify the cell pointers as the cells
   * will be regenerated) */
  space_free_foreign_parts(e->s, /*clear_cell_pointers=*/0);
219

220
221
222
  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of
     how many particles go from where to where, then re-allocating
223
     the parts array, and emitting the sends and receives.
224
225
226
     Finally, the space, tasks, and proxies need to be rebuilt. */

  /* Redistribute the particles between the nodes. */
Loic Hausammann's avatar
Loic Hausammann committed
227
  engine_redistribute(e);
228
229
230
231
232
233
234

  /* Make the proxies. */
  engine_makeproxies(e);

  /* Tell the engine it should re-build whenever possible */
  e->forcerebuild = 1;

235
236
237
  /* Flag that a repartition has taken place */
  e->step_props |= engine_step_prop_repartition;

238
239
240
  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
241
#else
242
  if (e->reparttype->type != REPART_NONE)
243
    error("SWIFT was not compiled with MPI and METIS or ParMETIS support.");
244
245
246

  /* Clear the repartition flag. */
  e->forcerepart = 0;
247
#endif
248
}
249

250
251
252
253
254
255
256
257
258
/**
 * @brief Decide whether trigger a repartition the cells amongst the nodes.
 *
 * @param e The #engine.
 */
void engine_repartition_trigger(struct engine *e) {

#ifdef WITH_MPI

259
260
  const ticks tic = getticks();

261
262
263
  /* Do nothing if there have not been enough steps since the last repartition
   * as we don't want to repeat this too often or immediately after a
   * repartition step. Also nothing to do when requested. */
264
265
  if (e->step - e->last_repartition >= 2 &&
      e->reparttype->type != REPART_NONE) {
266

267
268
269
270
271
    /* If we have fixed costs available and this is step 2 or we are forcing
     * repartitioning then we do a fixed costs one now. */
    if (e->reparttype->trigger > 1 ||
        (e->step == 2 && e->reparttype->use_fixed_costs)) {

272
      if (e->reparttype->trigger > 1) {
Matthieu Schaller's avatar
Matthieu Schaller committed
273
        if ((e->step % (int)e->reparttype->trigger) == 0) e->forcerepart = 1;
274
275
276
      } else {
        e->forcerepart = 1;
      }
277
      e->reparttype->use_ticks = 0;
278
279
280

    } else {

281
282
283
      /* It is only worth checking the CPU loads when we have processed a
       * significant number of all particles as we require all tasks to have
       * timings. */
284
285
286
287
288
      if ((e->updates > 1 &&
           e->updates >= e->total_nr_parts * e->reparttype->minfrac) ||
          (e->g_updates > 1 &&
           e->g_updates >= e->total_nr_gparts * e->reparttype->minfrac)) {

289
290
291
292
293
294
        /* Should we are use the task timings or fixed costs. */
        if (e->reparttype->use_fixed_costs > 1) {
          e->reparttype->use_ticks = 0;
        } else {
          e->reparttype->use_ticks = 1;
        }
295

296
        /* Get CPU time used since the last call to this function. */
Peter W. Draper's avatar
Peter W. Draper committed
297
298
        double elapsed_cputime =
            clocks_get_cputime_used() - e->cputime_last_step;
299
300
301

        /* Gather the elapsed CPU times from all ranks for the last step. */
        double elapsed_cputimes[e->nr_nodes];
Peter W. Draper's avatar
Peter W. Draper committed
302
303
        MPI_Gather(&elapsed_cputime, 1, MPI_DOUBLE, elapsed_cputimes, 1,
                   MPI_DOUBLE, 0, MPI_COMM_WORLD);
304
305
        if (e->nodeID == 0) {

306
          /* Get the range and mean of cputimes. */
307
308
          double mintime = elapsed_cputimes[0];
          double maxtime = elapsed_cputimes[0];
309
          double sum = elapsed_cputimes[0];
310
311
312
          for (int k = 1; k < e->nr_nodes; k++) {
            if (elapsed_cputimes[k] > maxtime) maxtime = elapsed_cputimes[k];
            if (elapsed_cputimes[k] < mintime) mintime = elapsed_cputimes[k];
313
            sum += elapsed_cputimes[k];
314
          }
315
          double mean = sum / (double)e->nr_nodes;
316
317

          /* Are we out of balance? */
318
319
          double abs_trigger = fabs(e->reparttype->trigger);
          if (((maxtime - mintime) / mean) > abs_trigger) {
320
            if (e->verbose)
Peter W. Draper's avatar
Peter W. Draper committed
321
322
              message("trigger fraction %.3f > %.3f will repartition",
                      (maxtime - mintime) / mean, abs_trigger);
323
            e->forcerepart = 1;
324
          } else {
Peter W. Draper's avatar
Peter W. Draper committed
325
            if (e->verbose)
Peter W. Draper's avatar
Peter W. Draper committed
326
327
              message("trigger fraction %.3f =< %.3f will not repartition",
                      (maxtime - mintime) / mean, abs_trigger);
328
329
330
          }
        }
      }
331
332
333

      /* All nodes do this together. */
      MPI_Bcast(&e->forcerepart, 1, MPI_INT, 0, MPI_COMM_WORLD);
334
335
336
    }

    /* Remember we did this. */
Peter W. Draper's avatar
Peter W. Draper committed
337
    if (e->forcerepart) e->last_repartition = e->step;
338
339
  }

340
341
342
  /* We always reset CPU time for next check, unless it will not be used. */
  if (e->reparttype->type != REPART_NONE)
    e->cputime_last_step = clocks_get_cputime_used();
343
344
345
346

  if (e->verbose)
    message("took %.3f %s", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
347
348
349
#endif
}

350
/**
351
352
353
354
 * @brief Exchange cell structures with other nodes.
 *
 * @param e The #engine.
 */
355
void engine_exchange_cells(struct engine *e) {
356

357
#ifdef WITH_MPI
358

359
  const int with_gravity = e->policy & engine_policy_self_gravity;
360
  const ticks tic = getticks();
361

362
  /* Exchange the cell structure with neighbouring ranks. */
363
  proxy_cells_exchange(e->proxies, e->nr_proxies, e->s, with_gravity);
364

365
366
367
  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
368
369
370
371
372
373
374

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
375
 * @brief Exchange straying particles with other nodes.
376
377
 *
 * @param e The #engine.
Pedro Gonnet's avatar
Pedro Gonnet committed
378
 * @param offset_parts The index in the parts array as of which the foreign
379
 *        parts reside (i.e. the current number of local #part).
380
 * @param ind_part The foreign #cell ID of each part.
381
 * @param Npart The number of stray parts, contains the number of parts received
382
 *        on return.
Pedro Gonnet's avatar
Pedro Gonnet committed
383
 * @param offset_gparts The index in the gparts array as of which the foreign
384
 *        parts reside (i.e. the current number of local #gpart).
385
 * @param ind_gpart The foreign #cell ID of each gpart.
Pedro Gonnet's avatar
Pedro Gonnet committed
386
 * @param Ngpart The number of stray gparts, contains the number of gparts
Pedro Gonnet's avatar
Pedro Gonnet committed
387
 *        received on return.
388
 * @param offset_sparts The index in the sparts array as of which the foreign
389
 *        parts reside (i.e. the current number of local #spart).
390
391
392
 * @param ind_spart The foreign #cell ID of each spart.
 * @param Nspart The number of stray sparts, contains the number of sparts
 *        received on return.
393
394
395
396
397
 * @param offset_bparts The index in the bparts array as of which the foreign
 *        parts reside (i.e. the current number of local #bpart).
 * @param ind_bpart The foreign #cell ID of each bpart.
 * @param Nbpart The number of stray bparts, contains the number of bparts
 *        received on return.
398
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
399
400
 * Note that this function does not mess-up the linkage between parts and
 * gparts, i.e. the received particles have correct linkeage.
401
 */
402
403
404
405
void engine_exchange_strays(struct engine *e, const size_t offset_parts,
                            const int *ind_part, size_t *Npart,
                            const size_t offset_gparts, const int *ind_gpart,
                            size_t *Ngpart, const size_t offset_sparts,
406
407
408
                            const int *ind_spart, size_t *Nspart,
                            const size_t offset_bparts, const int *ind_bpart,
                            size_t *Nbpart) {
409
410

#ifdef WITH_MPI
411
  struct space *s = e->s;
412
  ticks tic = getticks();
413
414

  /* Re-set the proxies. */
Pedro Gonnet's avatar
Pedro Gonnet committed
415
416
417
  for (int k = 0; k < e->nr_proxies; k++) {
    e->proxies[k].nr_parts_out = 0;
    e->proxies[k].nr_gparts_out = 0;
418
    e->proxies[k].nr_sparts_out = 0;
419
    e->proxies[k].nr_bparts_out = 0;
Pedro Gonnet's avatar
Pedro Gonnet committed
420
  }
421

422
  /* Put the parts into the corresponding proxies. */
423
  for (size_t k = 0; k < *Npart; k++) {
424
425
426
427

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_part[k] == -1) continue;

428
    /* Get the target node and proxy ID. */
429
    const int node_id = e->s->cells_top[ind_part[k]].nodeID;
430
431
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
432
    const int pid = e->proxy_ind[node_id];
433
    if (pid < 0) {
434
435
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
436
          "id=%lld, x=[%e,%e,%e].",
Pedro Gonnet's avatar
Pedro Gonnet committed
437
438
439
          node_id, s->parts[offset_parts + k].id,
          s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1],
          s->parts[offset_parts + k].x[2]);
440
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
441

442
443
    /* Re-link the associated gpart with the buffer offset of the part. */
    if (s->parts[offset_parts + k].gpart != NULL) {
444
445
      s->parts[offset_parts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_parts_out;
446
    }
Pedro Gonnet's avatar
Pedro Gonnet committed
447

448
449
450
451
452
#ifdef SWIFT_DEBUG_CHECKS
    if (s->parts[offset_parts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

453
    /* Load the part and xpart into the proxy. */
454
455
    proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k],
                     &s->xparts[offset_parts + k], 1);
456
457

#ifdef WITH_LOGGER
Loic Hausammann's avatar
Loic Hausammann committed
458
    if (e->policy & engine_policy_logger) {
Loic Hausammann's avatar
Format    
Loic Hausammann committed
459
460
      const uint32_t logger_flag =
          logger_pack_flags_and_data(logger_flag_mpi_exit, node_id);
Loic Hausammann's avatar
Loic Hausammann committed
461
462

      /* Log the particle when leaving a rank. */
Loic Hausammann's avatar
Format    
Loic Hausammann committed
463
464
465
466
      logger_log_part(
          e->logger, &s->parts[offset_parts + k], &s->xparts[offset_parts + k],
          logger_masks_all_part | logger_mask_data[logger_special_flags].mask,
          logger_flag);
Loic Hausammann's avatar
Loic Hausammann committed
467
    }
468
#endif
469
  }
470

471
  /* Put the sparts into the corresponding proxies. */
472
  for (size_t k = 0; k < *Nspart; k++) {
473
474
475
476
477

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_spart[k] == -1) continue;

    /* Get the target node and proxy ID. */
478
479
480
481
    const int node_id = e->s->cells_top[ind_spart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
482
    if (pid < 0) {
483
484
485
486
487
488
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->sparts[offset_sparts + k].id,
          s->sparts[offset_sparts + k].x[0], s->sparts[offset_sparts + k].x[1],
          s->sparts[offset_sparts + k].x[2]);
489
    }
490

491
    /* Re-link the associated gpart with the buffer offset of the spart. */
492
493
494
495
496
    if (s->sparts[offset_sparts + k].gpart != NULL) {
      s->sparts[offset_sparts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_sparts_out;
    }

497
498
499
500
501
#ifdef SWIFT_DEBUG_CHECKS
    if (s->sparts[offset_sparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

502
503
    /* Load the spart into the proxy */
    proxy_sparts_load(&e->proxies[pid], &s->sparts[offset_sparts + k], 1);
504
505

#ifdef WITH_LOGGER
Loic Hausammann's avatar
Loic Hausammann committed
506
    if (e->policy & engine_policy_logger) {
Loic Hausammann's avatar
Format    
Loic Hausammann committed
507
508
      const uint32_t logger_flag =
          logger_pack_flags_and_data(logger_flag_mpi_exit, node_id);
Loic Hausammann's avatar
Loic Hausammann committed
509
510

      /* Log the particle when leaving a rank. */
Loic Hausammann's avatar
Format    
Loic Hausammann committed
511
512
513
514
      logger_log_spart(
          e->logger, &s->sparts[offset_sparts + k],
          logger_masks_all_spart | logger_mask_data[logger_special_flags].mask,
          logger_flag);
Loic Hausammann's avatar
Loic Hausammann committed
515
    }
516
#endif
517
  }
518

519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
  /* Put the bparts into the corresponding proxies. */
  for (size_t k = 0; k < *Nbpart; k++) {

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_bpart[k] == -1) continue;

    /* Get the target node and proxy ID. */
    const int node_id = e->s->cells_top[ind_bpart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
    if (pid < 0) {
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lld, x=[%e,%e,%e].",
          node_id, s->bparts[offset_bparts + k].id,
          s->bparts[offset_bparts + k].x[0], s->bparts[offset_bparts + k].x[1],
          s->bparts[offset_bparts + k].x[2]);
    }

    /* Re-link the associated gpart with the buffer offset of the bpart. */
    if (s->bparts[offset_bparts + k].gpart != NULL) {
      s->bparts[offset_bparts + k].gpart->id_or_neg_offset =
          -e->proxies[pid].nr_bparts_out;
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->bparts[offset_bparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif

    /* Load the bpart into the proxy */
    proxy_bparts_load(&e->proxies[pid], &s->bparts[offset_bparts + k], 1);
552
553

#ifdef WITH_LOGGER
Loic Hausammann's avatar
Loic Hausammann committed
554
    if (e->policy & engine_policy_logger) {
Loic Hausammann's avatar
Loic Hausammann committed
555
      error("Not yet implemented.");
Loic Hausammann's avatar
Loic Hausammann committed
556
    }
557
#endif
558
  }
559

560
561
  /* Put the gparts into the corresponding proxies. */
  for (size_t k = 0; k < *Ngpart; k++) {
562
563
564
565
566

    /* Ignore the particles we want to get rid of (inhibited, ...). */
    if (ind_gpart[k] == -1) continue;

    /* Get the target node and proxy ID. */
567
568
569
570
    const int node_id = e->s->cells_top[ind_gpart[k]].nodeID;
    if (node_id < 0 || node_id >= e->nr_nodes)
      error("Bad node ID %i.", node_id);
    const int pid = e->proxy_ind[node_id];
571
    if (pid < 0) {
572
573
574
575
576
577
      error(
          "Do not have a proxy for the requested nodeID %i for part with "
          "id=%lli, x=[%e,%e,%e].",
          node_id, s->gparts[offset_gparts + k].id_or_neg_offset,
          s->gparts[offset_gparts + k].x[0], s->gparts[offset_gparts + k].x[1],
          s->gparts[offset_gparts + k].x[2]);
578
579
580
581
582
583
    }

#ifdef SWIFT_DEBUG_CHECKS
    if (s->gparts[offset_gparts + k].time_bin == time_bin_inhibited)
      error("Attempting to exchange an inhibited particle");
#endif
584
585
586

    /* Load the gpart into the proxy */
    proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
587
588
589

#ifdef WITH_LOGGER
    /* Write only the dark matter particles */
Loic Hausammann's avatar
Loic Hausammann committed
590
591
    if ((e->policy & engine_policy_logger) &&
        s->gparts[offset_gparts + k].type == swift_type_dark_matter) {
Loic Hausammann's avatar
Loic Hausammann committed
592

Loic Hausammann's avatar
Format    
Loic Hausammann committed
593
594
      const uint32_t logger_flag =
          logger_pack_flags_and_data(logger_flag_mpi_exit, node_id);
595
596

      /* Log the particle when leaving a rank. */
Loic Hausammann's avatar
Format    
Loic Hausammann committed
597
598
599
600
      logger_log_gpart(
          e->logger, &s->gparts[offset_gparts + k],
          logger_masks_all_gpart | logger_mask_data[logger_special_flags].mask,
          logger_flag);
601
602
    }
#endif
603
604
  }

605
  /* Launch the proxies. */
606
607
  MPI_Request reqs_in[5 * engine_maxproxies];
  MPI_Request reqs_out[5 * engine_maxproxies];
608
  for (int k = 0; k < e->nr_proxies; k++) {
609
    proxy_parts_exchange_first(&e->proxies[k]);
610
611
612
613
614
    reqs_in[k] = e->proxies[k].req_parts_count_in;
    reqs_out[k] = e->proxies[k].req_parts_count_out;
  }

  /* Wait for each count to come in and start the recv. */
615
  for (int k = 0; k < e->nr_proxies; k++) {
616
    int pid = MPI_UNDEFINED;
617
618
    if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) !=
            MPI_SUCCESS ||
619
620
621
        pid == MPI_UNDEFINED)
      error("MPI_Waitany failed.");
    // message( "request from proxy %i has arrived." , pid );
622
    proxy_parts_exchange_second(&e->proxies[pid]);
623
  }
624

625
  /* Wait for all the sends to have finished too. */
626
627
628
  if (MPI_Waitall(e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS)
    error("MPI_Waitall on sends failed.");

629
  /* Count the total number of incoming particles and make sure we have
630
     enough space to accommodate them. */
631
632
  int count_parts_in = 0;
  int count_gparts_in = 0;
633
  int count_sparts_in = 0;
634
  int count_bparts_in = 0;
635
636
637
  for (int k = 0; k < e->nr_proxies; k++) {
    count_parts_in += e->proxies[k].nr_parts_in;
    count_gparts_in += e->proxies[k].nr_gparts_in;
638
    count_sparts_in += e->proxies[k].nr_sparts_in;
639
    count_bparts_in += e->proxies[k].nr_bparts_in;
640
641
  }
  if (e->verbose) {
642
643
644
645
646
    message(
        "sent out %zu/%zu/%zu/%zu parts/gparts/sparts/bparts, got %i/%i/%i/%i "
        "back.",
        *Npart, *Ngpart, *Nspart, *Nbpart, count_parts_in, count_gparts_in,
        count_sparts_in, count_bparts_in);
647
  }
648
649

  /* Reallocate the particle arrays if necessary */
650
  if (offset_parts + count_parts_in > s->size_parts) {
651
    s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow;
652
653
    struct part *parts_new = NULL;
    struct xpart *xparts_new = NULL;
654
    if (swift_memalign("parts", (void **)&parts_new, part_align,
655
                       sizeof(struct part) * s->size_parts) != 0 ||
656
        swift_memalign("xparts", (void **)&xparts_new, xpart_align,
657
658
                       sizeof(struct xpart) * s->size_parts) != 0)
      error("Failed to allocate new part data.");
659
660
    memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts);
    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts);
661
662
    swift_free("parts", s->parts);
    swift_free("xparts", s->xparts);
663
664
    s->parts = parts_new;
    s->xparts = xparts_new;
665
666

    /* Reset the links */
Pedro Gonnet's avatar
Pedro Gonnet committed
667
668
    for (size_t k = 0; k < offset_parts; k++) {
      if (s->parts[k].gpart != NULL) {
669
        s->parts[k].gpart->id_or_neg_offset = -k;
Pedro Gonnet's avatar
Pedro Gonnet committed
670
671
      }
    }
672
  }
673

674
675
676
  if (offset_sparts + count_sparts_in > s->size_sparts) {
    s->size_sparts = (offset_sparts + count_sparts_in) * engine_parts_size_grow;
    struct spart *sparts_new = NULL;
677
    if (swift_memalign("sparts", (void **)&sparts_new, spart_align,
678
679
680
                       sizeof(struct spart) * s->size_sparts) != 0)
      error("Failed to allocate new spart data.");
    memcpy(sparts_new, s->sparts, sizeof(struct spart) * offset_sparts);
681
    swift_free("sparts", s->sparts);
682
    s->sparts = sparts_new;
683
684

    /* Reset the links */
685
    for (size_t k = 0; k < offset_sparts; k++) {
686
687
688
689
690
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }
691

692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
  if (offset_bparts + count_bparts_in > s->size_bparts) {
    s->size_bparts = (offset_bparts + count_bparts_in) * engine_parts_size_grow;
    struct bpart *bparts_new = NULL;
    if (swift_memalign("bparts", (void **)&bparts_new, bpart_align,
                       sizeof(struct bpart) * s->size_bparts) != 0)
      error("Failed to allocate new bpart data.");
    memcpy(bparts_new, s->bparts, sizeof(struct bpart) * offset_bparts);
    swift_free("bparts", s->bparts);
    s->bparts = bparts_new;

    /* Reset the links */
    for (size_t k = 0; k < offset_bparts; k++) {
      if (s->bparts[k].gpart != NULL) {
        s->bparts[k].gpart->id_or_neg_offset = -k;
      }
    }
  }
709

710
  if (offset_gparts + count_gparts_in > s->size_gparts) {
711
    s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow;
712
    struct gpart *gparts_new = NULL;
713
    if (swift_memalign("gparts", (void **)&gparts_new, gpart_align,
714
715
716
                       sizeof(struct gpart) * s->size_gparts) != 0)
      error("Failed to allocate new gpart data.");
    memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts);
717
    swift_free("gparts", s->gparts);
718
    s->gparts = gparts_new;
719

720
    /* Reset the links */
Pedro Gonnet's avatar
Pedro Gonnet committed
721
    for (size_t k = 0; k < offset_gparts; k++) {
722
      if (s->gparts[k].type == swift_type_gas) {
723
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
724
      } else if (s->gparts[k].type == swift_type_stars) {
725
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
726
727
      } else if (s->gparts[k].type == swift_type_black_hole) {
        s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
Pedro Gonnet's avatar
Pedro Gonnet committed
728
729
      }
    }
730
  }
731
732

  /* Collect the requests for the particle data from the proxies. */
733
734
  int nr_in = 0, nr_out = 0;
  for (int k = 0; k < e->nr_proxies; k++) {
735
    if (e->proxies[k].nr_parts_in > 0) {
736
737
      reqs_in[5 * k] = e->proxies[k].req_parts_in;
      reqs_in[5 * k + 1] = e->proxies[k].req_xparts_in;
738
739
      nr_in += 2;
    } else {
740
      reqs_in[5 * k] = reqs_in[5 * k + 1] = MPI_REQUEST_NULL;
741
742
    }
    if (e->proxies[k].nr_gparts_in > 0) {
743
      reqs_in[5 * k + 2] = e->proxies[k].req_gparts_in;
744
      nr_in += 1;
745
    } else {
746
      reqs_in[5 * k + 2] = MPI_REQUEST_NULL;
747
    }
748
    if (e->proxies[k].nr_sparts_in > 0) {
749
      reqs_in[5 * k + 3] = e->proxies[k].req_sparts_in;
750
751
      nr_in += 1;
    } else {
752
753
754
755
756
757
758
      reqs_in[5 * k + 3] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_bparts_in > 0) {
      reqs_in[5 * k + 4] = e->proxies[k].req_bparts_in;
      nr_in += 1;
    } else {
      reqs_in[5 * k + 4] = MPI_REQUEST_NULL;
759
760
    }

761
    if (e->proxies[k].nr_parts_out > 0) {
762
763
      reqs_out[5 * k] = e->proxies[k].req_parts_out;
      reqs_out[5 * k + 1] = e->proxies[k].req_xparts_out;
764
765
      nr_out += 2;
    } else {
766
      reqs_out[5 * k] = reqs_out[5 * k + 1] = MPI_REQUEST_NULL;
767
768
    }
    if (e->proxies[k].nr_gparts_out > 0) {
769
      reqs_out[5 * k + 2] = e->proxies[k].req_gparts_out;
770
771
      nr_out += 1;
    } else {
772
      reqs_out[5 * k + 2] = MPI_REQUEST_NULL;
773
774
    }
    if (e->proxies[k].nr_sparts_out > 0) {
775
776
777
778
779
780
781
      reqs_out[5 * k + 3] = e->proxies[k].req_sparts_out;
      nr_out += 1;
    } else {
      reqs_out[5 * k + 3] = MPI_REQUEST_NULL;
    }
    if (e->proxies[k].nr_bparts_out > 0) {
      reqs_out[5 * k + 4] = e->proxies[k].req_bparts_out;
782
      nr_out += 1;
783
    } else {
784
      reqs_out[5 * k + 4] = MPI_REQUEST_NULL;
785
    }
786
787
788
789
  }

  /* Wait for each part array to come in and collect the new
     parts from the proxies. */
790
  int count_parts = 0, count_gparts = 0, count_sparts = 0, count_bparts = 0;
791
  for (int k = 0; k < nr_in; k++) {
792
    int err, pid;
793
    if ((err = MPI_Waitany(5 * e->nr_proxies, reqs_in, &pid,
794
                           MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
795
796
797
798
799
800
      char buff[MPI_MAX_ERROR_STRING];
      int res;
      MPI_Error_string(err, buff, &res);
      error("MPI_Waitany failed (%s).", buff);
    }
    if (pid == MPI_UNDEFINED) break;
801
802
    // message( "request from proxy %i has arrived." , pid / 5 );
    pid = 5 * (pid / 5);
Pedro Gonnet's avatar
Pedro Gonnet committed
803

804
    /* If all the requests for a given proxy have arrived... */
805
806
    if (reqs_in[pid + 0] == MPI_REQUEST_NULL &&
        reqs_in[pid + 1] == MPI_REQUEST_NULL &&
807
        reqs_in[pid + 2] == MPI_REQUEST_NULL &&
808
        reqs_in[pid + 3] == MPI_REQUEST_NULL &&
809
        reqs_in[pid + 4] == MPI_REQUEST_NULL) {
810
      /* Copy the particle data to the part/xpart/gpart arrays. */
811
      struct proxy *prox = &e->proxies[pid / 5];
812
813
814
815
816
817
      memcpy(&s->parts[offset_parts + count_parts], prox->parts_in,
             sizeof(struct part) * prox->nr_parts_in);
      memcpy(&s->xparts[offset_parts + count_parts], prox->xparts_in,
             sizeof(struct xpart) * prox->nr_parts_in);
      memcpy(&s->gparts[offset_gparts + count_gparts], prox->gparts_in,
             sizeof(struct gpart) * prox->nr_gparts_in);
818
819
      memcpy(&s->sparts[offset_sparts + count_sparts], prox->sparts_in,
             sizeof(struct spart) * prox->nr_sparts_in);
820
821
      memcpy(&s->bparts[offset_bparts + count_bparts], prox->bparts_in,
             sizeof(struct bpart) * prox->nr_bparts_in);
822
823

#ifdef WITH_LOGGER
Loic Hausammann's avatar
Loic Hausammann committed
824
      if (e->policy & engine_policy_logger) {
Loic Hausammann's avatar
Format    
Loic Hausammann committed
825
826
        const uint32_t flag =
            logger_pack_flags_and_data(logger_flag_mpi_enter, prox->nodeID);
Loic Hausammann's avatar
Loic Hausammann committed
827
828
829
830
831
832
833

        struct part *parts = &s->parts[offset_parts + count_parts];
        struct xpart *xparts = &s->xparts[offset_parts + count_parts];
        struct spart *sparts = &s->sparts[offset_sparts + count_sparts];
        struct gpart *gparts = &s->gparts[offset_gparts + count_gparts];

        /* Log the gas particles */
Loic Hausammann's avatar
Format    
Loic Hausammann committed
834
835
836
837
        logger_log_parts(
            e->logger, parts, xparts, prox->nr_parts_in,
            logger_masks_all_part | logger_mask_data[logger_special_flags].mask,
            flag);
Loic Hausammann's avatar
Loic Hausammann committed
838
839

        /* Log the stellar particles */
840
841
        logger_log_sparts(e->logger, sparts, prox->nr_sparts_in,
                          logger_masks_all_spart |
Loic Hausammann's avatar
Format    
Loic Hausammann committed
842
                              logger_mask_data[logger_special_flags].mask,
843
                          flag);
Loic Hausammann's avatar
Loic Hausammann committed
844
845

        /* Log the gparts */
846
847
        logger_log_gparts(e->logger, gparts, prox->nr_gparts_in,
                          logger_masks_all_gpart |
Loic Hausammann's avatar
Format    
Loic Hausammann committed
848
                              logger_mask_data[logger_special_flags].mask,
849
                          flag);
Loic Hausammann's avatar
Loic Hausammann committed
850
851
852
853
854
855

        /* Log the bparts */
        if (prox->nr_bparts_in > 0) {
          error("TODO");
        }
      }
856
#endif
857
858
      /* for (int k = offset; k < offset + count; k++)
         message(