/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif

/* This object's header. */
#include "engine.h"

/* Local headers. */
#include "active.h"
#include "atomic.h"
#include "cell.h"
#include "clocks.h"
#include "cycle.h"
#include "debug.h"
#include "error.h"
#include "gravity.h"
#include "hydro.h"
#include "minmax.h"
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "runner.h"
#include "serial_io.h"
#include "single_io.h"
#include "sort_part.h"
#include "statistics.h"
#include "timers.h"
#include "tools.h"
#include "units.h"
#include "version.h"

/* Particle cache size. */
#define CACHE_SIZE 512

const char *engine_policy_names[16] = {"none",
                                       "rand",
                                       "steal",
                                       "keep",
                                       "block",
                                       "cpu_tight",
                                       "mpi",
                                       "numa_affinity",
                                       "hydro",
                                       "self_gravity",
                                       "external_gravity",
                                       "cosmology_integration",
                                       "drift_all",
                                       "cooling",
                                       "sourceterms",
                                       "stars"};

/** The rank of the engine as a global variable (for messages). */
int engine_rank;

/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
 * @param l A pointer to the #link, will be modified atomically.
 * @param t The #task.
 */
void engine_addlink(struct engine *e, struct link **l, struct task *t) {

  /* Get the next free link. */
  const int ind = atomic_inc(&e->nr_links);
  if (ind >= e->size_links) {
    error("Link table overflow.");
  }
  struct link *res = &e->links[ind];

  /* Set it atomically. */
  res->t = t;
  res->next = atomic_swap(l, res);
}
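
/* Illustrative usage sketch (not part of the original code): a typical call
 * site attaches a freshly created interaction task to a cell's task list,
 * e.g., assuming cells ci and cj and the cell's density link list:
 *
 *   struct task *t = scheduler_addtask(&e->sched, task_type_pair,
 *                                      task_subtype_density, 0, 0, ci, cj, 0);
 *   engine_addlink(e, &ci->density, t);
 *
 * Because the list head is swapped atomically, several threads may append
 * to the same cell's list concurrently without holding a lock. */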

/**
 * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
 * i.e. all the O(Npart) tasks.
 *
 * Tasks are only created here. The dependencies will be added later on.
 *
 * Note that there is no need to recurse below the super-cell.
 *
 * @param e The #engine.
 * @param c The #cell.
 */
void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {

  struct scheduler *s = &e->sched;
  const int is_hydro = (e->policy & engine_policy_hydro);
  const int is_self_gravity = (e->policy & engine_policy_self_gravity);
  const int is_with_cooling = (e->policy & engine_policy_cooling);
  const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);

  /* Are we in a super-cell ? */
  if (c->super == c) {

    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {

      /* Add the init task. */
      c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c,
                                  NULL, 0);

      /* Add the two half kicks */
      c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
                                   c, NULL, 0);

      c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0,
                                   c, NULL, 0);

      /* Add the time-step calculation task and its dependency */
      c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none,
                                      0, 0, c, NULL, 0);

      scheduler_addunlock(s, c->kick2, c->timestep);
      scheduler_addunlock(s, c->timestep, c->kick1);
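
      /* A note on the ordering encoded by these unlocks: the second
       * half-kick of the current step must finish before the new time-steps
       * are computed, and the time-step task in turn gates the first
       * half-kick of the next step (the usual kick-drift-kick pattern). */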

      /* Add the drift task and its dependencies. */
      c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0,
                                   c, NULL, 0);

      scheduler_addunlock(s, c->drift, c->init);

      if (is_self_gravity) {

        /* Initialisation of the multipoles */
        c->init_grav = scheduler_addtask(s, task_type_init_grav,
                                         task_subtype_none, 0, 0, c, NULL, 0);

        /* Gravity non-neighbouring pm calculations */
        c->grav_long_range = scheduler_addtask(
            s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL, 0);

        /* Gravity top-level periodic calculation */
        c->grav_top_level = scheduler_addtask(
            s, task_type_grav_top_level, task_subtype_none, 0, 0, c, NULL, 0);

        /* Gravity recursive down-pass */
        c->grav_down = scheduler_addtask(s, task_type_grav_down,
                                         task_subtype_none, 0, 0, c, NULL, 0);

        scheduler_addunlock(s, c->init_grav, c->grav_long_range);
        scheduler_addunlock(s, c->init_grav, c->grav_top_level);
        scheduler_addunlock(s, c->grav_long_range, c->grav_down);
        scheduler_addunlock(s, c->grav_top_level, c->grav_down);
        scheduler_addunlock(s, c->grav_down, c->kick2);
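
        /* Taken together, these unlocks encode the chain
         *
         *   init_grav -> {grav_long_range, grav_top_level} -> grav_down
         *             -> kick2
         *
         * i.e. both the long-range and the periodic top-level calculations
         * wait for the multipole initialisation, and the recursive down-pass
         * must complete before the second half-kick can use its result. */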
      }

      /* Generate the ghost task. */
      if (is_hydro)
        c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
                                     0, c, NULL, 0);

#ifdef EXTRA_HYDRO_LOOP
      /* Generate the extra ghost task. */
      if (is_hydro)
        c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
                                           task_subtype_none, 0, 0, c, NULL, 0);
#endif

      /* Cooling task */
      if (is_with_cooling) {
        c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
                                       0, 0, c, NULL, 0);

        scheduler_addunlock(s, c->cooling, c->kick2);
      }

      /* add source terms */
      if (is_with_sourceterms) {
        c->sourceterms = scheduler_addtask(s, task_type_sourceterms,
                                           task_subtype_none, 0, 0, c, NULL, 0);
      }
    }

  } else { /* We are above the super-cell so need to go deeper */

#ifdef SWIFT_DEBUG_CHECKS
    if (c->super != NULL) error("Incorrectly set super pointer");
#endif

    /* Recurse. */
    if (c->split)
      for (int k = 0; k < 8; k++)
        if (c->progeny[k] != NULL)
          engine_make_hierarchical_tasks(e, c->progeny[k]);
  }
}

/**
 * @brief Redistribute the particles amongst the nodes according
 *      to their cell's node IDs.
 *
 * The strategy here is as follows:
 * 1) Each node counts the number of particles it has to send to each other
 * node.
 * 2) The number of particles of each type is then exchanged.
 * 3) The particles to send are placed in a temporary buffer in which the
 * part-gpart links are preserved.
 * 4) Each node allocates enough space for the new particles.
 * 5) (Asynchronous) communications are issued to transfer the data.
 *
 * @param e The #engine.
 */
void engine_redistribute(struct engine *e) {

#ifdef WITH_MPI

  const int nr_nodes = e->nr_nodes;
  const int nodeID = e->nodeID;
  struct space *s = e->s;
  struct cell *cells = s->cells_top;
  const int nr_cells = s->nr_cells;
  const int *cdim = s->cdim;
  const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  struct part *parts = s->parts;
  struct xpart *xparts = s->xparts;
  struct gpart *gparts = s->gparts;
  struct spart *sparts = s->sparts;
  ticks tic = getticks();

  /* Allocate temporary arrays to store the counts of particles to be sent
     and the destination of each particle */
  int *counts, *g_counts, *s_counts;
  if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate counts temporary buffer.");
  if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate g_counts temporary buffer.");
  if ((s_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate s_counts temporary buffer.");
  bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
  bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes);
  bzero(s_counts, sizeof(int) * nr_nodes * nr_nodes);

  /* Allocate the destination index arrays. */
  int *dest, *g_dest, *s_dest;
  if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
    error("Failed to allocate dest temporary buffer.");
  if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
    error("Failed to allocate g_dest temporary buffer.");
  if ((s_dest = (int *)malloc(sizeof(int) * s->nr_sparts)) == NULL)
    error("Failed to allocate s_dest temporary buffer.");

  /* Get destination of each particle */
  for (size_t k = 0; k < s->nr_parts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (parts[k].x[j] < 0.0)
        parts[k].x[j] += dim[j];
      else if (parts[k].x[j] >= dim[j])
        parts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, parts[k].x[0] * iwidth[0], parts[k].x[1] * iwidth[1],
                   parts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
            parts[k].x[0], parts[k].x[1], parts[k].x[2]);
#endif

    dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    counts[nodeID * nr_nodes + dest[k]] += 1;
  }
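
  /* Illustrative example (assuming nr_nodes == 2): the flattened counts
   * matrix is laid out as
   *
   *   counts[0] = 0 -> 0,  counts[1] = 0 -> 1,
   *   counts[2] = 1 -> 0,  counts[3] = 1 -> 1,
   *
   * and each rank only fills in its own row here; the full matrix is
   * assembled by the allreduce further down. */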

  /* Sort the particles according to their cell index. */
  if (s->nr_parts > 0)
    space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the parts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_parts; k++) {
    const struct part *p = &s->parts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
                   p->x[2] * s->iwidth[2]);

    /* New cell of this part */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (dest[k] != new_node)
      error("part's new node index not matching sorted index.");

    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
      error("part not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of parts. */
  if (s->nr_parts > 0) {
    int current_dest = dest[0];
    size_t count_this_dest = 0;
    for (size_t k = 0; k < s->nr_parts; ++k) {
      if (s->parts[k].gpart != NULL) {

        /* As the addresses will be invalidated by the communications, we will
         * instead store the absolute index from the start of the sub-array of
         * particles to be sent to a given node.
         * Recall that gparts without partners have a positive id.
         * We will restore the pointers on the receiving node later on. */
        if (dest[k] != current_dest) {
          current_dest = dest[k];
          count_this_dest = 0;
        }

#ifdef SWIFT_DEBUG_CHECKS
        if (s->parts[k].gpart->id_or_neg_offset > 0)
          error("Trying to link a partnerless gpart !");
#endif

        s->parts[k].gpart->id_or_neg_offset = -count_this_dest;
        count_this_dest++;
      }
    }
  }
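
  /* Worked example of the scheme above: if three parts with gpart partners
   * are headed to the same destination node, their partners' id_or_neg_offset
   * fields become 0, -1 and -2, i.e. minus their position in the block sent
   * to that node. The receiver can then re-link via
   * &parts_new[offset_parts - id_or_neg_offset] once it knows where that
   * block landed in its new local arrays. */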

  /* Get destination of each s-particle */
  for (size_t k = 0; k < s->nr_sparts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (sparts[k].x[j] < 0.0)
        sparts[k].x[j] += dim[j];
      else if (sparts[k].x[j] >= dim[j])
        sparts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, sparts[k].x[0] * iwidth[0], sparts[k].x[1] * iwidth[1],
                   sparts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
            sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]);
#endif

    s_dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    s_counts[nodeID * nr_nodes + s_dest[k]] += 1;
  }

  /* Sort the particles according to their cell index. */
  if (s->nr_sparts > 0)
    space_sparts_sort(s, s_dest, s->nr_sparts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the sparts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_sparts; k++) {
    const struct spart *sp = &s->sparts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
                   sp->x[2] * s->iwidth[2]);

    /* New cell of this spart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (s_dest[k] != new_node)
      error("spart's new node index not matching sorted index.");

    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
      error("spart not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of sparts. */
  if (s->nr_sparts > 0) {
    int current_dest = s_dest[0];
    size_t count_this_dest = 0;
    for (size_t k = 0; k < s->nr_sparts; ++k) {
      if (s->sparts[k].gpart != NULL) {

        /* As the addresses will be invalidated by the communications, we will
         * instead store the absolute index from the start of the sub-array of
         * particles to be sent to a given node.
         * Recall that gparts without partners have a positive id.
         * We will restore the pointers on the receiving node later on. */
        if (s_dest[k] != current_dest) {
          current_dest = s_dest[k];
          count_this_dest = 0;
        }

#ifdef SWIFT_DEBUG_CHECKS
        if (s->sparts[k].gpart->id_or_neg_offset > 0)
          error("Trying to link a partnerless gpart !");
#endif

        s->sparts[k].gpart->id_or_neg_offset = -count_this_dest;
        count_this_dest++;
      }
    }
  }

  /* Get destination of each g-particle */
  for (size_t k = 0; k < s->nr_gparts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (gparts[k].x[j] < 0.0)
        gparts[k].x[j] += dim[j];
      else if (gparts[k].x[j] >= dim[j])
        gparts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, gparts[k].x[0] * iwidth[0], gparts[k].x[1] * iwidth[1],
                   gparts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
            gparts[k].x[0], gparts[k].x[1], gparts[k].x[2]);
#endif

    g_dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    g_counts[nodeID * nr_nodes + g_dest[k]] += 1;
  }

  /* Sort the gparticles according to their cell index. */
  if (s->nr_gparts > 0)
    space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the gparts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_gparts; k++) {
    const struct gpart *gp = &s->gparts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
                   gp->x[2] * s->iwidth[2]);

    /* New cell of this gpart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (g_dest[k] != new_node)
      error("gpart's new node index not matching sorted index.");

    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
      error("gpart not sorted into the right top-level cell!");
  }
#endif

  /* Get all the counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                    MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce particle transfer counts.");

  /* Get all the g_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce gparticle transfer counts.");

  /* Get all the s_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce sparticle transfer counts.");

  /* Report how many particles will be moved. */
  if (e->verbose) {
    if (e->nodeID == 0) {
      size_t total = 0, g_total = 0, s_total = 0;
      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0;
      for (int p = 0, r = 0; p < nr_nodes; p++) {
        for (int s = 0; s < nr_nodes; s++) {
          total += counts[r];
          g_total += g_counts[r];
          s_total += s_counts[r];
          if (p == s) {
            unmoved += counts[r];
            g_unmoved += g_counts[r];
            s_unmoved += s_counts[r];
          }
          r++;
        }
      }
      if (total > 0)
        message("%zu of %zu (%.2f%%) of particles moved", total - unmoved,
                total, 100.0 * (double)(total - unmoved) / (double)total);
      if (g_total > 0)
        message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved,
                g_total,
                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
      if (s_total > 0)
        message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved,
                s_total,
                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
    }
  }

  /* Each node knows how many parts, sparts and gparts will be transferred
     to every other node. We can start preparing to receive data */

  /* Get the new number of parts, gparts and sparts for this node */
  size_t nr_parts = 0, nr_gparts = 0, nr_sparts = 0;
  for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
  for (int k = 0; k < nr_nodes; k++)
    nr_gparts += g_counts[k * nr_nodes + nodeID];
  for (int k = 0; k < nr_nodes; k++)
    nr_sparts += s_counts[k * nr_nodes + nodeID];

  /* Allocate the new arrays with some extra margin */
  struct part *parts_new = NULL;
  struct xpart *xparts_new = NULL;
  struct gpart *gparts_new = NULL;
  struct spart *sparts_new = NULL;
  if (posix_memalign((void **)&parts_new, part_align,
                     sizeof(struct part) * nr_parts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new part data.");
  if (posix_memalign((void **)&xparts_new, xpart_align,
                     sizeof(struct xpart) * nr_parts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new xpart data.");
  if (posix_memalign((void **)&gparts_new, gpart_align,
                     sizeof(struct gpart) * nr_gparts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new gpart data.");
  if (posix_memalign((void **)&sparts_new, spart_align,
                     sizeof(struct spart) * nr_sparts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new spart data.");

  /* Prepare MPI requests for the asynchronous communications */
  MPI_Request *reqs;
  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 8 * nr_nodes)) ==
      NULL)
    error("Failed to allocate MPI request list.");
  for (int k = 0; k < 8 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
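
  /* A note on the layout used below: each pairwise exchange tags its
   * messages 4 * ind + {0, 1, 2, 3} for parts, xparts, gparts and sparts
   * respectively, where ind is the same (from * nr_nodes + to) matrix index
   * on both sides. Each remote node k owns eight request slots:
   * reqs[8 * k + 0..3] for the sends and reqs[8 * k + 4..7] for the recvs. */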

  /* Emit the sends and recvs for the particle, gparticle and sparticle
     data. */
  size_t offset_send = 0, offset_recv = 0;
  size_t g_offset_send = 0, g_offset_recv = 0;
  size_t s_offset_send = 0, s_offset_recv = 0;
  for (int k = 0; k < nr_nodes; k++) {

    /* Indices in the count arrays of the node of interest */
    const int ind_send = nodeID * nr_nodes + k;
    const int ind_recv = k * nr_nodes + nodeID;

    /* Are we sending any part/xpart ? */
    if (counts[ind_send] > 0) {

      /* message("Sending %d part to node %d", counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&parts_new[offset_recv], &s->parts[offset_send],
               sizeof(struct part) * counts[ind_recv]);
        memcpy(&xparts_new[offset_recv], &s->xparts[offset_send],
               sizeof(struct xpart) * counts[ind_recv]);
        offset_send += counts[ind_send];
        offset_recv += counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type,
                      k, 4 * ind_send + 0, MPI_COMM_WORLD,
                      &reqs[8 * k + 0]) != MPI_SUCCESS)
          error("Failed to isend parts to node %i.", k);
        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type,
                      k, 4 * ind_send + 1, MPI_COMM_WORLD,
                      &reqs[8 * k + 1]) != MPI_SUCCESS)
          error("Failed to isend xparts to node %i.", k);
        offset_send += counts[ind_send];
      }
    }

    /* Are we sending any gpart ? */
    if (g_counts[ind_send] > 0) {

      /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send],
               sizeof(struct gpart) * g_counts[ind_recv]);
        g_offset_send += g_counts[ind_send];
        g_offset_recv += g_counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send],
                      gpart_mpi_type, k, 4 * ind_send + 2, MPI_COMM_WORLD,
                      &reqs[8 * k + 2]) != MPI_SUCCESS)
          error("Failed to isend gparts to node %i.", k);
        g_offset_send += g_counts[ind_send];
      }
    }

    /* Are we sending any spart ? */
    if (s_counts[ind_send] > 0) {

      /* message("Sending %d spart to node %d", s_counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&sparts_new[s_offset_recv], &s->sparts[s_offset_send],
               sizeof(struct spart) * s_counts[ind_recv]);
        s_offset_send += s_counts[ind_send];
        s_offset_recv += s_counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->sparts[s_offset_send], s_counts[ind_send],
                      spart_mpi_type, k, 4 * ind_send + 3, MPI_COMM_WORLD,
                      &reqs[8 * k + 3]) != MPI_SUCCESS)
          error("Failed to isend gparts to node %i.", k);
        s_offset_send += s_counts[ind_send];
      }
    }

    /* Now emit the corresponding Irecv() */

    /* Are we receiving any part/xpart from this node ? */
    if (k != nodeID && counts[ind_recv] > 0) {
      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k,
                    4 * ind_recv + 0, MPI_COMM_WORLD,
                    &reqs[8 * k + 4]) != MPI_SUCCESS)
        error("Failed to emit irecv of parts from node %i.", k);
      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type,
                    k, 4 * ind_recv + 1, MPI_COMM_WORLD,
                    &reqs[8 * k + 5]) != MPI_SUCCESS)
        error("Failed to emit irecv of xparts from node %i.", k);
      offset_recv += counts[ind_recv];
    }

    /* Are we receiving any gpart from this node ? */
    if (k != nodeID && g_counts[ind_recv] > 0) {
      if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv],
                    gpart_mpi_type, k, 4 * ind_recv + 2, MPI_COMM_WORLD,
                    &reqs[8 * k + 6]) != MPI_SUCCESS)
        error("Failed to emit irecv of gparts from node %i.", k);
      g_offset_recv += g_counts[ind_recv];
    }

    /* Are we receiving any spart from this node ? */
    if (k != nodeID && s_counts[ind_recv] > 0) {
      if (MPI_Irecv(&sparts_new[s_offset_recv], s_counts[ind_recv],
                    spart_mpi_type, k, 4 * ind_recv + 3, MPI_COMM_WORLD,
                    &reqs[8 * k + 7]) != MPI_SUCCESS)
        error("Failed to emit irecv of sparts from node %i.", k);
      s_offset_recv += s_counts[ind_recv];
    }
  }

  /* Wait for all the sends and recvs to tumble in. */
  MPI_Status stats[8 * nr_nodes];
  int res;
  if ((res = MPI_Waitall(8 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
    for (int k = 0; k < 8 * nr_nodes; k++) {
      char buff[MPI_MAX_ERROR_STRING];
      MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
      message("request %i has error '%s'.", k, buff);
    }
    error("Failed during waitall for part data.");
  }

  /* All particles have now arrived. Time for some final operations on the
     stuff we just received */

  /* Restore the part<->gpart and spart<->gpart links */
  size_t offset_parts = 0, offset_sparts = 0, offset_gparts = 0;
  for (int node = 0; node < nr_nodes; ++node) {

    const int ind_recv = node * nr_nodes + nodeID;
    const size_t count_parts = counts[ind_recv];
    const size_t count_gparts = g_counts[ind_recv];
    const size_t count_sparts = s_counts[ind_recv];
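
    /* Recall that each sender stored, in id_or_neg_offset, minus the
     * particle's position within the block it sent to us, so adding the
     * running receive offset recovers the index in our new local arrays. */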

    /* Loop over the gparts received from that node */
    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {

      /* Does this gpart have a gas partner ? */
      if (gparts_new[k].type == swift_type_gas) {

        const ptrdiff_t partner_index =
            offset_parts - gparts_new[k].id_or_neg_offset;

        /* Re-link */
        gparts_new[k].id_or_neg_offset = -partner_index;
        parts_new[partner_index].gpart = &gparts_new[k];
      }

      /* Does this gpart have a star partner ? */
      if (gparts_new[k].type == swift_type_star) {

        const ptrdiff_t partner_index =
            offset_sparts - gparts_new[k].id_or_neg_offset;

        /* Re-link */
        gparts_new[k].id_or_neg_offset = -partner_index;
        sparts_new[partner_index].gpart = &gparts_new[k];
      }
    }

    offset_parts += count_parts;
    offset_gparts += count_gparts;
    offset_sparts += count_sparts;
  }

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that all parts are in the right place. */
  for (size_t k = 0; k < nr_parts; k++) {
    const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
                               parts_new[k].x[1] * iwidth[1],
                               parts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
            cells[cid].nodeID);
  }
  for (size_t k = 0; k < nr_gparts; k++) {
    const int cid = cell_getid(cdim, gparts_new[k].x[0] * iwidth[0],
                               gparts_new[k].x[1] * iwidth[1],
                               gparts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received g-particle (%zu) that does not belong here (nodeID=%i).",
            k, cells[cid].nodeID);
  }
  for (size_t k = 0; k < nr_sparts; k++) {
    const int cid = cell_getid(cdim, sparts_new[k].x[0] * iwidth[0],
                               sparts_new[k].x[1] * iwidth[1],
                               sparts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received s-particle (%zu) that does not belong here (nodeID=%i).",
            k, cells[cid].nodeID);
  }

  /* Verify that the links are correct */
  part_verify_links(parts_new, gparts_new, sparts_new, nr_parts, nr_gparts,
                    nr_sparts, e->verbose);
#endif

  /* Set the new part data, free the old. */
  free(parts);
  free(xparts);
  free(gparts);
  free(sparts);
  s->parts = parts_new;
  s->xparts = xparts_new;
  s->gparts = gparts_new;
  s->sparts = sparts_new;
  s->nr_parts = nr_parts;
  s->nr_gparts = nr_gparts;
  s->nr_sparts = nr_sparts;
  s->size_parts = engine_redistribute_alloc_margin * nr_parts;
  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts;
  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts;

  /* Clean up the temporary stuff. */
  free(reqs);
  free(counts);
  free(g_counts);
  free(s_counts);
  free(dest);
  free(g_dest);
  free(s_dest);

  /* Be verbose about what just happened. */
  if (e->verbose) {
    int my_cells = 0;
    for (int k = 0; k < nr_cells; k++)
      if (cells[k].nodeID == nodeID) my_cells += 1;
    message("node %i now has %zu parts, %zu sparts and %zu gparts in %i cells.",
            nodeID, nr_parts, nr_sparts, nr_gparts, my_cells);
  }

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
 * @brief Repartition the cells amongst the nodes.
 *
 * @param e The #engine.
 */
void engine_repartition(struct engine *e) {

#if defined(WITH_MPI) && defined(HAVE_METIS)

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Be verbose about this. */
  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
  fflush(stdout);

  /* Check that all cells have been drifted to the current time */
  space_check_drift_point(e->s, e->ti_old,
                          e->policy & engine_policy_self_gravity);
#endif

  /* Clear the repartition flag. */
  e->forcerepart = 0;

  /* Nothing to do if only using a single node. Also avoids a METIS
   * bug that doesn't handle this case well. */
  if (e->nr_nodes == 1) return;

  /* Do the repartitioning. */
  partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
                        e->sched.tasks, e->sched.nr_tasks);

  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of
     how many particles go from where to where, then re-allocating
     the parts array, and emitting the sends and receives.
     Finally, the space, tasks, and proxies need to be rebuilt. */

  /* Redistribute the particles between the nodes. */
  engine_redistribute(e);

  /* Make the proxies. */
  engine_makeproxies(e);

  /* Tell the engine it should re-build whenever possible */
  e->forcerebuild = 1;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  error("SWIFT was not compiled with MPI and METIS support.");
#endif
}

/**
 * @brief Decide whether to trigger a repartition of the cells amongst
 * the nodes.
 *
 * @param e The #engine.
 */
void engine_repartition_trigger(struct engine *e) {

#ifdef WITH_MPI

  /* Do nothing if there have not been enough steps since the last
   * repartition; we don't want to repeat this too often or immediately
   * after a repartition step. */
  if (e->step - e->last_repa