/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif

/* This object's header. */
#include "engine.h"

/* Local headers. */
#include "active.h"
#include "atomic.h"
#include "cell.h"
#include "clocks.h"
#include "cycle.h"
#include "debug.h"
#include "error.h"
#include "gravity.h"
#include "hydro.h"
#include "minmax.h"
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "runner.h"
#include "serial_io.h"
#include "single_io.h"
#include "sort_part.h"
#include "statistics.h"
#include "timers.h"
#include "tools.h"
#include "units.h"
#include "version.h"

/* Particle cache size. */
#define CACHE_SIZE 512

const char *engine_policy_names[16] = {"none",
                                       "rand",
                                       "steal",
                                       "keep",
                                       "block",
                                       "cpu_tight",
                                       "mpi",
                                       "numa_affinity",
                                       "hydro",
                                       "self_gravity",
                                       "external_gravity",
                                       "cosmology_integration",
                                       "drift_all",
                                       "cooling",
                                       "sourceterms",
                                       "stars"};

/** The rank of the engine as a global variable (for messages). */
int engine_rank;

/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
 * @param l A pointer to the #link, will be modified atomically.
 * @param t The #task.
 */
void engine_addlink(struct engine *e, struct link **l, struct task *t) {

  /* Get the next free link. */
  const int ind = atomic_inc(&e->nr_links);
  if (ind >= e->size_links) {
    error("Link table overflow.");
  }
  struct link *res = &e->links[ind];

  /* Set it atomically. */
  res->t = t;
  res->next = atomic_swap(l, res);
}

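/* Usage sketch (illustrative only): when the task lists are built, a newly
 * created interaction task is prepended to the link lists of the cells it
 * acts on. Field names such as ci->density are assumptions here (they live
 * in cell.h, not in this file):
 *
 *   struct task *t = scheduler_addtask(&e->sched, task_type_pair,
 *                                      task_subtype_density, 0, 0, ci, cj);
 *   engine_addlink(e, &ci->density, t);
 *   engine_addlink(e, &cj->density, t);
 */
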
/**
 * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
 * i.e. all the O(Npart) tasks.
 *
 * Tasks are only created here. The dependencies will be added later on.
 *
 * Note that there is no need to recurse below the super-cell.
 *
 * @param e The #engine.
 * @param c The #cell.
 */
void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {

  struct scheduler *s = &e->sched;
  const int periodic = e->s->periodic;
  const int is_hydro = (e->policy & engine_policy_hydro);
  const int is_self_gravity = (e->policy & engine_policy_self_gravity);
  const int is_with_cooling = (e->policy & engine_policy_cooling);
  const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);

  /* Are we in a super-cell ? */
  if (c->super == c) {

    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {

      /* Add the two half kicks */
      c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
                                   c, NULL);

      c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0,
                                   c, NULL);

      /* Add the time-step calculation task and its dependency */
      c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none,
                                      0, 0, c, NULL);

      scheduler_addunlock(s, c->kick2, c->timestep);
      scheduler_addunlock(s, c->timestep, c->kick1);

      if (is_self_gravity) {

        /* Initialisation of the multipoles */
        c->init_grav = scheduler_addtask(s, task_type_init_grav,
                                         task_subtype_none, 0, 0, c, NULL);

        /* Gravity non-neighbouring pm calculations */
        c->grav_long_range = scheduler_addtask(
            s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL);

        /* Gravity recursive down-pass */
        c->grav_down = scheduler_addtask(s, task_type_grav_down,
                                         task_subtype_none, 0, 0, c, NULL);

        if (periodic) scheduler_addunlock(s, c->init_grav, c->grav_ghost[0]);
        scheduler_addunlock(s, c->init_grav, c->grav_long_range);
        scheduler_addunlock(s, c->grav_long_range, c->grav_down);
        scheduler_addunlock(s, c->grav_down, c->kick2);
      }

      /* Generate the ghost task. */
      if (is_hydro)
        c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
                                     0, c, NULL);

#ifdef EXTRA_HYDRO_LOOP
      /* Generate the extra ghost task. */
      if (is_hydro)
        c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
                                           task_subtype_none, 0, 0, c, NULL);
#endif

      /* Cooling task */
      if (is_with_cooling) {
        c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
                                       0, 0, c, NULL);

        scheduler_addunlock(s, c->cooling, c->kick2);
      }

      /* add source terms */
      if (is_with_sourceterms) {
        c->sourceterms = scheduler_addtask(s, task_type_sourceterms,
                                           task_subtype_none, 0, 0, c, NULL);
      }
    }

  } else { /* We are above the super-cell so need to go deeper */

#ifdef SWIFT_DEBUG_CHECKS
    if (c->super != NULL) error("Incorrectly set super pointer");
#endif

    /* Recurse. */
    if (c->split)
      for (int k = 0; k < 8; k++)
        if (c->progeny[k] != NULL)
          engine_make_hierarchical_tasks(e, c->progeny[k]);
  }
}

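/* For reference, the unlock calls above create the following per-super-cell
 * ordering (a sketch, "a -> b" meaning a must complete before b):
 *
 *   kick2 -> timestep -> kick1
 *   init_grav -> grav_long_range -> grav_down -> kick2    (self-gravity)
 *   init_grav -> grav_ghost[0]                            (periodic runs)
 *   cooling -> kick2                                      (with cooling)
 */
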
/**
 * @brief Redistribute the particles amongst the nodes according
 *        to their cell's node IDs.
 *
 * The strategy here is as follows:
 * 1) Each node counts the number of particles it has to send to each other
 * node.
 * 2) The number of particles of each type is then exchanged.
 * 3) The particles to send are placed in a temporary buffer in which the
 * part-gpart links are preserved.
 * 4) Each node allocates enough space for the new particles.
 * 5) (Asynchronous) communications are issued to transfer the data.
 *
 * @param e The #engine.
 */
void engine_redistribute(struct engine *e) {

#ifdef WITH_MPI

  const int nr_nodes = e->nr_nodes;
  const int nodeID = e->nodeID;
  struct space *s = e->s;
  struct cell *cells = s->cells_top;
  const int nr_cells = s->nr_cells;
  const int *cdim = s->cdim;
  const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  struct part *parts = s->parts;
  struct xpart *xparts = s->xparts;
  struct gpart *gparts = s->gparts;
  struct spart *sparts = s->sparts;
  ticks tic = getticks();

  /* Allocate temporary arrays to store the counts of particles to be sent
     and the destination of each particle */
  int *counts, *g_counts, *s_counts;
  if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate counts temporary buffer.");
  if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate g_counts temporary buffer.");
  if ((s_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate s_counts temporary buffer.");
  bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
  bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes);
  bzero(s_counts, sizeof(int) * nr_nodes * nr_nodes);

  /* Allocate the destination index arrays. */
  int *dest, *g_dest, *s_dest;
  if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
    error("Failed to allocate dest temporary buffer.");
  if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
    error("Failed to allocate g_dest temporary buffer.");
  if ((s_dest = (int *)malloc(sizeof(int) * s->nr_sparts)) == NULL)
    error("Failed to allocate s_dest temporary buffer.");

  /* Get destination of each particle */
  for (size_t k = 0; k < s->nr_parts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (parts[k].x[j] < 0.0)
        parts[k].x[j] += dim[j];
      else if (parts[k].x[j] >= dim[j])
        parts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, parts[k].x[0] * iwidth[0], parts[k].x[1] * iwidth[1],
                   parts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
            parts[k].x[0], parts[k].x[1], parts[k].x[2]);
#endif

    dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    counts[nodeID * nr_nodes + dest[k]] += 1;
  }

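  /* Worked example of the indexing above (illustrative numbers): with
   * nr_nodes == 3, a particle sitting on rank 1 whose destination cell lives
   * on rank 2 increments counts[1 * 3 + 2] == counts[5], i.e. row = sender
   * and column = receiver in the flattened nr_nodes x nr_nodes matrix. */
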
  /* Sort the particles according to their cell index. */
  if (s->nr_parts > 0)
    space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the parts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_parts; k++) {
    const struct part *p = &s->parts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
                   p->x[2] * s->iwidth[2]);

    /* New cell of this part */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (dest[k] != new_node)
      error("part's new node index not matching sorted index.");

    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
      error("part not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of parts. */
  if (s->nr_parts > 0) {
    int current_dest = dest[0];
    size_t count_this_dest = 0;
    for (size_t k = 0; k < s->nr_parts; ++k) {
      if (s->parts[k].gpart != NULL) {

        /* As the addresses will be invalidated by the communications, we will
         * instead store the absolute index from the start of the sub-array of
         * particles to be sent to a given node.
         * Recall that gparts without partners have a positive id.
         * We will restore the pointers on the receiving node later on. */
        if (dest[k] != current_dest) {
          current_dest = dest[k];
          count_this_dest = 0;
        }

#ifdef SWIFT_DEBUG_CHECKS
        if (s->parts[k].gpart->id_or_neg_offset > 0)
          error("Trying to link a partnerless gpart !");
#endif

        s->parts[k].gpart->id_or_neg_offset = -count_this_dest;
        count_this_dest++;
      }
    }
  }

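  /* Illustrative example of the offset trick above: if a part is the 4th
   * particle (index 3) within the block headed for a given node, its gpart's
   * id_or_neg_offset is set to -3 here. On the receiving side the pairing is
   * recovered as partner_index = offset_parts - id_or_neg_offset
   * = offset_parts + 3, which is exactly what the re-linking loop further
   * down does. */
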
  /* Get destination of each s-particle */
  for (size_t k = 0; k < s->nr_sparts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (sparts[k].x[j] < 0.0)
        sparts[k].x[j] += dim[j];
      else if (sparts[k].x[j] >= dim[j])
        sparts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, sparts[k].x[0] * iwidth[0], sparts[k].x[1] * iwidth[1],
                   sparts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for spart %zu at [%.3e,%.3e,%.3e].", cid, k,
            sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]);
#endif

    s_dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    s_counts[nodeID * nr_nodes + s_dest[k]] += 1;
  }

  /* Sort the s-particles according to their cell index. */
  if (s->nr_sparts > 0)
    space_sparts_sort(s, s_dest, s->nr_sparts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the sparts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_sparts; k++) {
    const struct spart *sp = &s->sparts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
                   sp->x[2] * s->iwidth[2]);

    /* New cell of this spart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (s_dest[k] != new_node)
      error("spart's new node index not matching sorted index.");

    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
      error("spart not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of sparts. */
  if (s->nr_sparts > 0) {
    int current_dest = s_dest[0];
    size_t count_this_dest = 0;
    for (size_t k = 0; k < s->nr_sparts; ++k) {
      if (s->sparts[k].gpart != NULL) {

        /* As the addresses will be invalidated by the communications, we will
         * instead store the absolute index from the start of the sub-array of
         * particles to be sent to a given node.
         * Recall that gparts without partners have a positive id.
         * We will restore the pointers on the receiving node later on. */
        if (s_dest[k] != current_dest) {
          current_dest = s_dest[k];
          count_this_dest = 0;
        }

#ifdef SWIFT_DEBUG_CHECKS
        if (s->sparts[k].gpart->id_or_neg_offset > 0)
          error("Trying to link a partnerless gpart !");
#endif

        s->sparts[k].gpart->id_or_neg_offset = -count_this_dest;
        count_this_dest++;
      }
    }
  }

  /* Get destination of each g-particle */
  for (size_t k = 0; k < s->nr_gparts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (gparts[k].x[j] < 0.0)
        gparts[k].x[j] += dim[j];
      else if (gparts[k].x[j] >= dim[j])
        gparts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, gparts[k].x[0] * iwidth[0], gparts[k].x[1] * iwidth[1],
                   gparts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for gpart %zu at [%.3e,%.3e,%.3e].", cid, k,
            gparts[k].x[0], gparts[k].x[1], gparts[k].x[2]);
#endif

    g_dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    g_counts[nodeID * nr_nodes + g_dest[k]] += 1;
  }

  /* Sort the gparticles according to their cell index. */
  if (s->nr_gparts > 0)
    space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the gparts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_gparts; k++) {
    const struct gpart *gp = &s->gparts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
                   gp->x[2] * s->iwidth[2]);

    /* New cell of this gpart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (g_dest[k] != new_node)
      error("gpart's new node index not matching sorted index.");

    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
      error("gpart not sorted into the right top-level cell!");
  }
#endif

  /* Get all the counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                    MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce particle transfer counts.");

  /* Get all the g_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce gparticle transfer counts.");

  /* Get all the s_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce sparticle transfer counts.");

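  /* Note: each rank only filled in its own row of these matrices (all other
   * entries were left at zero by the bzero() calls above), so an element-wise
   * MPI_SUM across all ranks leaves every node with the complete
   * sender/receiver transfer matrix. */
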
  /* Report how many particles will be moved. */
  if (e->verbose) {
    if (e->nodeID == 0) {
      size_t total = 0, g_total = 0, s_total = 0;
      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0;
      for (int p = 0, r = 0; p < nr_nodes; p++) {
        for (int n = 0; n < nr_nodes; n++) {
          total += counts[r];
          g_total += g_counts[r];
          s_total += s_counts[r];
          if (p == n) {
            unmoved += counts[r];
            g_unmoved += g_counts[r];
            s_unmoved += s_counts[r];
          }
          r++;
        }
      }
      if (total > 0)
        message("%zu of %zu (%.2f%%) of particles moved", total - unmoved,
                total, 100.0 * (double)(total - unmoved) / (double)total);
      if (g_total > 0)
        message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved,
                g_total,
                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
      if (s_total > 0)
        message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved,
                s_total,
                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
    }
  }

  /* Each node knows how many parts, sparts and gparts will be transferred
     to every other node. We can start preparing to receive data */

  /* Get the new number of parts, gparts and sparts for this node */
  size_t nr_parts = 0, nr_gparts = 0, nr_sparts = 0;
  for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
  for (int k = 0; k < nr_nodes; k++)
    nr_gparts += g_counts[k * nr_nodes + nodeID];
  for (int k = 0; k < nr_nodes; k++)
    nr_sparts += s_counts[k * nr_nodes + nodeID];

  /* Allocate the new arrays with some extra margin */
  struct part *parts_new = NULL;
  struct xpart *xparts_new = NULL;
  struct gpart *gparts_new = NULL;
  struct spart *sparts_new = NULL;
  if (posix_memalign((void **)&parts_new, part_align,
                     sizeof(struct part) * nr_parts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new part data.");
  if (posix_memalign((void **)&xparts_new, xpart_align,
                     sizeof(struct xpart) * nr_parts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new xpart data.");
  if (posix_memalign((void **)&gparts_new, gpart_align,
                     sizeof(struct gpart) * nr_gparts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new gpart data.");
  if (posix_memalign((void **)&sparts_new, spart_align,
                     sizeof(struct spart) * nr_sparts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new spart data.");

  /* Prepare MPI requests for the asynchronous communications */
  MPI_Request *reqs;
  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 8 * nr_nodes)) ==
      NULL)
    error("Failed to allocate MPI request list.");
  for (int k = 0; k < 8 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;

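  /* Layout used below (for reference): for each peer node k, request slots
   * reqs[8 * k + 0..3] hold the Isends of parts, xparts, gparts and sparts
   * respectively, and slots reqs[8 * k + 4..7] hold the matching Irecvs.
   * Message tags are 4 * ind + {0, 1, 2, 3}, where ind is the
   * (sender, receiver) index into the counts matrices, so each particle type
   * gets a distinct tag per pair of nodes. */
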
  /* Emit the sends and recvs for the particle, gparticle and sparticle
   * data. */
  size_t offset_send = 0, offset_recv = 0;
  size_t g_offset_send = 0, g_offset_recv = 0;
  size_t s_offset_send = 0, s_offset_recv = 0;
  for (int k = 0; k < nr_nodes; k++) {

    /* Indices in the count arrays of the node of interest */
    const int ind_send = nodeID * nr_nodes + k;
    const int ind_recv = k * nr_nodes + nodeID;

    /* Are we sending any part/xpart ? */
    if (counts[ind_send] > 0) {

      /* message("Sending %d part to node %d", counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&parts_new[offset_recv], &s->parts[offset_send],
               sizeof(struct part) * counts[ind_recv]);
        memcpy(&xparts_new[offset_recv], &s->xparts[offset_send],
               sizeof(struct xpart) * counts[ind_recv]);
        offset_send += counts[ind_send];
        offset_recv += counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type,
                      k, 4 * ind_send + 0, MPI_COMM_WORLD,
                      &reqs[8 * k + 0]) != MPI_SUCCESS)
          error("Failed to isend parts to node %i.", k);
        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type,
                      k, 4 * ind_send + 1, MPI_COMM_WORLD,
                      &reqs[8 * k + 1]) != MPI_SUCCESS)
          error("Failed to isend xparts to node %i.", k);
        offset_send += counts[ind_send];
      }
    }

    /* Are we sending any gpart ? */
    if (g_counts[ind_send] > 0) {

      /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send],
               sizeof(struct gpart) * g_counts[ind_recv]);
        g_offset_send += g_counts[ind_send];
        g_offset_recv += g_counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send],
                      gpart_mpi_type, k, 4 * ind_send + 2, MPI_COMM_WORLD,
                      &reqs[8 * k + 2]) != MPI_SUCCESS)
          error("Failed to isend gparts to node %i.", k);
        g_offset_send += g_counts[ind_send];
      }
    }

    /* Are we sending any spart ? */
    if (s_counts[ind_send] > 0) {

      /* message("Sending %d spart to node %d", s_counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&sparts_new[s_offset_recv], &s->sparts[s_offset_send],
               sizeof(struct spart) * s_counts[ind_recv]);
        s_offset_send += s_counts[ind_send];
        s_offset_recv += s_counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->sparts[s_offset_send], s_counts[ind_send],
                      spart_mpi_type, k, 4 * ind_send + 3, MPI_COMM_WORLD,
                      &reqs[8 * k + 3]) != MPI_SUCCESS)
          error("Failed to isend sparts to node %i.", k);
        s_offset_send += s_counts[ind_send];
      }
    }

    /* Now emit the corresponding Irecv() */

    /* Are we receiving any part/xpart from this node ? */
    if (k != nodeID && counts[ind_recv] > 0) {
      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k,
                    4 * ind_recv + 0, MPI_COMM_WORLD,
                    &reqs[8 * k + 4]) != MPI_SUCCESS)
        error("Failed to emit irecv of parts from node %i.", k);
      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type,
                    k, 4 * ind_recv + 1, MPI_COMM_WORLD,
                    &reqs[8 * k + 5]) != MPI_SUCCESS)
        error("Failed to emit irecv of xparts from node %i.", k);
      offset_recv += counts[ind_recv];
    }

    /* Are we receiving any gpart from this node ? */
    if (k != nodeID && g_counts[ind_recv] > 0) {
      if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv],
                    gpart_mpi_type, k, 4 * ind_recv + 2, MPI_COMM_WORLD,
                    &reqs[8 * k + 6]) != MPI_SUCCESS)
        error("Failed to emit irecv of gparts from node %i.", k);
      g_offset_recv += g_counts[ind_recv];
    }

    /* Are we receiving any spart from this node ? */
    if (k != nodeID && s_counts[ind_recv] > 0) {
      if (MPI_Irecv(&sparts_new[s_offset_recv], s_counts[ind_recv],
                    spart_mpi_type, k, 4 * ind_recv + 3, MPI_COMM_WORLD,
                    &reqs[8 * k + 7]) != MPI_SUCCESS)
        error("Failed to emit irecv of sparts from node %i.", k);
      s_offset_recv += s_counts[ind_recv];
    }
  }

  /* Wait for all the sends and recvs to tumble in. */
  MPI_Status stats[8 * nr_nodes];
  int res;
  if ((res = MPI_Waitall(8 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
    for (int k = 0; k < 8 * nr_nodes; k++) {
      char buff[MPI_MAX_ERROR_STRING];
      MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
      message("request %i has error '%s'.", k, buff);
    }
    error("Failed during waitall for part data.");
  }

  /* All particles have now arrived. Time for some final operations on the
     stuff we just received */

  /* Restore the part<->gpart and spart<->gpart links */
  size_t offset_parts = 0, offset_sparts = 0, offset_gparts = 0;
  for (int node = 0; node < nr_nodes; ++node) {

    const int ind_recv = node * nr_nodes + nodeID;
    const size_t count_parts = counts[ind_recv];
    const size_t count_gparts = g_counts[ind_recv];
    const size_t count_sparts = s_counts[ind_recv];

    /* Loop over the gparts received from that node */
    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {

      /* Does this gpart have a gas partner ? */
      if (gparts_new[k].type == swift_type_gas) {

        const ptrdiff_t partner_index =
            offset_parts - gparts_new[k].id_or_neg_offset;

        /* Re-link */
        gparts_new[k].id_or_neg_offset = -partner_index;
        parts_new[partner_index].gpart = &gparts_new[k];
      }

      /* Does this gpart have a star partner ? */
      if (gparts_new[k].type == swift_type_star) {

        const ptrdiff_t partner_index =
            offset_sparts - gparts_new[k].id_or_neg_offset;

        /* Re-link */
        gparts_new[k].id_or_neg_offset = -partner_index;
        sparts_new[partner_index].gpart = &gparts_new[k];
      }
    }

    offset_parts += count_parts;
    offset_gparts += count_gparts;
    offset_sparts += count_sparts;
  }

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that all parts are in the right place. */
  for (size_t k = 0; k < nr_parts; k++) {
    const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
                               parts_new[k].x[1] * iwidth[1],
                               parts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
            cells[cid].nodeID);
  }
  for (size_t k = 0; k < nr_gparts; k++) {
    const int cid = cell_getid(cdim, gparts_new[k].x[0] * iwidth[0],
                               gparts_new[k].x[1] * iwidth[1],
                               gparts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received g-particle (%zu) that does not belong here (nodeID=%i).",
            k, cells[cid].nodeID);
  }
  for (size_t k = 0; k < nr_sparts; k++) {
    const int cid = cell_getid(cdim, sparts_new[k].x[0] * iwidth[0],
                               sparts_new[k].x[1] * iwidth[1],
                               sparts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received s-particle (%zu) that does not belong here (nodeID=%i).",
            k, cells[cid].nodeID);
  }

  /* Verify that the links are correct */
  part_verify_links(parts_new, gparts_new, sparts_new, nr_parts, nr_gparts,
                    nr_sparts, e->verbose);
#endif

  /* Set the new part data, free the old. */
  free(parts);
  free(xparts);
  free(gparts);
  free(sparts);
  s->parts = parts_new;
  s->xparts = xparts_new;
  s->gparts = gparts_new;
  s->sparts = sparts_new;
  s->nr_parts = nr_parts;
  s->nr_gparts = nr_gparts;
  s->nr_sparts = nr_sparts;
  s->size_parts = engine_redistribute_alloc_margin * nr_parts;
  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts;
  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts;

  /* Clean up the temporary stuff. */
  free(reqs);
  free(counts);
  free(g_counts);
  free(s_counts);
  free(dest);
  free(g_dest);
  free(s_dest);

  /* Be verbose about what just happened. */
  if (e->verbose) {
    int my_cells = 0;
    for (int k = 0; k < nr_cells; k++)
      if (cells[k].nodeID == nodeID) my_cells += 1;
    message("node %i now has %zu parts, %zu sparts and %zu gparts in %i cells.",
            nodeID, nr_parts, nr_sparts, nr_gparts, my_cells);
  }

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
 * @brief Repartition the cells amongst the nodes.
 *
 * @param e The #engine.
 */
void engine_repartition(struct engine *e) {

#if defined(WITH_MPI) && defined(HAVE_METIS)

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Be verbose about this. */
  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
  fflush(stdout);

  /* Check that all cells have been drifted to the current time */
  space_check_drift_point(e->s, e->ti_old,
                          e->policy & engine_policy_self_gravity);
#endif

  /* Clear the repartition flag. */
  e->forcerepart = 0;

  /* Nothing to do if only using a single node. Also avoids METIS
   * bug that doesn't handle this case well. */
  if (e->nr_nodes == 1) return;

  /* Do the repartitioning. */
  partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
                        e->sched.tasks, e->sched.nr_tasks);

  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of
     how many particles go from where to where, then re-allocating
     the parts array, and emitting the sends and receives.
     Finally, the space, tasks, and proxies need to be rebuilt. */

  /* Redistribute the particles between the nodes. */
  engine_redistribute(e);

  /* Make the proxies. */
  engine_makeproxies(e);

  /* Tell the engine it should re-build whenever possible */
  e->forcerebuild = 1;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#else
  error("SWIFT was not compiled with MPI and METIS support.");