/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif

/* This object's header. */
#include "engine.h"

/* Local headers. */
#include "active.h"
#include "atomic.h"
#include "cell.h"
#include "clocks.h"
#include "cycle.h"
#include "debug.h"
#include "error.h"
#include "gravity.h"
#include "hydro.h"
#include "minmax.h"
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "runner.h"
#include "serial_io.h"
#include "single_io.h"
#include "statistics.h"
#include "timers.h"
#include "tools.h"
#include "units.h"
#include "version.h"

/* Particle cache size. */
#define CACHE_SIZE 512

const char *engine_policy_names[16] = {"none",
                                       "rand",
                                       "steal",
                                       "keep",
                                       "block",
                                       "cpu_tight",
                                       "mpi",
                                       "numa_affinity",
                                       "hydro",
                                       "self_gravity",
                                       "external_gravity",
                                       "cosmology_integration",
                                       "drift_all",
                                       "cooling",
                                       "sourceterms",
                                       "stars"};
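
/* Note: the ordering of these names is assumed to mirror the engine_policy
 * flags defined in engine.h; a mismatch between the two would mislabel
 * policies in log output. */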

/** The rank of the engine as a global variable (for messages). */
int engine_rank;

/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
 * @param l A pointer to the #link, will be modified atomically.
 * @param t The #task.
 */
void engine_addlink(struct engine *e, struct link **l, struct task *t) {

  /* Get the next free link. */
  const int ind = atomic_inc(&e->nr_links);
  if (ind >= e->size_links) {
    error("Link table overflow.");
  }
  struct link *res = &e->links[ind];

  /* Set it atomically. */
  res->t = t;
  res->next = atomic_swap(l, res);
}
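
/* Note: the atomic_inc / atomic_swap pair makes engine_addlink safe to call
 * concurrently: each caller reserves its own slot in e->links, and the swap
 * publishes the new list head while returning the previous head as the
 * tail. */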

/**
 * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
 * i.e. all the O(Npart) tasks.
 *
 * Tasks are only created here. The dependencies will be added later on.
 *
 * Note that there is no need to recurse below the super-cell.
 *
 * @param e The #engine.
 * @param c The #cell.
 */
void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) {

  struct scheduler *s = &e->sched;
  const int is_hydro = (e->policy & engine_policy_hydro);
  const int is_self_gravity = (e->policy & engine_policy_self_gravity);
  const int is_with_cooling = (e->policy & engine_policy_cooling);
  const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);

  /* Are we in a super-cell ? */
  if (c->super == c) {

    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {

      /* Add the init task. */
      c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c,
                                  NULL, 0);

      /* Add the two half kicks */
      c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
                                   c, NULL, 0);

      c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0,
                                   c, NULL, 0);

      /* Add the time-step calculation task and its dependency */
      c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none,
                                      0, 0, c, NULL, 0);

      scheduler_addunlock(s, c->kick2, c->timestep);
      scheduler_addunlock(s, c->timestep, c->kick1);

      /* Add the drift task and its dependencies. */
      c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0,
                                   c, NULL, 0);
      scheduler_addunlock(s, c->drift, c->init);

      if (is_self_gravity) {

        /* Gravity non-neighbouring pm calculations */
        c->grav_long_range = scheduler_addtask(
            s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL, 0);

        /* Gravity top-level periodic calculation */
        c->grav_top_level = scheduler_addtask(
            s, task_type_grav_top_level, task_subtype_none, 0, 0, c, NULL, 0);

        /* Gravity recursive down-pass */
        c->grav_down = scheduler_addtask(s, task_type_grav_down,
                                         task_subtype_none, 0, 0, c, NULL, 0);

        scheduler_addunlock(s, c->init, c->grav_long_range);
        scheduler_addunlock(s, c->init, c->grav_top_level);
        scheduler_addunlock(s, c->grav_long_range, c->grav_down);
        scheduler_addunlock(s, c->grav_top_level, c->grav_down);
        scheduler_addunlock(s, c->grav_down, c->kick2);
      }

      /* Generate the ghost task. */
      if (is_hydro)
        c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0,
                                     0, c, NULL, 0);

#ifdef EXTRA_HYDRO_LOOP
      /* Generate the extra ghost task. */
      if (is_hydro)
        c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
                                           task_subtype_none, 0, 0, c, NULL, 0);
#endif

      /* Cooling task */
      if (is_with_cooling) {
        c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
                                       0, 0, c, NULL, 0);

        scheduler_addunlock(s, c->cooling, c->kick2);
      }

      /* add source terms */
      if (is_with_sourceterms) {
        c->sourceterms = scheduler_addtask(s, task_type_sourceterms,
                                           task_subtype_none, 0, 0, c, NULL, 0);
      }
    }

  } else { /* We are above the super-cell so need to go deeper */

#ifdef SWIFT_DEBUG_CHECKS
    if (c->super != NULL) error("Incorrectly set super pointer");
#endif

    /* Recurse. */
    if (c->split)
      for (int k = 0; k < 8; k++)
        if (c->progeny[k] != NULL)
          engine_make_hierarchical_tasks(e, c->progeny[k]);
  }
}
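
/* For reference, the unlocks created above chain the super-cell tasks as
 *   drift -> init -> (long-range & top-level gravity -> down-pass) ->
 *   kick2 -> timestep -> kick1,
 * with cooling also unlocking kick2; the ghost tasks get their dependencies
 * when the interaction tasks are built later. */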

/**
 * @brief Redistribute the particles amongst the nodes according
 *      to their cell's node IDs.
 *
 * The strategy here is as follows:
 * 1) Each node counts the number of particles it has to send to each other
 * node.
 * 2) The number of particles of each type is then exchanged.
 * 3) The particles to send are placed in a temporary buffer in which the
 * part-gpart links are preserved.
 * 4) Each node allocates enough space for the new particles.
 * 5) (Asynchronous) communications are issued to transfer the data.
 *
 * @param e The #engine.
 */
void engine_redistribute(struct engine *e) {

#ifdef WITH_MPI

  const int nr_nodes = e->nr_nodes;
  const int nodeID = e->nodeID;
  struct space *s = e->s;
  struct cell *cells = s->cells_top;
  const int nr_cells = s->nr_cells;
  const int *cdim = s->cdim;
  const double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
  struct part *parts = s->parts;
  struct xpart *xparts = s->xparts;
  struct gpart *gparts = s->gparts;
  struct spart *sparts = s->sparts;
  ticks tic = getticks();

  /* Allocate temporary arrays to store the counts of particles to be sent
     and the destination of each particle */
  int *counts, *g_counts, *s_counts;
  if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate counts temporary buffer.");
  if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate g_counts temporary buffer.");
  if ((s_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate s_counts temporary buffer.");
  bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
  bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes);
  bzero(s_counts, sizeof(int) * nr_nodes * nr_nodes);

  /* Allocate the destination index arrays. */
  int *dest, *g_dest, *s_dest;
  if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
    error("Failed to allocate dest temporary buffer.");
  if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
    error("Failed to allocate g_dest temporary buffer.");
  if ((s_dest = (int *)malloc(sizeof(int) * s->nr_sparts)) == NULL)
    error("Failed to allocate s_dest temporary buffer.");

  /* Get destination of each particle */
  for (size_t k = 0; k < s->nr_parts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (parts[k].x[j] < 0.0)
        parts[k].x[j] += dim[j];
      else if (parts[k].x[j] >= dim[j])
        parts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, parts[k].x[0] * iwidth[0], parts[k].x[1] * iwidth[1],
                   parts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
            parts[k].x[0], parts[k].x[1], parts[k].x[2]);
#endif

    dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    counts[nodeID * nr_nodes + dest[k]] += 1;
  }
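
  /* E.g. with nr_nodes == 3 the counts matrix is laid out row-major as
   * counts[from * 3 + to], so counts[1 * 3 + 2] is the number of parts node 1
   * sends to node 2, and the diagonal entries count particles that stay on
   * their current node. */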

  /* Sort the particles according to their cell index. */
  if (s->nr_parts > 0)
    space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the parts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_parts; k++) {
    const struct part *p = &s->parts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
                   p->x[2] * s->iwidth[2]);

    /* New cell of this part */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (dest[k] != new_node)
      error("part's new node index not matching sorted index.");

    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
      error("part not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of parts. */
  if (s->nr_parts > 0) {
    int current_dest = dest[0];
    size_t count_this_dest = 0;
    for (size_t k = 0; k < s->nr_parts; ++k) {
      if (s->parts[k].gpart != NULL) {

        /* As the addresses will be invalidated by the communications, we will
         * instead store the absolute index from the start of the sub-array of
         * particles to be sent to a given node.
         * Recall that gparts without partners have a positive id.
         * We will restore the pointers on the receiving node later on. */
        if (dest[k] != current_dest) {
          current_dest = dest[k];
          count_this_dest = 0;
        }

#ifdef SWIFT_DEBUG_CHECKS
        if (s->parts[k].gpart->id_or_neg_offset > 0)
          error("Trying to link a partnerless gpart !");
#endif

        s->parts[k].gpart->id_or_neg_offset = -count_this_dest;
        count_this_dest++;
      }
    }
  }

  /* Get destination of each s-particle */
  for (size_t k = 0; k < s->nr_sparts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (sparts[k].x[j] < 0.0)
        sparts[k].x[j] += dim[j];
      else if (sparts[k].x[j] >= dim[j])
        sparts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, sparts[k].x[0] * iwidth[0], sparts[k].x[1] * iwidth[1],
                   sparts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for part %zu at [%.3e,%.3e,%.3e].", cid, k,
            sparts[k].x[0], sparts[k].x[1], sparts[k].x[2]);
#endif

    s_dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    s_counts[nodeID * nr_nodes + s_dest[k]] += 1;
  }

  /* Sort the s-particles according to their cell index. */
  if (s->nr_sparts > 0)
    space_sparts_sort(s, s_dest, s->nr_sparts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the sparts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_sparts; k++) {
    const struct spart *sp = &s->sparts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
                   sp->x[2] * s->iwidth[2]);

    /* New cell of this spart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (s_dest[k] != new_node)
      error("spart's new node index not matching sorted index.");

    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
      error("spart not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of sparts. */
  if (s->nr_sparts > 0) {
    int current_dest = s_dest[0];
    size_t count_this_dest = 0;
    for (size_t k = 0; k < s->nr_sparts; ++k) {
      if (s->sparts[k].gpart != NULL) {

        /* As the addresses will be invalidated by the communications, we will
         * instead store the absolute index from the start of the sub-array of
         * particles to be sent to a given node.
         * Recall that gparts without partners have a positive id.
         * We will restore the pointers on the receiving node later on. */
        if (s_dest[k] != current_dest) {
          current_dest = s_dest[k];
          count_this_dest = 0;
        }

#ifdef SWIFT_DEBUG_CHECKS
        if (s->sparts[k].gpart->id_or_neg_offset > 0)
          error("Trying to link a partnerless gpart !");
#endif

        s->sparts[k].gpart->id_or_neg_offset = -count_this_dest;
        count_this_dest++;
      }
    }
  }

  /* Get destination of each g-particle */
  for (size_t k = 0; k < s->nr_gparts; k++) {

    /* Periodic boundary conditions */
    for (int j = 0; j < 3; j++) {
      if (gparts[k].x[j] < 0.0)
        gparts[k].x[j] += dim[j];
      else if (gparts[k].x[j] >= dim[j])
        gparts[k].x[j] -= dim[j];
    }
    const int cid =
        cell_getid(cdim, gparts[k].x[0] * iwidth[0], gparts[k].x[1] * iwidth[1],
                   gparts[k].x[2] * iwidth[2]);
#ifdef SWIFT_DEBUG_CHECKS
    if (cid < 0 || cid >= s->nr_cells)
      error("Bad cell id %i for gpart %zu at [%.3e,%.3e,%.3e].", cid, k,
            gparts[k].x[0], gparts[k].x[1], gparts[k].x[2]);
#endif

    g_dest[k] = cells[cid].nodeID;

    /* The counts array is indexed as count[from * nr_nodes + to]. */
    g_counts[nodeID * nr_nodes + g_dest[k]] += 1;
  }

  /* Sort the g-particles according to their cell index. */
  if (s->nr_gparts > 0)
    space_gparts_sort(s, g_dest, s->nr_gparts, 0, nr_nodes - 1, e->verbose);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the gparts have been sorted correctly. */
  for (size_t k = 0; k < s->nr_gparts; k++) {
    const struct gpart *gp = &s->gparts[k];

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
                   gp->x[2] * s->iwidth[2]);

    /* New cell of this gpart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (g_dest[k] != new_node)
      error("gpart's new node index not matching sorted index.");

    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
      error("gpart not sorted into the right top-level cell!");
  }
#endif

  /* Get all the counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                    MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce particle transfer counts.");

  /* Get all the g_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce gparticle transfer counts.");

  /* Get all the s_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce sparticle transfer counts.");

  /* Report how many particles will be moved. */
  if (e->verbose) {
    if (e->nodeID == 0) {
      size_t total = 0, g_total = 0, s_total = 0;
      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0;
      for (int p = 0, r = 0; p < nr_nodes; p++) {
        for (int n = 0; n < nr_nodes; n++) {
          total += counts[r];
          g_total += g_counts[r];
          s_total += s_counts[r];
          if (p == n) {
            unmoved += counts[r];
            g_unmoved += g_counts[r];
            s_unmoved += s_counts[r];
          }
          r++;
        }
      }
      if (total > 0)
        message("%zu of %zu (%.2f%%) of particles moved", total - unmoved,
                total, 100.0 * (double)(total - unmoved) / (double)total);
      if (g_total > 0)
        message("%zu of %zu (%.2f%%) of g-particles moved", g_total - g_unmoved,
                g_total,
                100.0 * (double)(g_total - g_unmoved) / (double)g_total);
      if (s_total > 0)
        message("%zu of %zu (%.2f%%) of s-particles moved", s_total - s_unmoved,
                s_total,
                100.0 * (double)(s_total - s_unmoved) / (double)s_total);
    }
  }

  /* Each node now knows how many parts, sparts and gparts will be transferred
     to every other node. We can start preparing to receive data. */

  /* Get the new number of parts, gparts and sparts for this node */
  size_t nr_parts = 0, nr_gparts = 0, nr_sparts = 0;
  for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
  for (int k = 0; k < nr_nodes; k++)
    nr_gparts += g_counts[k * nr_nodes + nodeID];
  for (int k = 0; k < nr_nodes; k++)
    nr_sparts += s_counts[k * nr_nodes + nodeID];

  /* Allocate the new arrays with some extra margin */
  struct part *parts_new = NULL;
  struct xpart *xparts_new = NULL;
  struct gpart *gparts_new = NULL;
  struct spart *sparts_new = NULL;
  if (posix_memalign((void **)&parts_new, part_align,
                     sizeof(struct part) * nr_parts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new part data.");
  if (posix_memalign((void **)&xparts_new, xpart_align,
                     sizeof(struct xpart) * nr_parts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new xpart data.");
  if (posix_memalign((void **)&gparts_new, gpart_align,
                     sizeof(struct gpart) * nr_gparts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new gpart data.");
  if (posix_memalign((void **)&sparts_new, spart_align,
                     sizeof(struct spart) * nr_sparts *
                         engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new spart data.");

  /* Prepare MPI requests for the asynchronous communications */
  MPI_Request *reqs;
  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 8 * nr_nodes)) ==
      NULL)
    error("Failed to allocate MPI request list.");
  for (int k = 0; k < 8 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;

  /* Emit the sends and recvs for the particle and gparticle data. */
  size_t offset_send = 0, offset_recv = 0;
  size_t g_offset_send = 0, g_offset_recv = 0;
  size_t s_offset_send = 0, s_offset_recv = 0;
  for (int k = 0; k < nr_nodes; k++) {

    /* Indices in the count arrays of the node of interest */
    const int ind_send = nodeID * nr_nodes + k;
    const int ind_recv = k * nr_nodes + nodeID;

    /* Are we sending any part/xpart ? */
    if (counts[ind_send] > 0) {

      /* message("Sending %d part to node %d", counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&parts_new[offset_recv], &s->parts[offset_send],
               sizeof(struct part) * counts[ind_recv]);
        memcpy(&xparts_new[offset_recv], &s->xparts[offset_send],
               sizeof(struct xpart) * counts[ind_recv]);
        offset_send += counts[ind_send];
        offset_recv += counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type,
                      k, 4 * ind_send + 0, MPI_COMM_WORLD,
                      &reqs[8 * k + 0]) != MPI_SUCCESS)
          error("Failed to isend parts to node %i.", k);
        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type,
                      k, 4 * ind_send + 1, MPI_COMM_WORLD,
                      &reqs[8 * k + 1]) != MPI_SUCCESS)
          error("Failed to isend xparts to node %i.", k);
        offset_send += counts[ind_send];
      }
    }

    /* Are we sending any gpart ? */
    if (g_counts[ind_send] > 0) {

      /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send],
               sizeof(struct gpart) * g_counts[ind_recv]);
        g_offset_send += g_counts[ind_send];
        g_offset_recv += g_counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send],
                      gpart_mpi_type, k, 4 * ind_send + 2, MPI_COMM_WORLD,
                      &reqs[8 * k + 2]) != MPI_SUCCESS)
          error("Failed to isend gparts to node %i.", k);
        g_offset_send += g_counts[ind_send];
      }
    }

    /* Are we sending any spart ? */
    if (s_counts[ind_send] > 0) {

      /* message("Sending %d spart to node %d", s_counts[ind_send], k); */

      /* If the send is to the same node, just copy */
      if (k == nodeID) {
        memcpy(&sparts_new[s_offset_recv], &s->sparts[s_offset_send],
               sizeof(struct spart) * s_counts[ind_recv]);
        s_offset_send += s_counts[ind_send];
        s_offset_recv += s_counts[ind_recv];

        /* Else, emit some communications */
      } else {
        if (MPI_Isend(&s->sparts[s_offset_send], s_counts[ind_send],
                      spart_mpi_type, k, 4 * ind_send + 3, MPI_COMM_WORLD,
                      &reqs[8 * k + 3]) != MPI_SUCCESS)
          error("Failed to isend sparts to node %i.", k);
        s_offset_send += s_counts[ind_send];
      }
    }

    /* Now emit the corresponding Irecv() */

    /* Are we receiving any part/xpart from this node ? */
    if (k != nodeID && counts[ind_recv] > 0) {
      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k,
                    4 * ind_recv + 0, MPI_COMM_WORLD,
                    &reqs[8 * k + 4]) != MPI_SUCCESS)
        error("Failed to emit irecv of parts from node %i.", k);
      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type,
                    k, 4 * ind_recv + 1, MPI_COMM_WORLD,
                    &reqs[8 * k + 5]) != MPI_SUCCESS)
        error("Failed to emit irecv of xparts from node %i.", k);
      offset_recv += counts[ind_recv];
    }

    /* Are we receiving any gpart from this node ? */
    if (k != nodeID && g_counts[ind_recv] > 0) {
      if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv],
                    gpart_mpi_type, k, 4 * ind_recv + 2, MPI_COMM_WORLD,
                    &reqs[8 * k + 6]) != MPI_SUCCESS)
        error("Failed to emit irecv of gparts from node %i.", k);
      g_offset_recv += g_counts[ind_recv];
    }

    /* Are we receiving any spart from this node ? */
    if (k != nodeID && s_counts[ind_recv] > 0) {
      if (MPI_Irecv(&sparts_new[s_offset_recv], s_counts[ind_recv],
                    spart_mpi_type, k, 4 * ind_recv + 3, MPI_COMM_WORLD,
                    &reqs[8 * k + 7]) != MPI_SUCCESS)
        error("Failed to emit irecv of sparts from node %i.", k);
      s_offset_recv += s_counts[ind_recv];
    }
  }
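
  /* Note on the tags used above: 4 * ind + {0,1,2,3} makes each message tag
   * unique to the (sender, receiver) pair (through ind) and to the payload
   * type (part, xpart, gpart, spart), so every Isend is matched by exactly
   * one Irecv. Likewise reqs[8 * k + {0..7}] holds the four sends and four
   * receives exchanged with node k. */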

  /* Wait for all the sends and recvs to tumble in. */
  MPI_Status stats[8 * nr_nodes];
  int res;
  if ((res = MPI_Waitall(8 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
    for (int k = 0; k < 8 * nr_nodes; k++) {
      char buff[MPI_MAX_ERROR_STRING];
      MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
      message("request %i has error '%s'.", k, buff);
    }
    error("Failed during waitall for part data.");
  }

  /* All particles have now arrived. Time for some final operations on the
     stuff we just received */

  /* Restore the part<->gpart and spart<->gpart links */
  size_t offset_parts = 0, offset_sparts = 0, offset_gparts = 0;
  for (int node = 0; node < nr_nodes; ++node) {

    const int ind_recv = node * nr_nodes + nodeID;
    const size_t count_parts = counts[ind_recv];
    const size_t count_gparts = g_counts[ind_recv];
    const size_t count_sparts = s_counts[ind_recv];

    /* Loop over the gparts received from that node */
    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {

      /* Does this gpart have a gas partner ? */
      if (gparts_new[k].type == swift_type_gas) {

        const ptrdiff_t partner_index =
            offset_parts - gparts_new[k].id_or_neg_offset;

        /* Re-link */
        gparts_new[k].id_or_neg_offset = -partner_index;
        parts_new[partner_index].gpart = &gparts_new[k];
      }

      /* Does this gpart have a star partner ? */
      if (gparts_new[k].type == swift_type_star) {

        const ptrdiff_t partner_index =
            offset_sparts - gparts_new[k].id_or_neg_offset;

        /* Re-link */
        gparts_new[k].id_or_neg_offset = -partner_index;
        sparts_new[partner_index].gpart = &gparts_new[k];
      }
    }

    offset_parts += count_parts;
    offset_gparts += count_gparts;
    offset_sparts += count_sparts;
  }
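
  /* The arithmetic above undoes the packing trick: each linked gpart stored
   * minus its index within the sub-array sent by one node, so adding the
   * running offset of that node's block recovers the absolute index in the
   * new arrays. */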

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that all parts are in the right place. */
  for (size_t k = 0; k < nr_parts; k++) {
    const int cid = cell_getid(cdim, parts_new[k].x[0] * iwidth[0],
                               parts_new[k].x[1] * iwidth[1],
                               parts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received particle (%zu) that does not belong here (nodeID=%i).", k,
            cells[cid].nodeID);
  }
  for (size_t k = 0; k < nr_gparts; k++) {
    const int cid = cell_getid(cdim, gparts_new[k].x[0] * iwidth[0],
                               gparts_new[k].x[1] * iwidth[1],
                               gparts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received g-particle (%zu) that does not belong here (nodeID=%i).",
            k, cells[cid].nodeID);
  }
  for (size_t k = 0; k < nr_sparts; k++) {
    const int cid = cell_getid(cdim, sparts_new[k].x[0] * iwidth[0],
                               sparts_new[k].x[1] * iwidth[1],
                               sparts_new[k].x[2] * iwidth[2]);
    if (cells[cid].nodeID != nodeID)
      error("Received s-particle (%zu) that does not belong here (nodeID=%i).",
            k, cells[cid].nodeID);
  }

  /* Verify that the links are correct */
  part_verify_links(parts_new, gparts_new, sparts_new, nr_parts, nr_gparts,
                    nr_sparts, e->verbose);
#endif

  /* Set the new part data, free the old. */
  free(parts);
  free(xparts);
  free(gparts);
  free(sparts);
  s->parts = parts_new;
  s->xparts = xparts_new;
  s->gparts = gparts_new;
  s->sparts = sparts_new;
  s->nr_parts = nr_parts;
  s->nr_gparts = nr_gparts;
  s->nr_sparts = nr_sparts;
  s->size_parts = engine_redistribute_alloc_margin * nr_parts;
  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts;
  s->size_sparts = engine_redistribute_alloc_margin * nr_sparts;

  /* Clean up the temporary stuff. */
  free(reqs);
  free(counts);
  free(g_counts);
  free(s_counts);
  free(dest);
  free(g_dest);
  free(s_dest);

  /* Be verbose about what just happened. */
  if (e->verbose) {
    int my_cells = 0;
    for (int k = 0; k < nr_cells; k++)
      if (cells[k].nodeID == nodeID) my_cells += 1;
    message("node %i now has %zu parts, %zu sparts and %zu gparts in %i cells.",
            nodeID, nr_parts, nr_sparts, nr_gparts, my_cells);
  }

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());

#else
  error("SWIFT was not compiled with MPI support.");
#endif
}

/**
 * @brief Repartition the cells amongst the nodes.
 *
 * @param e The #engine.
 */
void engine_repartition(struct engine *e) {

#if defined(WITH_MPI) && defined(HAVE_METIS)

  ticks tic = getticks();

#ifdef SWIFT_DEBUG_CHECKS
  /* Be verbose about this. */
  if (e->nodeID == 0 || e->verbose) message("repartitioning space");
  fflush(stdout);

  /* Check that all cells have been drifted to the current time */
  space_check_drift_point(e->s, e->ti_old);
#endif

  /* Clear the repartition flag. */
  e->forcerepart = 0;

  /* Nothing to do if only using a single node. Also avoids METIS
   * bug that doesn't handle this case well. */
  if (e->nr_nodes == 1) return;

  /* Do the repartitioning. */
  partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s,
                        e->sched.tasks, e->sched.nr_tasks);

  /* Now comes the tricky part: Exchange particles between all nodes.
     This is done in two steps, first allreducing a matrix of
     how many particles go from where to where, then re-allocating
     the parts array, and emitting the sends and receives.
     Finally, the space, tasks, and proxies need to be rebuilt. */

  /* Redistribute the particles between the nodes. */
  engine_redistribute(e);

  /* Make the proxies. */
  engine_makeproxies(e);

  /* Tell the engine it should re-build whenever possible */
  e->forcerebuild = 1;

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());

#else
  error("SWIFT was not compiled with MPI and METIS support.");
#endif
}

/**
 * @brief Add up/down gravity tasks to a cell hierarchy.
 *
 * @param e The #engine.
 * @param c The #cell
 * @param up The upward gravity #task.
 * @param down The downward gravity #task.
 */
void engine_addtasks_grav(struct engine *e, struct cell *c, struct task *up,