engine.c 252 KB
Newer Older
Pedro Gonnet's avatar
Pedro Gonnet committed
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
Peter W. Draper's avatar
Peter W. Draper committed
5
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
6
7
8
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
9
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
10
11
12
13
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
14
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
15
16
17
18
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
19
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
20
21
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
22
 *
Pedro Gonnet's avatar
Pedro Gonnet committed
23
24
25
26
27
28
29
30
31
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
32
#include <stdbool.h>
33
34
35
36
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
Pedro Gonnet's avatar
Pedro Gonnet committed
37

38
39
/* MPI headers. */
#ifdef WITH_MPI
40
#include <mpi.h>
41
42
#endif

Angus Lepper's avatar
Angus Lepper committed
43
44
#ifdef HAVE_LIBNUMA
#include <numa.h>
45
46
#endif

47
48
49
50
51
/* Load the profiler header, if needed. */
#ifdef WITH_PROFILER
#include <gperftools/profiler.h>
#endif

52
/* This object's header. */
Pedro Gonnet's avatar
Pedro Gonnet committed
53
#include "engine.h"
54
55

/* Local headers. */
56
#include "active.h"
57
#include "atomic.h"
58
#include "cell.h"
59
#include "chemistry.h"
60
#include "clocks.h"
61
#include "cooling.h"
62
#include "cosmology.h"
63
64
#include "cycle.h"
#include "debug.h"
65
#include "equation_of_state.h"
66
#include "error.h"
67
#include "gravity.h"
68
#include "gravity_cache.h"
69
#include "hydro.h"
70
#include "map.h"
71
#include "memswap.h"
72
#include "minmax.h"
73
#include "outputlist.h"
74
#include "parallel_io.h"
75
#include "part.h"
76
#include "partition.h"
James Willis's avatar
James Willis committed
77
#include "profiler.h"
78
#include "proxy.h"
79
#include "restart.h"
80
#include "runner.h"
81
82
#include "serial_io.h"
#include "single_io.h"
83
#include "sort_part.h"
84
#include "sourceterms.h"
Loic Hausammann's avatar
Loic Hausammann committed
85
#include "stars_io.h"
86
#include "statistics.h"
87
#include "timers.h"
88
#include "tools.h"
89
#include "units.h"
90
#include "velociraptor_interface.h"
Matthieu Schaller's avatar
Matthieu Schaller committed
91
#include "version.h"
Pedro Gonnet's avatar
Pedro Gonnet committed
92

93
94
95
/* Particle cache size. */
#define CACHE_SIZE 512

96
97
98
99
100
const char *engine_policy_names[] = {"none",
                                     "rand",
                                     "steal",
                                     "keep",
                                     "block",
101
                                     "cpu tight",
102
                                     "mpi",
103
                                     "numa affinity",
104
                                     "hydro",
105
106
107
108
109
                                     "self gravity",
                                     "external gravity",
                                     "cosmological integration",
                                     "drift everything",
                                     "reconstruct multi-poles",
110
111
                                     "cooling",
                                     "sourceterms",
James Willis's avatar
James Willis committed
112
113
                                     "stars",
                                     "structure finding"};
Pedro Gonnet's avatar
Pedro Gonnet committed
114

115
116
117
/** The rank of the engine as a global variable (for messages). */
int engine_rank;

118
119
120
121
122
/**
 * @brief Data collected from the cells at the end of a time-step
 */
struct end_of_step_data {

123
  size_t updates, g_updates, s_updates;
124
125
  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
126
127
128
  struct engine *e;
};

129
130
131
132
/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
133
 * @param l A pointer to the #link, will be modified atomically.
134
135
136
137
 * @param t The #task.
 *
 * @return The new #link pointer.
 */
138
void engine_addlink(struct engine *e, struct link **l, struct task *t) {
139

140
  /* Get the next free link. */
141
  const size_t ind = atomic_inc(&e->nr_links);
142
143
144
145
  if (ind >= e->size_links) {
    error("Link table overflow.");
  }
  struct link *res = &e->links[ind];
146

147
  /* Set it atomically. */
148
  res->t = t;
149
  res->next = atomic_swap(l, res);
150
}
151

Loic Hausammann's avatar
Loic Hausammann committed
152
153
154
/**
 * @brief Recursively add non-implicit star ghost tasks to a cell hierarchy.
 */
155
156
157
void engine_add_stars_ghosts(struct engine *e, struct cell *c,
                             struct task *stars_ghost_in,
                             struct task *stars_ghost_out) {
Loic Hausammann's avatar
Loic Hausammann committed
158
159
160
161
162
163

  /* If we have reached the leaf OR have to few particles to play with*/
  if (!c->split || c->scount < engine_max_sparts_per_ghost) {

    /* Add the ghost task and its dependencies */
    struct scheduler *s = &e->sched;
164
165
    c->stars_ghost = scheduler_addtask(s, task_type_stars_ghost,
                                       task_subtype_none, 0, 0, c, NULL);
Loic Hausammann's avatar
Loic Hausammann committed
166
167
    scheduler_addunlock(s, stars_ghost_in, c->stars_ghost);
    scheduler_addunlock(s, c->stars_ghost, stars_ghost_out);
Loic Hausammann's avatar
Loic Hausammann committed
168
169
170
171
  } else {
    /* Keep recursing */
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL)
172
173
        engine_add_stars_ghosts(e, c->progeny[k], stars_ghost_in,
                                stars_ghost_out);
Loic Hausammann's avatar
Loic Hausammann committed
174
175
176
  }
}

177
178
179
180
181
/**
 * @brief Recursively add non-implicit ghost tasks to a cell hierarchy.
 */
void engine_add_ghosts(struct engine *e, struct cell *c, struct task *ghost_in,
                       struct task *ghost_out) {
182
183

  /* If we have reached the leaf OR have to few particles to play with*/
184
  if (!c->split || c->count < engine_max_parts_per_ghost) {
185
186

    /* Add the ghost task and its dependencies */
187
188
189
190
191
192
    struct scheduler *s = &e->sched;
    c->ghost =
        scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, 0, c, NULL);
    scheduler_addunlock(s, ghost_in, c->ghost);
    scheduler_addunlock(s, c->ghost, ghost_out);
  } else {
193
    /* Keep recursing */
194
195
196
197
198
199
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL)
        engine_add_ghosts(e, c->progeny[k], ghost_in, ghost_out);
  }
}

200
201
/**
 * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
202
 * i.e. all the O(Npart) tasks -- timestep version
203
204
205
 *
 * Tasks are only created here. The dependencies will be added later on.
 *
206
207
 * Note that there is no need to recurse below the super-cell. Note also
 * that we only add tasks if the relevant particles are present in the cell.
208
 *
209
210
211
 * @param e The #engine.
 * @param c The #cell.
 */
212
void engine_make_hierarchical_tasks_common(struct engine *e, struct cell *c) {
213
214

  struct scheduler *s = &e->sched;
215
  const int is_with_cooling = (e->policy & engine_policy_cooling);
216

217
  /* Are we in a super-cell ? */
218
  if (c->super == c) {
219
220
221
222

    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {

223
224
      /* Add the two half kicks */
      c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0,
225
                                   c, NULL);
226
227

      c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0,
228
                                   c, NULL);
Tom Theuns's avatar
Tom Theuns committed
229

230
231
      /* Add the time-step calculation task and its dependency */
      c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none,
232
                                      0, 0, c, NULL);
233

234
235
236
237
238
      /* Add the task finishing the force calculation */
      c->end_force = scheduler_addtask(s, task_type_end_force,
                                       task_subtype_none, 0, 0, c, NULL);

      if (!is_with_cooling) scheduler_addunlock(s, c->end_force, c->kick2);
239
      scheduler_addunlock(s, c->kick2, c->timestep);
240
      scheduler_addunlock(s, c->timestep, c->kick1);
241
    }
242

243
  } else { /* We are above the super-cell so need to go deeper */
244

245
246
247
248
249
250
251
    /* Recurse. */
    if (c->split)
      for (int k = 0; k < 8; k++)
        if (c->progeny[k] != NULL)
          engine_make_hierarchical_tasks_common(e, c->progeny[k]);
  }
}
252

253
254
/**
 * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
255
 * i.e. all the O(Npart) tasks -- hydro version
256
257
258
259
260
261
262
263
264
265
 *
 * Tasks are only created here. The dependencies will be added later on.
 *
 * Note that there is no need to recurse below the super-cell. Note also
 * that we only add tasks if the relevant particles are present in the cell.
 *
 * @param e The #engine.
 * @param c The #cell.
 */
void engine_make_hierarchical_tasks_hydro(struct engine *e, struct cell *c) {
266

267
268
269
  struct scheduler *s = &e->sched;
  const int is_with_cooling = (e->policy & engine_policy_cooling);
  const int is_with_sourceterms = (e->policy & engine_policy_sourceterms);
270

271
272
  /* Are we in a super-cell ? */
  if (c->super_hydro == c) {
273

274
    /* Add the sort task. */
275
276
    c->sorts =
        scheduler_addtask(s, task_type_sort, task_subtype_none, 0, 0, c, NULL);
277

278
279
    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {
280

281
      /* Add the drift task. */
282
283
      c->drift_part = scheduler_addtask(s, task_type_drift_part,
                                        task_subtype_none, 0, 0, c, NULL);
284

285
286
      /* Generate the ghost tasks. */
      c->ghost_in =
287
288
          scheduler_addtask(s, task_type_ghost_in, task_subtype_none, 0,
                            /* implicit = */ 1, c, NULL);
289
      c->ghost_out =
290
291
          scheduler_addtask(s, task_type_ghost_out, task_subtype_none, 0,
                            /* implicit = */ 1, c, NULL);
292
      engine_add_ghosts(e, c, c->ghost_in, c->ghost_out);
293
294

#ifdef EXTRA_HYDRO_LOOP
295
296
      /* Generate the extra ghost task. */
      c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost,
297
                                         task_subtype_none, 0, 0, c, NULL);
298
#endif
299

300
      /* Cooling task */
301
      if (is_with_cooling) {
Matthieu Schaller's avatar
Matthieu Schaller committed
302
        c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none,
303
                                       0, 0, c, NULL);
304

305
        scheduler_addunlock(s, c->super->end_force, c->cooling);
306
        scheduler_addunlock(s, c->cooling, c->super->kick2);
307
308
      }

309
      /* add source terms */
310
      if (is_with_sourceterms) {
311
        c->sourceterms = scheduler_addtask(s, task_type_sourceterms,
312
                                           task_subtype_none, 0, 0, c, NULL);
313
      }
314
315
    }

316
  } else { /* We are above the super-cell so need to go deeper */
317

318
319
320
321
322
323
324
325
    /* Recurse. */
    if (c->split)
      for (int k = 0; k < 8; k++)
        if (c->progeny[k] != NULL)
          engine_make_hierarchical_tasks_hydro(e, c->progeny[k]);
  }
}

326
327
328
329
330
331
332
333
334
335
336
337
/**
 * @brief Generate the hydro hierarchical tasks for a hierarchy of cells -
 * i.e. all the O(Npart) tasks -- gravity version
 *
 * Tasks are only created here. The dependencies will be added later on.
 *
 * Note that there is no need to recurse below the super-cell. Note also
 * that we only add tasks if the relevant particles are present in the cell.
 *
 * @param e The #engine.
 * @param c The #cell.
 */
338
339
340
341
342
343
344
345
346
347
348
349
350
void engine_make_hierarchical_tasks_gravity(struct engine *e, struct cell *c) {

  struct scheduler *s = &e->sched;
  const int periodic = e->s->periodic;
  const int is_self_gravity = (e->policy & engine_policy_self_gravity);

  /* Are we in a super-cell ? */
  if (c->super_gravity == c) {

    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {

      c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart,
351
352
353
                                         task_subtype_none, 0, 0, c, NULL);

      if (is_self_gravity) {
354

355
356
357
358
359
360
361
362
363
364
365
366
        /* Initialisation of the multipoles */
        c->init_grav = scheduler_addtask(s, task_type_init_grav,
                                         task_subtype_none, 0, 0, c, NULL);

        /* Gravity non-neighbouring pm calculations */
        c->grav_long_range = scheduler_addtask(
            s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL);

        /* Gravity recursive down-pass */
        c->grav_down = scheduler_addtask(s, task_type_grav_down,
                                         task_subtype_none, 0, 0, c, NULL);

367
368
369
370
371
372
        /* Implicit tasks for the up and down passes */
        c->init_grav_out = scheduler_addtask(s, task_type_init_grav_out,
                                             task_subtype_none, 0, 1, c, NULL);
        c->grav_down_in = scheduler_addtask(s, task_type_grav_down_in,
                                            task_subtype_none, 0, 1, c, NULL);

373
374
375
376
377
378
        /* Gravity mesh force propagation */
        if (periodic)
          c->grav_mesh = scheduler_addtask(s, task_type_grav_mesh,
                                           task_subtype_none, 0, 0, c, NULL);

        if (periodic) scheduler_addunlock(s, c->drift_gpart, c->grav_mesh);
379
        if (periodic) scheduler_addunlock(s, c->grav_mesh, c->grav_down);
380
381
        scheduler_addunlock(s, c->init_grav, c->grav_long_range);
        scheduler_addunlock(s, c->grav_long_range, c->grav_down);
382
        scheduler_addunlock(s, c->grav_down, c->super->end_force);
383

Matthieu Schaller's avatar
Matthieu Schaller committed
384
        /* Link in the implicit tasks */
385
386
        scheduler_addunlock(s, c->init_grav, c->init_grav_out);
        scheduler_addunlock(s, c->grav_down_in, c->grav_down);
387
388
      }
    }
389
  }
390

391
392
  /* We are below the super-cell but not below the maximal splitting depth */
  else if (c->super_gravity != NULL && c->depth <= space_subdepth_grav) {
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408

    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {

      if (is_self_gravity) {

        c->init_grav_out = scheduler_addtask(s, task_type_init_grav_out,
                                             task_subtype_none, 0, 1, c, NULL);

        c->grav_down_in = scheduler_addtask(s, task_type_grav_down_in,
                                            task_subtype_none, 0, 1, c, NULL);

        scheduler_addunlock(s, c->parent->init_grav_out, c->init_grav_out);
        scheduler_addunlock(s, c->grav_down_in, c->parent->grav_down_in);
      }
    }
409
  }
410

411
412
  /* Recurse but not below the maximal splitting depth */
  if (c->split && c->depth <= space_subdepth_grav)
413
414
415
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL)
        engine_make_hierarchical_tasks_gravity(e, c->progeny[k]);
416
}
417

Loic Hausammann's avatar
Loic Hausammann committed
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
/**
 * @brief Generate the stars hierarchical tasks for a hierarchy of cells -
 * i.e. all the O(Npart) tasks -- star version
 *
 * Tasks are only created here. The dependencies will be added later on.
 *
 * Note that there is no need to recurse below the super-cell. Note also
 * that we only add tasks if the relevant particles are present in the cell.
 *
 * @param e The #engine.
 * @param c The #cell.
 */
void engine_make_hierarchical_tasks_stars(struct engine *e, struct cell *c) {

  struct scheduler *s = &e->sched;

  /* Are we in a super-cell ? */
Loic Hausammann's avatar
Loic Hausammann committed
435
  if (c->super == c) {
Loic Hausammann's avatar
Loic Hausammann committed
436
437
438
439
440

    /* Local tasks only... */
    if (c->nodeID == e->nodeID) {

      /* Generate the ghost tasks. */
Loic Hausammann's avatar
Loic Hausammann committed
441
442
      c->stars_ghost_in =
          scheduler_addtask(s, task_type_stars_ghost_in, task_subtype_none, 0,
Loic Hausammann's avatar
Loic Hausammann committed
443
                            /* implicit = */ 1, c, NULL);
Loic Hausammann's avatar
Loic Hausammann committed
444
445
      c->stars_ghost_out =
          scheduler_addtask(s, task_type_stars_ghost_out, task_subtype_none, 0,
Loic Hausammann's avatar
Loic Hausammann committed
446
                            /* implicit = */ 1, c, NULL);
Loic Hausammann's avatar
Loic Hausammann committed
447
      engine_add_stars_ghosts(e, c, c->stars_ghost_in, c->stars_ghost_out);
448
449
    }
  } else { /* We are above the super-cell so need to go deeper */
Loic Hausammann's avatar
Loic Hausammann committed
450
451
452
453
454
455
456
457
458

    /* Recurse. */
    if (c->split)
      for (int k = 0; k < 8; k++)
        if (c->progeny[k] != NULL)
          engine_make_hierarchical_tasks_stars(e, c->progeny[k]);
  }
}

459
460
461
void engine_make_hierarchical_tasks_mapper(void *map_data, int num_elements,
                                           void *extra_data) {
  struct engine *e = (struct engine *)extra_data;
462
463
  const int is_with_hydro = (e->policy & engine_policy_hydro);
  const int is_with_self_gravity = (e->policy & engine_policy_self_gravity);
464
465
  const int is_with_external_gravity =
      (e->policy & engine_policy_external_gravity);
Loic Hausammann's avatar
Loic Hausammann committed
466
  const int is_with_stars = (e->policy & engine_policy_stars);
467
468
469

  for (int ind = 0; ind < num_elements; ind++) {
    struct cell *c = &((struct cell *)map_data)[ind];
470
471
472
    /* Make the common tasks (time integration) */
    engine_make_hierarchical_tasks_common(e, c);
    /* Add the hydro stuff */
473
    if (is_with_hydro) engine_make_hierarchical_tasks_hydro(e, c);
474
    /* And the gravity stuff */
475
    if (is_with_self_gravity || is_with_external_gravity)
476
      engine_make_hierarchical_tasks_gravity(e, c);
477
    if (is_with_stars) engine_make_hierarchical_tasks_stars(e, c);
478
479
480
  }
}

481
#ifdef WITH_MPI
482
/**
Peter W. Draper's avatar
Peter W. Draper committed
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
 * Do the exchange of one type of particles with all the other nodes.
 *
 * @param counts 2D array with the counts of particles to exchange with
 *               each other node.
 * @param parts the particle data to exchange
 * @param new_nr_parts the number of particles this node will have after all
 *                     exchanges have completed.
 * @param sizeofparts sizeof the particle struct.
 * @param alignsize the memory alignment required for this particle type.
 * @param mpi_type the MPI_Datatype for these particles.
 * @param nr_nodes the number of nodes to exchange with.
 * @param nodeID the id of this node.
 *
 * @result new particle data constructed from all the exchanges with the
 *         given alignment.
498
 */
499
static void *engine_do_redistribute(int *counts, char *parts,
500
501
                                    size_t new_nr_parts, size_t sizeofparts,
                                    size_t alignsize, MPI_Datatype mpi_type,
502
                                    int nr_nodes, int nodeID) {
503
504

  /* Allocate a new particle array with some extra margin */
505
  char *parts_new = NULL;
506
507
  if (posix_memalign(
          (void **)&parts_new, alignsize,
508
          sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
509
510
511
512
    error("Failed to allocate new particle data.");

  /* Prepare MPI requests for the asynchronous communications */
  MPI_Request *reqs;
513
514
  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
      NULL)
515
516
    error("Failed to allocate MPI request list.");

517
  /* Only send and receive only "chunk" particles per request. So we need to
518
519
520
   * loop as many times as necessary here. Make 2Gb/sizeofparts so we only
   * send 2Gb packets. */
  const int chunk = INT_MAX / sizeofparts;
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
  int sent = 0;
  int recvd = 0;

  int activenodes = 1;
  while (activenodes) {

    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;

    /* Emit the sends and recvs for the data. */
    size_t offset_send = sent;
    size_t offset_recv = recvd;
    activenodes = 0;

    for (int k = 0; k < nr_nodes; k++) {

      /* Indices in the count arrays of the node of interest */
      const int ind_send = nodeID * nr_nodes + k;
      const int ind_recv = k * nr_nodes + nodeID;

      /* Are we sending any data this loop? */
      int sending = counts[ind_send] - sent;
      if (sending > 0) {
        activenodes++;
544
        if (sending > chunk) sending = chunk;
545
546
547
548

        /* If the send and receive is local then just copy. */
        if (k == nodeID) {
          int receiving = counts[ind_recv] - recvd;
549
          if (receiving > chunk) receiving = chunk;
550
          memcpy(&parts_new[offset_recv * sizeofparts],
551
                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
552
553
        } else {
          /* Otherwise send it. */
554
555
556
          int res =
              MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k,
                        ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]);
557
558
559
560
          if (res != MPI_SUCCESS)
            mpi_error(res, "Failed to isend parts to node %i.", k);
        }
      }
561

562
      /* If we're sending to this node, then move past it to next. */
563
      if (counts[ind_send] > 0) offset_send += counts[ind_send];
564

565
566
567
568
569
570
      /* Are we receiving any data from this node? Note already done if coming
       * from this node. */
      if (k != nodeID) {
        int receiving = counts[ind_recv] - recvd;
        if (receiving > 0) {
          activenodes++;
571
          if (receiving > chunk) receiving = chunk;
572
573
574
575
576
577
          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
                              &reqs[2 * k + 1]);
          if (res != MPI_SUCCESS)
            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
        }
578
579
      }

580
      /* If we're receiving from this node, then move past it to next. */
581
      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
582
583
    }

584
585
586
587
588
589
590
    /* Wait for all the sends and recvs to tumble in. */
    MPI_Status stats[2 * nr_nodes];
    int res;
    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
      for (int k = 0; k < 2 * nr_nodes; k++) {
        char buff[MPI_MAX_ERROR_STRING];
        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
591
592
        message("request from source %i, tag %i has error '%s'.",
                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
593
594
      }
      error("Failed during waitall for part data.");
595
    }
596
597
598
599

    /* Move to next chunks. */
    sent += chunk;
    recvd += chunk;
600
601
602
603
604
605
606
607
608
609
  }

  /* Free temps. */
  free(reqs);

  /* And return new memory. */
  return parts_new;
}
#endif

610
#ifdef WITH_MPI /* redist_mapper */
611

612
/* Support for engine_redistribute threadpool dest mappers. */
613
struct redist_mapper_data {
614
  int *counts;
615
616
617
618
619
  int *dest;
  int nodeID;
  int nr_nodes;
  struct cell *cells;
  struct space *s;
620
621
622
  void *base;
};

623
624
625
/* Generic function for accumulating counts for TYPE parts. Note
 * we use a local counts array to avoid the atomic_add in the parts
 * loop. */
Peter W. Draper's avatar
Peter W. Draper committed
626
#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE)                              \
627
628
629
  engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \
                                         void *extra_data) {               \
    struct TYPE *parts = (struct TYPE *)map_data;                          \
630
631
    struct redist_mapper_data *mydata =                                    \
        (struct redist_mapper_data *)extra_data;                           \
632
633
634
635
    struct space *s = mydata->s;                                           \
    int *dest =                                                            \
        mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base);   \
    int *lcounts = NULL;                                                   \
636
637
    if ((lcounts = (int *)calloc(                                          \
             sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL)   \
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
      error("Failed to allocate counts thread-specific buffer");           \
    for (int k = 0; k < num_elements; k++) {                               \
      for (int j = 0; j < 3; j++) {                                        \
        if (parts[k].x[j] < 0.0)                                           \
          parts[k].x[j] += s->dim[j];                                      \
        else if (parts[k].x[j] >= s->dim[j])                               \
          parts[k].x[j] -= s->dim[j];                                      \
      }                                                                    \
      const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0],    \
                                 parts[k].x[1] * s->iwidth[1],             \
                                 parts[k].x[2] * s->iwidth[2]);            \
      dest[k] = s->cells_top[cid].nodeID;                                  \
      size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k];            \
      lcounts[ind] += 1;                                                   \
    }                                                                      \
    for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++)        \
      atomic_add(&mydata->counts[k], lcounts[k]);                          \
    free(lcounts);                                                         \
  }
657

658
659
/**
 * @brief Accumulate the counts of particles per cell.
660
 * Threadpool helper for accumulating the counts of particles per cell.
661
 *
662
 * part version.
663
 */
Peter W. Draper's avatar
Peter W. Draper committed
664
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);
665

666
667
/**
 * @brief Accumulate the counts of star particles per cell.
668
 * Threadpool helper for accumulating the counts of particles per cell.
669
 *
670
 * spart version.
671
 */
Peter W. Draper's avatar
Peter W. Draper committed
672
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart);
673

674
675
/**
 * @brief Accumulate the counts of gravity particles per cell.
676
 * Threadpool helper for accumulating the counts of particles per cell.
677
 *
678
 * gpart version.
679
 */
Peter W. Draper's avatar
Peter W. Draper committed
680
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart);
681

682
#endif /* redist_mapper_data */
683

684
#ifdef WITH_MPI /* savelink_mapper_data */
685
686

/* Support for saving the linkage between gparts and parts/sparts. */
687
struct savelink_mapper_data {
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
  int nr_nodes;
  int *counts;
  void *parts;
  int nodeID;
};

/**
 * @brief Save the offset of each gravity partner of a part or spart.
 *
 * The offset is from the start of the sorted particles to be sent to a node.
 * This is possible as parts without gravity partners have a positive id.
 * These offsets are used to restore the pointers on the receiving node.
 *
 * CHECKS should be eliminated as dead code when optimizing.
 */
Peter W. Draper's avatar
Peter W. Draper committed
703
#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS)                      \
Peter W. Draper's avatar
Peter W. Draper committed
704
705
706
  engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \
                                             void *extra_data) {               \
    int *nodes = (int *)map_data;                                              \
707
708
    struct savelink_mapper_data *mydata =                                      \
        (struct savelink_mapper_data *)extra_data;                             \
Peter W. Draper's avatar
Peter W. Draper committed
709
710
711
712
713
714
715
    int nodeID = mydata->nodeID;                                               \
    int nr_nodes = mydata->nr_nodes;                                           \
    int *counts = mydata->counts;                                              \
    struct TYPE *parts = (struct TYPE *)mydata->parts;                         \
                                                                               \
    for (int j = 0; j < num_elements; j++) {                                   \
      int node = nodes[j];                                                     \
716
      int count = 0;                                                           \
Peter W. Draper's avatar
Peter W. Draper committed
717
718
719
      size_t offset = 0;                                                       \
      for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i];  \
                                                                               \
720
      for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) {             \
Peter W. Draper's avatar
Peter W. Draper committed
721
722
723
724
725
726
727
728
729
        if (parts[k + offset].gpart != NULL) {                                 \
          if (CHECKS)                                                          \
            if (parts[k].gpart->id_or_neg_offset > 0)                          \
              error("Trying to link a partnerless " #TYPE "!");                \
          parts[k + offset].gpart->id_or_neg_offset = -count;                  \
          count++;                                                             \
        }                                                                      \
      }                                                                        \
    }                                                                          \
730
731
732
733
734
735
736
  }

/**
 * @brief Save position of part-gpart links.
 * Threadpool helper for accumulating the counts of particles per cell.
 */
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
737
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1);
738
#else
Peter W. Draper's avatar
Peter W. Draper committed
739
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0);
740
741
742
743
744
745
746
#endif

/**
 * @brief Save position of spart-gpart links.
 * Threadpool helper for accumulating the counts of particles per cell.
 */
#ifdef SWIFT_DEBUG_CHECKS
Peter W. Draper's avatar
Peter W. Draper committed
747
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1);
748
#else
Peter W. Draper's avatar
Peter W. Draper committed
749
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0);
750
751
#endif

752
#endif /* savelink_mapper_data */
753

754
#ifdef WITH_MPI /* relink_mapper_data */
755
756

/* Support for relinking parts, gparts and sparts after moving between nodes. */
757
struct relink_mapper_data {
758
759
760
761
  int nodeID;
  int nr_nodes;
  int *counts;
  int *s_counts;
762
763
764
765
766
767
768
769
770
  int *g_counts;
  struct space *s;
};

/**
 * @brief Restore the part/gpart and spart/gpart links for a list of nodes.
 *
 * @param map_data address of nodes to process.
 * @param num_elements the number nodes to process.
771
772
 * @param extra_data additional data defining the context (a
 * relink_mapper_data).
773
774
775
776
777
 */
static void engine_redistribute_relink_mapper(void *map_data, int num_elements,
                                              void *extra_data) {

  int *nodes = (int *)map_data;
778
  struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data;
779
780
781
782

  int nodeID = mydata->nodeID;
  int nr_nodes = mydata->nr_nodes;
  int *counts = mydata->counts;
783
  int *g_counts = mydata->g_counts;
784
785
  int *s_counts = mydata->s_counts;
  struct space *s = mydata->s;
786
787
788
789

  for (int i = 0; i < num_elements; i++) {

    int node = nodes[i];
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804

    /* Get offsets to correct parts of the counts arrays for this node. */
    size_t offset_parts = 0;
    size_t offset_gparts = 0;
    size_t offset_sparts = 0;
    for (int n = 0; n < node; n++) {
      int ind_recv = n * nr_nodes + nodeID;
      offset_parts += counts[ind_recv];
      offset_gparts += g_counts[ind_recv];
      offset_sparts += s_counts[ind_recv];
    }

    /* Number of gparts sent from this node. */
    int ind_recv = node * nr_nodes + nodeID;
    const size_t count_gparts = g_counts[ind_recv];
805
806

    /* Loop over the gparts received from this node */
807
    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) {
808
809
810
811
812

      /* Does this gpart have a gas partner ? */
      if (s->gparts[k].type == swift_type_gas) {

        const ptrdiff_t partner_index =
813
            offset_parts - s->gparts[k].id_or_neg_offset;
814
815
816
817
818
819
820

        /* Re-link */
        s->gparts[k].id_or_neg_offset = -partner_index;
        s->parts[partner_index].gpart = &s->gparts[k];
      }

      /* Does this gpart have a star partner ? */
821
      else if (s->gparts[k].type == swift_type_stars) {
822
823

        const ptrdiff_t partner_index =
824
            offset_sparts - s->gparts[k].id_or_neg_offset;
825
826
827
828
829
830
831
832
833

        /* Re-link */
        s->gparts[k].id_or_neg_offset = -partner_index;
        s->sparts[partner_index].gpart = &s->gparts[k];
      }
    }
  }
}

834
#endif /* relink_mapper_data */
835

836
/**
837
 * @brief Redistribute the particles amongst the nodes according
838
839
 *      to their cell's node IDs.
 *
840
841
842
843
 * The strategy here is as follows:
 * 1) Each node counts the number of particles it has to send to each other
 * node.
 * 2) The number of particles of each type is then exchanged.
844
845
846
847
 * 3) The particles to send are placed in a temporary buffer in which the
 * part-gpart links are preserved.
 * 4) Each node allocates enough space for the new particles.
 * 5) (Asynchronous) communications are issued to transfer the data.
848
849
 *
 *
850
851
 * @param e The #engine.
 */
852
void engine_redistribute(struct engine *e) {
853

854
#ifdef WITH_MPI
855

856
857
  const int nr_nodes = e->nr_nodes;
  const int nodeID = e->nodeID;
858
  struct space *s = e->s;
859
  struct cell *cells = s->cells_top;
860
  const int nr_cells = s->nr_cells;
861
862
  struct part *parts = s->parts;
  struct gpart *gparts = s->gparts;
863
  struct spart *sparts = s->sparts;
864
  ticks tic = getticks();
865

866
  /* Allocate temporary arrays to store the counts of particles to be sent
867
868
   * and the destination of each particle */
  int *counts;
869
  if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
870
    error("Failed to allocate counts temporary buffer.");
871

872
  int *dest;
873
  if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
874
875
    error("Failed to allocate dest temporary buffer.");

876
877
878
879
880
  /* Simple index of node IDs, used for mappers over nodes. */
  int *nodes = NULL;
  if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL)
    error("Failed to allocate nodes temporary buffer.");
  for (int k = 0; k < nr_nodes; k++) nodes[k] = k;
881

882
  /* Get destination of each particle */
883
  struct redist_mapper_data redist_data;
884
885
886
  redist_data.s = s;
  redist_data.nodeID = nodeID;
  redist_data.nr_nodes = nr_nodes;
887

888
889
890
  redist_data.counts = counts;
  redist_data.dest = dest;
  redist_data.base = (void *)parts;
891

892
  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts,
893
                 s->nr_parts, sizeof(struct part), 0, &redist_data);
894
895

  /* Sort the particles according to their cell index. */
Matthieu Schaller's avatar
Matthieu Schaller committed
896
  if (s->nr_parts > 0)
897
898
    space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes],
                     nr_nodes, 0);
899

900
901
#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the part have been sorted correctly. */
902
903
904
905
  for (size_t k = 0; k < s->nr_parts; k++) {
    const struct part *p = &s->parts[k];

    /* New cell index */
906
    const int new_cid =
907
908
909
910
        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
                   p->x[2] * s->iwidth[2]);

    /* New cell of this part */
911
912
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;
913

914
915
    if (dest[k] != new_node)
      error("part's new node index not matching sorted index.");
916
917
918
919
920

    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
      error("part not sorted into the right top-level cell!");
921
922
923
  }
#endif

924
925
  /* We will need to re-link the gpart partners of parts, so save their
   * relative positions in the sent lists. */
926
  if (s->nr_parts > 0 && s->nr_gparts > 0) {
927

928
    struct savelink_mapper_data savelink_data;
929
930
931
932
933
934
    savelink_data.nr_nodes = nr_nodes;
    savelink_data.counts = counts;
    savelink_data.parts = (void *)parts;
    savelink_data.nodeID = nodeID;
    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part,
                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
935
  }
936
  free(dest);
937

938
  /* Get destination of each s-particle */
939
  int *s_counts;
940
  if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
941
942
943
944
945
946
    error("Failed to allocate s_counts temporary buffer.");

  int *s_dest;
  if ((s_dest = (int *)malloc(sizeof(int) * s->nr_sparts)) == NULL)
    error("Failed to allocate s_dest temporary buffer.");

947
948
949
  redist_data.counts = s_counts;
  redist_data.dest = s_dest;
  redist_data.base = (void *)sparts;
950

951
  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts,
952
                 s->nr_sparts, sizeof(struct spart), 0, &redist_data);
953
954

  /* Sort the particles according to their cell index. */
Matthieu Schaller's avatar
Matthieu Schaller committed
955
  if (s->nr_sparts > 0)
956
957
    space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes,
                      0);
958

959
960
#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the spart have been sorted correctly. */
961
962
963
964
  for (size_t k = 0; k < s->nr_sparts; k++) {
    const struct spart *sp = &s->sparts[k];

    /* New cell index */
965
    const int new_cid =
966
967
968
969
        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
                   sp->x[2] * s->iwidth[2]);

    /* New cell of this spart */
970
971
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;
972

973
974
    if (s_dest[k] != new_node)
      error("spart's new node index not matching sorted index.");
975
976
977
978
979

    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
      error("spart not sorted into the right top-level cell!");
980
981
982
  }
#endif