/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *                    Angus Lepper (angus.lepper@ed.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

#ifdef HAVE_LIBNUMA
#include <numa.h>
#endif

/* Load the profiler header, if needed. */
#ifdef WITH_PROFILER
#include <gperftools/profiler.h>
#endif

/* This object's header. */
#include "engine.h"

/* Local headers. */
#include "active.h"
#include "atomic.h"
#include "cell.h"
#include "chemistry.h"
#include "clocks.h"
#include "cooling.h"
#include "cosmology.h"
#include "cycle.h"
#include "debug.h"
#include "entropy_floor.h"
#include "equation_of_state.h"
#include "error.h"
#include "feedback.h"
#include "gravity.h"
#include "gravity_cache.h"
#include "hydro.h"
#include "logger.h"
#include "logger_io.h"
#include "map.h"
#include "memswap.h"
#include "memuse.h"
#include "minmax.h"
#include "outputlist.h"
#include "parallel_io.h"
#include "part.h"
#include "partition.h"
#include "profiler.h"
#include "proxy.h"
#include "restart.h"
#include "runner.h"
#include "serial_io.h"
#include "single_io.h"
#include "sort_part.h"
#include "star_formation.h"
#include "star_formation_logger.h"
#include "star_formation_logger_struct.h"
#include "stars_io.h"
#include "statistics.h"
#include "timers.h"
#include "tools.h"
#include "units.h"
#include "velociraptor_interface.h"
#include "version.h"

/* Particle cache size. */
#define CACHE_SIZE 512

const char *engine_policy_names[] = {"none",
                                     "rand",
                                     "steal",
                                     "keep",
                                     "block",
                                     "cpu tight",
                                     "mpi",
                                     "numa affinity",
                                     "hydro",
                                     "self gravity",
                                     "external gravity",
                                     "cosmological integration",
                                     "drift everything",
                                     "reconstruct multi-poles",
                                     "temperature",
                                     "cooling",
                                     "stars",
                                     "structure finding",
                                     "star formation",
                                     "feedback",
                                     "black holes",
                                     "fof search",
                                     "time-step limiter"};
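
/* Note: these labels are printed when reporting the run's policies; their
 * ordering is assumed to stay in sync with the corresponding engine_policy
 * flags declared in engine.h (a maintenance note, not enforced here). */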

/** The rank of the engine as a global variable (for messages). */
int engine_rank;

/** The current step of the engine as a global variable (for messages). */
int engine_current_step;

/**
 * @brief Data collected from the cells at the end of a time-step
 */
struct end_of_step_data {

  size_t updated, g_updated, s_updated, b_updated;
  size_t inhibited, g_inhibited, s_inhibited, b_inhibited;
  integertime_t ti_hydro_end_min, ti_hydro_end_max, ti_hydro_beg_max;
  integertime_t ti_gravity_end_min, ti_gravity_end_max, ti_gravity_beg_max;
  integertime_t ti_stars_end_min, ti_stars_end_max, ti_stars_beg_max;
  integertime_t ti_black_holes_end_min, ti_black_holes_end_max,
      ti_black_holes_beg_max;
  struct engine *e;
  struct star_formation_history sfh;
};
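
/* (One of these is filled per time-step by a threadpool reduction over the
 * top-level cells and then combined across MPI ranks; see the end-of-step
 * collection routines further down this file.) */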

/**
 * @brief Link a density/force task to a cell.
 *
 * @param e The #engine.
 * @param l A pointer to the #link, will be modified atomically.
 * @param t The #task.
 */
void engine_addlink(struct engine *e, struct link **l, struct task *t) {

#ifdef SWIFT_DEBUG_CHECKS
  if (t == NULL) {
    error("Trying to link NULL task.");
  }
#endif

  /* Get the next free link. */
  const size_t ind = atomic_inc(&e->nr_links);
  if (ind >= e->size_links) {
    error(
        "Link table overflow. Increase the value of "
        "`Scheduler:links_per_tasks`.");
  }
  struct link *res = &e->links[ind];

  /* Set it atomically. */
  res->t = t;
  res->next = atomic_swap(l, res);
}
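
/* A typical call site elsewhere in SWIFT's task-construction code looks
 * roughly like
 *
 *   engine_addlink(e, &ci->hydro.density, t);
 *
 * i.e. the new task t is pushed atomically onto the head of cell ci's list
 * of density tasks. (Illustrative sketch, not an additional API.) */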

#ifdef WITH_MPI
/**
 * @brief Exchange one type of particle with all the other nodes.
 *
 * @param label a label for the memory allocations of this particle type.
 * @param counts 2D array with the counts of particles to exchange with
 *               each other node.
 * @param parts the particle data to exchange
 * @param new_nr_parts the number of particles this node will have after all
 *                     exchanges have completed.
 * @param sizeofparts sizeof the particle struct.
 * @param alignsize the memory alignment required for this particle type.
 * @param mpi_type the MPI_Datatype for these particles.
 * @param nr_nodes the number of nodes to exchange with.
 * @param nodeID the id of this node.
 *
 * @result new particle data constructed from all the exchanges with the
 *         given alignment.
 */
static void *engine_do_redistribute(const char *label, int *counts, char *parts,
                                    size_t new_nr_parts, size_t sizeofparts,
                                    size_t alignsize, MPI_Datatype mpi_type,
                                    int nr_nodes, int nodeID) {

  /* Allocate a new particle array with some extra margin */
  char *parts_new = NULL;
  if (swift_memalign(
          label, (void **)&parts_new, alignsize,
          sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0)
    error("Failed to allocate new particle data.");

  /* Prepare MPI requests for the asynchronous communications */
  MPI_Request *reqs;
  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) ==
      NULL)
    error("Failed to allocate MPI request list.");

  /* Send and receive only "chunk" particles per request, looping as many
   * times as necessary. chunk is INT_MAX / sizeofparts so that each
   * message stays below 2GB. */
  const int chunk = INT_MAX / sizeofparts;
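
  /* Illustrative numbers: for a 256-byte particle struct, chunk is
   * INT_MAX / 256, i.e. about 8.4 million particles, so each message is
   * just under 2GB. The cap exists because MPI send/recv counts are
   * plain ints. */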
  int sent = 0;
  int recvd = 0;

  int activenodes = 1;
  while (activenodes) {

    for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;

    /* Emit the sends and recvs for the data. */
    size_t offset_send = sent;
    size_t offset_recv = recvd;
    activenodes = 0;

    for (int k = 0; k < nr_nodes; k++) {

      /* Indices in the count arrays of the node of interest */
      const int ind_send = nodeID * nr_nodes + k;
      const int ind_recv = k * nr_nodes + nodeID;

      /* Are we sending any data this loop? */
      int sending = counts[ind_send] - sent;
      if (sending > 0) {
        activenodes++;
        if (sending > chunk) sending = chunk;

        /* If the send and receive is local then just copy. */
        if (k == nodeID) {
          int receiving = counts[ind_recv] - recvd;
          if (receiving > chunk) receiving = chunk;
          memcpy(&parts_new[offset_recv * sizeofparts],
                 &parts[offset_send * sizeofparts], sizeofparts * receiving);
        } else {
          /* Otherwise send it. */
          int res =
              MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k,
                        ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]);
          if (res != MPI_SUCCESS)
            mpi_error(res, "Failed to isend parts to node %i.", k);
        }
      }

      /* If we're sending to this node, then move past it to next. */
      if (counts[ind_send] > 0) offset_send += counts[ind_send];

      /* Are we receiving any data from this node? Note already done if coming
       * from this node. */
      if (k != nodeID) {
        int receiving = counts[ind_recv] - recvd;
        if (receiving > 0) {
          activenodes++;
          if (receiving > chunk) receiving = chunk;
          int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving,
                              mpi_type, k, ind_recv, MPI_COMM_WORLD,
                              &reqs[2 * k + 1]);
          if (res != MPI_SUCCESS)
            mpi_error(res, "Failed to emit irecv of parts from node %i.", k);
        }
      }

      /* If we're receiving from this node, then move past it to next. */
      if (counts[ind_recv] > 0) offset_recv += counts[ind_recv];
    }

    /* Wait for all the sends and recvs to tumble in. */
    MPI_Status stats[2 * nr_nodes];
    int res;
    if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
      for (int k = 0; k < 2 * nr_nodes; k++) {
        char buff[MPI_MAX_ERROR_STRING];
        MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
        message("request from source %i, tag %i has error '%s'.",
                stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff);
      }
      error("Failed during waitall for part data.");
    }

    /* Move to next chunks. */
    sent += chunk;
    recvd += chunk;
  }

  /* Free temps. */
  free(reqs);

  /* And return new memory. */
  return parts_new;
}
#endif
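
/* Illustrative use, with names as in engine_redistribute() below (the
 * alignment and MPI datatype constants for each particle type come from
 * the part headers; treat the exact identifiers as assumptions):
 *
 *   s->parts = (struct part *)engine_do_redistribute(
 *       "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part),
 *       part_align, part_mpi_type, nr_nodes, nodeID);
 *
 * The same helper is reused for gparts, sparts and bparts with their own
 * counts matrices and MPI datatypes. */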

#ifdef WITH_MPI /* redist_mapper */

/* Support for engine_redistribute threadpool dest mappers. */
struct redist_mapper_data {
  int *counts;
  int *dest;
  int nodeID;
  int nr_nodes;
  struct cell *cells;
  struct space *s;
  void *base;
};

/* Generic function for accumulating counts for TYPE parts. Note
 * we use a local counts array to avoid the atomic_add in the parts
 * loop. */
#define ENGINE_REDISTRIBUTE_DEST_MAPPER(TYPE)                              \
  engine_redistribute_dest_mapper_##TYPE(void *map_data, int num_elements, \
                                         void *extra_data) {               \
    struct TYPE *parts = (struct TYPE *)map_data;                          \
    struct redist_mapper_data *mydata =                                    \
        (struct redist_mapper_data *)extra_data;                           \
    struct space *s = mydata->s;                                           \
    int *dest =                                                            \
        mydata->dest + (ptrdiff_t)(parts - (struct TYPE *)mydata->base);   \
    int *lcounts = NULL;                                                   \
    if ((lcounts = (int *)calloc(                                          \
             sizeof(int), mydata->nr_nodes * mydata->nr_nodes)) == NULL)   \
      error("Failed to allocate counts thread-specific buffer");           \
    for (int k = 0; k < num_elements; k++) {                               \
      for (int j = 0; j < 3; j++) {                                        \
        if (parts[k].x[j] < 0.0)                                           \
          parts[k].x[j] += s->dim[j];                                      \
        else if (parts[k].x[j] >= s->dim[j])                               \
          parts[k].x[j] -= s->dim[j];                                      \
      }                                                                    \
      const int cid = cell_getid(s->cdim, parts[k].x[0] * s->iwidth[0],    \
                                 parts[k].x[1] * s->iwidth[1],             \
                                 parts[k].x[2] * s->iwidth[2]);            \
      dest[k] = s->cells_top[cid].nodeID;                                  \
      size_t ind = mydata->nodeID * mydata->nr_nodes + dest[k];            \
      lcounts[ind] += 1;                                                   \
    }                                                                      \
    for (int k = 0; k < (mydata->nr_nodes * mydata->nr_nodes); k++)        \
      atomic_add(&mydata->counts[k], lcounts[k]);                          \
    free(lcounts);                                                         \
  }
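
/* For reference, `static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);` below
 * expands to a threadpool mapper
 *
 *   static void engine_redistribute_dest_mapper_part(
 *       void *map_data, int num_elements, void *extra_data) { ... }
 *
 * that wraps each particle back into the periodic box, records its
 * destination rank in dest[], and accumulates a thread-local counts matrix
 * that is merged atomically at the end. */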

/**
 * @brief Accumulate the counts of particles to send to each node.
 * Threadpool helper that records each particle's destination node.
 *
 * part version.
 */
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(part);

/**
 * @brief Accumulate the counts of star particles to send to each node.
 * Threadpool helper that records each particle's destination node.
 *
 * spart version.
 */
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(spart);

/**
 * @brief Accumulate the counts of gravity particles to send to each node.
 * Threadpool helper that records each particle's destination node.
 *
 * gpart version.
 */
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(gpart);

/**
 * @brief Accumulate the counts of black hole particles to send to each node.
 * Threadpool helper that records each particle's destination node.
 *
 * bpart version.
 */
static void ENGINE_REDISTRIBUTE_DEST_MAPPER(bpart);

#endif /* redist_mapper */

#ifdef WITH_MPI /* savelink_mapper_data */

/* Support for saving the linkage between gparts and parts/sparts. */
struct savelink_mapper_data {
  int nr_nodes;
  int *counts;
  void *parts;
  int nodeID;
};

/**
 * @brief Save the offset of each gravity partner of a part or spart.
 *
 * The offset is from the start of the sorted particles to be sent to a node.
 * This is possible as parts without gravity partners have a positive id.
 * These offsets are used to restore the pointers on the receiving node.
 *
 * CHECKS should be eliminated as dead code when optimizing.
 */
#define ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(TYPE, CHECKS)                      \
  engine_redistribute_savelink_mapper_##TYPE(void *map_data, int num_elements, \
                                             void *extra_data) {               \
    int *nodes = (int *)map_data;                                              \
    struct savelink_mapper_data *mydata =                                      \
        (struct savelink_mapper_data *)extra_data;                             \
    int nodeID = mydata->nodeID;                                               \
    int nr_nodes = mydata->nr_nodes;                                           \
    int *counts = mydata->counts;                                              \
    struct TYPE *parts = (struct TYPE *)mydata->parts;                         \
                                                                               \
    for (int j = 0; j < num_elements; j++) {                                   \
      int node = nodes[j];                                                     \
      int count = 0;                                                           \
      size_t offset = 0;                                                       \
      for (int i = 0; i < node; i++) offset += counts[nodeID * nr_nodes + i];  \
                                                                               \
      for (int k = 0; k < counts[nodeID * nr_nodes + node]; k++) {             \
        if (parts[k + offset].gpart != NULL) {                                 \
          if (CHECKS)                                                          \
            if (parts[k + offset].gpart->id_or_neg_offset > 0)                 \
              error("Trying to link a partnerless " #TYPE "!");                \
          parts[k + offset].gpart->id_or_neg_offset = -count;                  \
          count++;                                                             \
        }                                                                      \
      }                                                                        \
    }                                                                          \
  }
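
/* Worked example (illustrative): with 3 ranks and this rank's counts row
 * {4, 2, 3}, the block of particles bound for node 2 starts at offset
 * 4 + 2 = 6 in the sorted send buffer; the gparts of the linked particles
 * in that block get id_or_neg_offset 0, -1, -2, ..., ready to be relinked
 * on the receiving node. */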

/**
 * @brief Save position of part-gpart links.
 * Threadpool helper for saving the part-gpart link offsets.
 */
#ifdef SWIFT_DEBUG_CHECKS
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 1);
#else
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(part, 0);
#endif

/**
 * @brief Save position of spart-gpart links.
 * Threadpool helper for saving the spart-gpart link offsets.
 */
#ifdef SWIFT_DEBUG_CHECKS
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 1);
#else
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(spart, 0);
#endif

/**
 * @brief Save position of bpart-gpart links.
 * Threadpool helper for saving the bpart-gpart link offsets.
 */
#ifdef SWIFT_DEBUG_CHECKS
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 1);
#else
static void ENGINE_REDISTRIBUTE_SAVELINK_MAPPER(bpart, 0);
#endif

#endif /* savelink_mapper_data */

#ifdef WITH_MPI /* relink_mapper_data */

/* Support for relinking parts, gparts, sparts and bparts after moving between
 * nodes. */
struct relink_mapper_data {
  int nodeID;
  int nr_nodes;
  int *counts;
  int *s_counts;
  int *g_counts;
  int *b_counts;
  struct space *s;
};

/**
 * @brief Restore the part/gpart, spart/gpart and bpart/gpart links for a
 * list of nodes.
 *
 * @param map_data address of nodes to process.
 * @param num_elements the number of nodes to process.
 * @param extra_data additional data defining the context (a
 * relink_mapper_data).
 */
static void engine_redistribute_relink_mapper(void *map_data, int num_elements,
                                              void *extra_data) {

  int *nodes = (int *)map_data;
  struct relink_mapper_data *mydata = (struct relink_mapper_data *)extra_data;

  int nodeID = mydata->nodeID;
  int nr_nodes = mydata->nr_nodes;
  int *counts = mydata->counts;
  int *g_counts = mydata->g_counts;
  int *s_counts = mydata->s_counts;
  int *b_counts = mydata->b_counts;
  struct space *s = mydata->s;

  for (int i = 0; i < num_elements; i++) {

    int node = nodes[i];

    /* Get offsets to correct parts of the counts arrays for this node. */
    size_t offset_parts = 0;
    size_t offset_gparts = 0;
    size_t offset_sparts = 0;
    size_t offset_bparts = 0;
    for (int n = 0; n < node; n++) {
      int ind_recv = n * nr_nodes + nodeID;
      offset_parts += counts[ind_recv];
      offset_gparts += g_counts[ind_recv];
      offset_sparts += s_counts[ind_recv];
      offset_bparts += b_counts[ind_recv];
    }

    /* Number of gparts received from this node. */
    int ind_recv = node * nr_nodes + nodeID;
    const size_t count_gparts = g_counts[ind_recv];

    /* Loop over the gparts received from this node */
    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; k++) {

      /* Does this gpart have a gas partner ? */
      if (s->gparts[k].type == swift_type_gas) {

        const ptrdiff_t partner_index =
            offset_parts - s->gparts[k].id_or_neg_offset;

        /* Re-link */
        s->gparts[k].id_or_neg_offset = -partner_index;
        s->parts[partner_index].gpart = &s->gparts[k];
      }

      /* Does this gpart have a star partner ? */
      else if (s->gparts[k].type == swift_type_stars) {

        const ptrdiff_t partner_index =
            offset_sparts - s->gparts[k].id_or_neg_offset;

        /* Re-link */
        s->gparts[k].id_or_neg_offset = -partner_index;
        s->sparts[partner_index].gpart = &s->gparts[k];
      }

      /* Does this gpart have a black hole partner ? */
      else if (s->gparts[k].type == swift_type_black_hole) {

        const ptrdiff_t partner_index =
            offset_bparts - s->gparts[k].id_or_neg_offset;

        /* Re-link */
        s->gparts[k].id_or_neg_offset = -partner_index;
        s->bparts[partner_index].gpart = &s->gparts[k];
      }
    }
  }
}

#endif /* relink_mapper_data */
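
/* Decoding sketch for the arithmetic above (illustrative numbers): a gpart
 * arrives with id_or_neg_offset = -2, meaning its partner was the third
 * linked particle of the block sent by that node. If that block now starts
 * at offset_parts = 6 locally, then partner_index = 6 - (-2) = 8, and both
 * pointers are rebuilt from there. */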

/**
 * @brief Redistribute the particles amongst the nodes according
 *        to their cell's node IDs.
 *
 * The strategy here is as follows:
 * 1) Each node counts the number of particles it has to send to each other
 * node.
 * 2) The number of particles of each type is then exchanged.
 * 3) The particles to send are placed in a temporary buffer in which the
 * part-gpart links are preserved.
 * 4) Each node allocates enough space for the new particles.
 * 5) (Asynchronous) communications are issued to transfer the data.
 *
 * @param e The #engine.
 */
void engine_redistribute(struct engine *e) {

#ifdef WITH_MPI

  const int nr_nodes = e->nr_nodes;
  const int nodeID = e->nodeID;
  struct space *s = e->s;
  struct cell *cells = s->cells_top;
  const int nr_cells = s->nr_cells;
  struct xpart *xparts = s->xparts;
  struct part *parts = s->parts;
  struct gpart *gparts = s->gparts;
  struct spart *sparts = s->sparts;
  struct bpart *bparts = s->bparts;
  ticks tic = getticks();

  size_t nr_parts = s->nr_parts;
  size_t nr_gparts = s->nr_gparts;
  size_t nr_sparts = s->nr_sparts;
  size_t nr_bparts = s->nr_bparts;

  /* Start by moving inhibited particles to the end of the arrays */
  for (size_t k = 0; k < nr_parts; /* void */) {
    if (parts[k].time_bin == time_bin_inhibited ||
        parts[k].time_bin == time_bin_not_created) {
      nr_parts -= 1;

      /* Swap the particle */
      memswap(&parts[k], &parts[nr_parts], sizeof(struct part));

      /* Swap the xpart */
      memswap(&xparts[k], &xparts[nr_parts], sizeof(struct xpart));

      /* Swap the link with the gpart */
      if (parts[k].gpart != NULL) {
        parts[k].gpart->id_or_neg_offset = -k;
      }
      if (parts[nr_parts].gpart != NULL) {
        parts[nr_parts].gpart->id_or_neg_offset = -nr_parts;
      }
    } else {
      k++;
    }
  }
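
  /* The same swap-to-the-back partition is repeated below for the star,
   * black hole and gravity particles: k only advances when the particle at
   * position k survives, so a particle swapped in from the tail is
   * re-examined and nothing live is skipped. */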

  /* Now move inhibited star particles to the end of the arrays */
  for (size_t k = 0; k < nr_sparts; /* void */) {
    if (sparts[k].time_bin == time_bin_inhibited ||
        sparts[k].time_bin == time_bin_not_created) {
      nr_sparts -= 1;

      /* Swap the particle */
      memswap(&s->sparts[k], &s->sparts[nr_sparts], sizeof(struct spart));

      /* Swap the link with the gpart */
      if (s->sparts[k].gpart != NULL) {
        s->sparts[k].gpart->id_or_neg_offset = -k;
      }
      if (s->sparts[nr_sparts].gpart != NULL) {
        s->sparts[nr_sparts].gpart->id_or_neg_offset = -nr_sparts;
      }
    } else {
      k++;
    }
  }

  /* Now move inhibited black hole particles to the end of the arrays */
  for (size_t k = 0; k < nr_bparts; /* void */) {
    if (bparts[k].time_bin == time_bin_inhibited ||
        bparts[k].time_bin == time_bin_not_created) {
      nr_bparts -= 1;

      /* Swap the particle */
      memswap(&s->bparts[k], &s->bparts[nr_bparts], sizeof(struct bpart));

      /* Swap the link with the gpart */
      if (s->bparts[k].gpart != NULL) {
        s->bparts[k].gpart->id_or_neg_offset = -k;
      }
      if (s->bparts[nr_bparts].gpart != NULL) {
        s->bparts[nr_bparts].gpart->id_or_neg_offset = -nr_bparts;
      }
    } else {
      k++;
    }
  }

  /* Finally do the same with the gravity particles */
  for (size_t k = 0; k < nr_gparts; /* void */) {
    if (gparts[k].time_bin == time_bin_inhibited ||
        gparts[k].time_bin == time_bin_not_created) {
      nr_gparts -= 1;

      /* Swap the particle */
      memswap(&s->gparts[k], &s->gparts[nr_gparts], sizeof(struct gpart));

      /* Swap the link with the part/spart/bpart */
      if (s->gparts[k].type == swift_type_gas) {
        s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      } else if (s->gparts[k].type == swift_type_stars) {
        s->sparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      } else if (s->gparts[k].type == swift_type_black_hole) {
        s->bparts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k];
      }

      if (s->gparts[nr_gparts].type == swift_type_gas) {
        s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
      } else if (s->gparts[nr_gparts].type == swift_type_stars) {
        s->sparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
      } else if (s->gparts[nr_gparts].type == swift_type_black_hole) {
        s->bparts[-s->gparts[nr_gparts].id_or_neg_offset].gpart =
            &s->gparts[nr_gparts];
      }
    } else {
      k++;
    }
  }

  /* Now we are ready to deal with real particles and can start the exchange. */

  /* Allocate temporary arrays to store the counts of particles to be sent
   * and the destination of each particle */
  int *counts;
  if ((counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate counts temporary buffer.");

  int *dest;
  if ((dest = (int *)swift_malloc("dest", sizeof(int) * nr_parts)) == NULL)
    error("Failed to allocate dest temporary buffer.");

  /* Simple index of node IDs, used for mappers over nodes. */
  int *nodes = NULL;
  if ((nodes = (int *)malloc(sizeof(int) * nr_nodes)) == NULL)
    error("Failed to allocate nodes temporary buffer.");
  for (int k = 0; k < nr_nodes; k++) nodes[k] = k;

  /* Get destination of each particle */
  struct redist_mapper_data redist_data;
  redist_data.s = s;
  redist_data.nodeID = nodeID;
  redist_data.nr_nodes = nr_nodes;

  redist_data.counts = counts;
  redist_data.dest = dest;
  redist_data.base = (void *)parts;

  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_part, parts,
                 nr_parts, sizeof(struct part), 0, &redist_data);

  /* Sort the particles according to their cell index. */
  if (nr_parts > 0)
    space_parts_sort(s->parts, s->xparts, dest, &counts[nodeID * nr_nodes],
                     nr_nodes, 0);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the parts have been sorted correctly. */
  for (size_t k = 0; k < nr_parts; k++) {
    const struct part *p = &s->parts[k];

    if (p->time_bin == time_bin_inhibited)
      error("Inhibited particle found after sorting!");

    if (p->time_bin == time_bin_not_created)
      error("Not yet created particle found after sorting!");

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, p->x[0] * s->iwidth[0], p->x[1] * s->iwidth[1],
                   p->x[2] * s->iwidth[2]);

    /* New cell of this part */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (dest[k] != new_node)
      error("part's new node index not matching sorted index.");

    if (p->x[0] < c->loc[0] || p->x[0] > c->loc[0] + c->width[0] ||
        p->x[1] < c->loc[1] || p->x[1] > c->loc[1] + c->width[1] ||
        p->x[2] < c->loc[2] || p->x[2] > c->loc[2] + c->width[2])
      error("part not sorted into the right top-level cell!");
  }
#endif

  /* We will need to re-link the gpart partners of parts, so save their
   * relative positions in the sent lists. */
  if (nr_parts > 0 && nr_gparts > 0) {

    struct savelink_mapper_data savelink_data;
    savelink_data.nr_nodes = nr_nodes;
    savelink_data.counts = counts;
    savelink_data.parts = (void *)parts;
    savelink_data.nodeID = nodeID;
    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_part,
                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
  }
  swift_free("dest", dest);

  /* Get destination of each s-particle */
  int *s_counts;
  if ((s_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate s_counts temporary buffer.");

  int *s_dest;
  if ((s_dest = (int *)swift_malloc("s_dest", sizeof(int) * nr_sparts)) == NULL)
    error("Failed to allocate s_dest temporary buffer.");

  redist_data.counts = s_counts;
  redist_data.dest = s_dest;
  redist_data.base = (void *)sparts;

  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_spart, sparts,
                 nr_sparts, sizeof(struct spart), 0, &redist_data);

  /* Sort the particles according to their cell index. */
  if (nr_sparts > 0)
    space_sparts_sort(s->sparts, s_dest, &s_counts[nodeID * nr_nodes], nr_nodes,
                      0);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the sparts have been sorted correctly. */
  for (size_t k = 0; k < nr_sparts; k++) {
    const struct spart *sp = &s->sparts[k];

    if (sp->time_bin == time_bin_inhibited)
      error("Inhibited particle found after sorting!");

    if (sp->time_bin == time_bin_not_created)
      error("Not yet created particle found after sorting!");

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, sp->x[0] * s->iwidth[0], sp->x[1] * s->iwidth[1],
                   sp->x[2] * s->iwidth[2]);

    /* New cell of this spart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (s_dest[k] != new_node)
      error("spart's new node index not matching sorted index.");

    if (sp->x[0] < c->loc[0] || sp->x[0] > c->loc[0] + c->width[0] ||
        sp->x[1] < c->loc[1] || sp->x[1] > c->loc[1] + c->width[1] ||
        sp->x[2] < c->loc[2] || sp->x[2] > c->loc[2] + c->width[2])
      error("spart not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of sparts. */
  if (nr_sparts > 0) {

    struct savelink_mapper_data savelink_data;
    savelink_data.nr_nodes = nr_nodes;
    savelink_data.counts = s_counts;
    savelink_data.parts = (void *)sparts;
    savelink_data.nodeID = nodeID;
    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_spart,
                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
  }
  swift_free("s_dest", s_dest);

  /* Get destination of each b-particle */
  int *b_counts;
  if ((b_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate b_counts temporary buffer.");

  int *b_dest;
  if ((b_dest = (int *)swift_malloc("b_dest", sizeof(int) * nr_bparts)) == NULL)
    error("Failed to allocate b_dest temporary buffer.");

  redist_data.counts = b_counts;
  redist_data.dest = b_dest;
  redist_data.base = (void *)bparts;

  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_bpart, bparts,
                 nr_bparts, sizeof(struct bpart), 0, &redist_data);

  /* Sort the particles according to their cell index. */
  if (nr_bparts > 0)
    space_bparts_sort(s->bparts, b_dest, &b_counts[nodeID * nr_nodes], nr_nodes,
                      0);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the bparts have been sorted correctly. */
  for (size_t k = 0; k < nr_bparts; k++) {
    const struct bpart *bp = &s->bparts[k];

    if (bp->time_bin == time_bin_inhibited)
      error("Inhibited particle found after sorting!");

    if (bp->time_bin == time_bin_not_created)
      error("Not yet created particle found after sorting!");

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, bp->x[0] * s->iwidth[0], bp->x[1] * s->iwidth[1],
                   bp->x[2] * s->iwidth[2]);

    /* New cell of this bpart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (b_dest[k] != new_node)
      error("bpart's new node index not matching sorted index.");

    if (bp->x[0] < c->loc[0] || bp->x[0] > c->loc[0] + c->width[0] ||
        bp->x[1] < c->loc[1] || bp->x[1] > c->loc[1] + c->width[1] ||
        bp->x[2] < c->loc[2] || bp->x[2] > c->loc[2] + c->width[2])
      error("bpart not sorted into the right top-level cell!");
  }
#endif

  /* We need to re-link the gpart partners of bparts. */
  if (nr_bparts > 0) {

    struct savelink_mapper_data savelink_data;
    savelink_data.nr_nodes = nr_nodes;
    savelink_data.counts = b_counts;
    savelink_data.parts = (void *)bparts;
    savelink_data.nodeID = nodeID;
    threadpool_map(&e->threadpool, engine_redistribute_savelink_mapper_bpart,
                   nodes, nr_nodes, sizeof(int), 0, &savelink_data);
  }
  swift_free("b_dest", b_dest);

  /* Get destination of each g-particle */
  int *g_counts;
  if ((g_counts = (int *)calloc(sizeof(int), nr_nodes * nr_nodes)) == NULL)
    error("Failed to allocate g_counts temporary buffer.");

  int *g_dest;
  if ((g_dest = (int *)swift_malloc("g_dest", sizeof(int) * nr_gparts)) == NULL)
    error("Failed to allocate g_dest temporary buffer.");

  redist_data.counts = g_counts;
  redist_data.dest = g_dest;
  redist_data.base = (void *)gparts;

  threadpool_map(&e->threadpool, engine_redistribute_dest_mapper_gpart, gparts,
                 nr_gparts, sizeof(struct gpart), 0, &redist_data);

  /* Sort the gparticles according to their cell index. */
  if (nr_gparts > 0)
    space_gparts_sort(s->gparts, s->parts, s->sparts, s->bparts, g_dest,
                      &g_counts[nodeID * nr_nodes], nr_nodes);

#ifdef SWIFT_DEBUG_CHECKS
  /* Verify that the gparts have been sorted correctly. */
  for (size_t k = 0; k < nr_gparts; k++) {
    const struct gpart *gp = &s->gparts[k];

    if (gp->time_bin == time_bin_inhibited)
      error("Inhibited particle found after sorting!");

    if (gp->time_bin == time_bin_not_created)
      error("Not yet created particle found after sorting!");

    /* New cell index */
    const int new_cid =
        cell_getid(s->cdim, gp->x[0] * s->iwidth[0], gp->x[1] * s->iwidth[1],
                   gp->x[2] * s->iwidth[2]);

    /* New cell of this gpart */
    const struct cell *c = &s->cells_top[new_cid];
    const int new_node = c->nodeID;

    if (g_dest[k] != new_node)
      error("gpart's new node index not matching sorted index (%d != %d).",
            g_dest[k], new_node);

    if (gp->x[0] < c->loc[0] || gp->x[0] > c->loc[0] + c->width[0] ||
        gp->x[1] < c->loc[1] || gp->x[1] > c->loc[1] + c->width[1] ||
        gp->x[2] < c->loc[2] || gp->x[2] > c->loc[2] + c->width[2])
      error("gpart not sorted into the right top-level cell!");
  }
#endif

  swift_free("g_dest", g_dest);

  /* Get all the counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM,
                    MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce particle transfer counts.");

  /* Get all the g_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce gparticle transfer counts.");

  /* Get all the s_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, s_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce sparticle transfer counts.");

  /* Get all the b_counts from all the nodes. */
  if (MPI_Allreduce(MPI_IN_PLACE, b_counts, nr_nodes * nr_nodes, MPI_INT,
                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
    error("Failed to allreduce bparticle transfer counts.");

  /* Report how many particles will be moved. */
  if (e->verbose) {
    if (e->nodeID == 0) {
      size_t total = 0, g_total = 0, s_total = 0, b_total = 0;
      size_t unmoved = 0, g_unmoved = 0, s_unmoved = 0, b_unmoved = 0;
      for (int p = 0, r = 0; p < nr_nodes; p++) {
        for (int n = 0; n < nr_nodes; n++) {
          total += counts[r];
          g_total += g_counts[r];
          s_total += s_counts[r];
          b_total += b_counts[r];
          if (p == n) {
            unmoved += counts[r];