partition.c 41 KB
Newer Older
1
2
/*******************************************************************************
 * This file is part of SWIFT.
Peter W. Draper's avatar
Peter W. Draper committed
3
4
 * Copyright (c) 2016 Peter W. Draper (p.w.draper@durham.ac.uk)
 *                    Pedro Gonnet (pedro.gonnet@durham.ac.uk)
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/**
 *  @file partition.c
23
 *  @brief file of various techniques for partitioning and repartitioning
Peter W. Draper's avatar
Peter W. Draper committed
24
25
 *  a grid of cells into geometrically connected regions and distributing
 *  these around a number of MPI nodes.
26
 *
Peter W. Draper's avatar
Peter W. Draper committed
27
 *  Currently supported partitioning types: grid, vectorise and METIS.
28
29
30
31
32
33
 */

/* Config parameters. */
#include "../config.h"

/* Standard headers. */
34
#include <float.h>
35
#include <math.h>
36
37
#include <stdio.h>
#include <stdlib.h>
38
#include <strings.h>
39

40
41
42
43
44
45
46
47
48
/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
/* METIS headers only used when MPI is also available. */
#ifdef HAVE_METIS
#include <metis.h>
#endif
#endif

49
/* Local headers. */
50
#include "debug.h"
51
52
53
#include "error.h"
#include "partition.h"
#include "space.h"
54
#include "tools.h"
55

56
57
58
/* Maximum weight used for METIS. */
#define metis_maxweight 10000.0f

59
/* Simple descriptions of initial partition types for reports. */
60
const char *initial_partition_name[] = {
61
    "gridded cells", "vectorized point associated cells",
Matthieu Schaller's avatar
Matthieu Schaller committed
62
    "METIS particle weighted cells", "METIS unweighted cells"};
63
64

/* Simple descriptions of repartition types for reports. */
65
const char *repartition_name[] = {
66
    "no",
67
    "METIS edge and vertex task cost weights",
68
    "METIS particle count vertex weights",
69
70
71
72
73
    "METIS task cost edge weights",
    "METIS particle count vertex and task cost edge weights",
    "METIS vertex task costs and edge delta timebin weights",
    "METIS particle count vertex and edge delta timebin weights",
    "METIS edge delta timebin weights",
74
};
75

Peter W. Draper's avatar
Peter W. Draper committed
76
77
78
/* Local functions, if needed. */
static int check_complete(struct space *s, int verbose, int nregions);

79
80
81
/*  Vectorisation support */
/*  ===================== */

82
#if defined(WITH_MPI)
83
/**
84
 *  @brief Pick a number of cell positions from a vectorised list.
85
 *
86
87
88
 *  Vectorise the cell space and pick positions in it for the number of
 *  expected regions using a single step. Vectorisation is guaranteed
 *  to work, providing there are more cells than regions.
89
90
 *
 *  @param s the space.
91
92
 *  @param nregions the number of regions
 *  @param samplecells the list of sample cell positions, size of 3*nregions
93
 */
94
static void pick_vector(struct space *s, int nregions, int *samplecells) {
95
96
97

  /* Get length of space and divide up. */
  int length = s->cdim[0] * s->cdim[1] * s->cdim[2];
98
99
100
  if (nregions > length) {
    error("Too few cells (%d) for this number of regions (%d)", length,
          nregions);
101
102
  }

103
  int step = length / nregions;
104
105
106
107
108
109
110
  int n = 0;
  int m = 0;
  int l = 0;

  for (int i = 0; i < s->cdim[0]; i++) {
    for (int j = 0; j < s->cdim[1]; j++) {
      for (int k = 0; k < s->cdim[2]; k++) {
111
        if (n == 0 && l < nregions) {
112
113
114
115
116
117
118
119
120
121
122
          samplecells[m++] = i;
          samplecells[m++] = j;
          samplecells[m++] = k;
          l++;
        }
        n++;
        if (n == step) n = 0;
      }
    }
  }
}
123
#endif
124

125
#if defined(WITH_MPI)
126
127
128
/**
 * @brief Partition the space.
 *
Peter W. Draper's avatar
Peter W. Draper committed
129
130
 * Using the sample positions as seeds pick cells that are geometrically
 * closest and apply the partition to the space.
131
 */
132
static void split_vector(struct space *s, int nregions, int *samplecells) {
133
134
135
136
137
138
139
  int n = 0;
  for (int i = 0; i < s->cdim[0]; i++) {
    for (int j = 0; j < s->cdim[1]; j++) {
      for (int k = 0; k < s->cdim[2]; k++) {
        int select = -1;
        float rsqmax = FLT_MAX;
        int m = 0;
140
        for (int l = 0; l < nregions; l++) {
141
142
143
144
145
146
147
148
149
          float dx = samplecells[m++] - i;
          float dy = samplecells[m++] - j;
          float dz = samplecells[m++] - k;
          float rsq = (dx * dx + dy * dy + dz * dz);
          if (rsq < rsqmax) {
            rsqmax = rsq;
            select = l;
          }
        }
150
        s->cells_top[n++].nodeID = select;
151
152
153
154
      }
    }
  }
}
155
#endif
156
157
158
159
160
161
162

/* METIS support
 * =============
 *
 * METIS partitions using a multi-level k-way scheme. We support using this in
 * a unweighted scheme, which works well and seems to be guaranteed, and a
 * weighted by the number of particles scheme. Note METIS is optional.
Peter W. Draper's avatar
Peter W. Draper committed
163
164
165
166
 *
 * Repartitioning is based on METIS and uses weights determined from the times
 * that cell tasks have taken. These weight the graph edges and vertices, or
 * just the edges, with vertex weights from the particle counts or none.
167
168
 */

Peter W. Draper's avatar
Peter W. Draper committed
169
#if defined(WITH_MPI) && defined(HAVE_METIS)
170
/**
171
172
173
174
175
176
177
178
179
180
181
182
183
 * @brief Fill the METIS xadj and adjncy arrays defining the graph of cells
 *        in a space.
 *
 * See the METIS manual if you want to understand this format. The cell graph
 * consists of all nodes as vertices with edges as the connections to all
 * neighbours, so we have 26 per vertex.
 *
 * @param s the space of cells.
 * @param adjncy the METIS adjncy array to fill, must be of size
 *               26 * the number of cells in the space.
 * @param xadj the METIS xadj array to fill, must be of size
 *             number of cells in space + 1. NULL for not used.
 */
184
static void graph_init_metis(struct space *s, idx_t *adjncy, idx_t *xadj) {
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

  /* Loop over all cells in the space. */
  int cid = 0;
  for (int l = 0; l < s->cdim[0]; l++) {
    for (int m = 0; m < s->cdim[1]; m++) {
      for (int n = 0; n < s->cdim[2]; n++) {

        /* Visit all neighbours of this cell, wrapping space at edges. */
        int p = 0;
        for (int i = -1; i <= 1; i++) {
          int ii = l + i;
          if (ii < 0)
            ii += s->cdim[0];
          else if (ii >= s->cdim[0])
            ii -= s->cdim[0];
          for (int j = -1; j <= 1; j++) {
            int jj = m + j;
            if (jj < 0)
              jj += s->cdim[1];
            else if (jj >= s->cdim[1])
              jj -= s->cdim[1];
            for (int k = -1; k <= 1; k++) {
              int kk = n + k;
              if (kk < 0)
                kk += s->cdim[2];
              else if (kk >= s->cdim[2])
                kk -= s->cdim[2];

              /* If not self, record id of neighbour. */
              if (i || j || k) {
                adjncy[cid * 26 + p] = cell_getid(s->cdim, ii, jj, kk);
                p++;
              }
            }
          }
        }

        /* Next cell. */
        cid++;
      }
    }
  }

  /* If given set xadj. */
  if (xadj != NULL) {
    xadj[0] = 0;
    for (int k = 0; k < s->nr_cells; k++) xadj[k + 1] = xadj[k] + 26;
  }
}
#endif

Peter W. Draper's avatar
Peter W. Draper committed
236
#if defined(WITH_MPI) && defined(HAVE_METIS)
237
238
239
240
241
242
243
/**
 * @brief Accumulate the counts of particles per cell.
 *
 * @param s the space containing the cells.
 * @param counts the number of particles per cell. Should be
 *               allocated as size s->nr_parts.
 */
244
static void accumulate_counts(struct space *s, double *counts) {
245
246
247

  struct part *parts = s->parts;
  int *cdim = s->cdim;
248
  double iwidth[3] = {s->iwidth[0], s->iwidth[1], s->iwidth[2]};
249
  double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
250

251
  bzero(counts, sizeof(double) * s->nr_cells);
252

253
  for (size_t k = 0; k < s->nr_parts; k++) {
254
255
256
257
258
259
    for (int j = 0; j < 3; j++) {
      if (parts[k].x[j] < 0.0)
        parts[k].x[j] += dim[j];
      else if (parts[k].x[j] >= dim[j])
        parts[k].x[j] -= dim[j];
    }
James Willis's avatar
James Willis committed
260
261
262
    const int cid =
        cell_getid(cdim, parts[k].x[0] * iwidth[0], parts[k].x[1] * iwidth[1],
                   parts[k].x[2] * iwidth[2]);
263
264
265
266
267
    counts[cid]++;
  }
}
#endif

268
269
270
271
272
273
274
275
276
277
278
279
280
#if defined(WITH_MPI) && defined(HAVE_METIS)
/**
 * @brief Apply METIS cell list partitioning to a cell structure.
 *
 * Uses the results of part_metis_pick to assign each cell's nodeID to the
 * picked region index, thus partitioning the space into regions.
 *
 * @param s the space containing the cells to split into regions.
 * @param nregions number of regions.
 * @param celllist list of regions for each cell.
 */
static void split_metis(struct space *s, int nregions, int *celllist) {

281
  for (int i = 0; i < s->nr_cells; i++) s->cells_top[i].nodeID = celllist[i];
282

283
284
  /* To check or visualise the partition dump all the cells. */
  /* dumpCellRanks("metis_partition", s->cells_top, s->nr_cells); */
285
286
287
288
}
#endif

#if defined(WITH_MPI) && defined(HAVE_METIS)
289
290
291
292
293
294
295
296
297
298
299
300

/* qsort support. */
struct indexval {
  int index;
  int count;
};
static int indexvalcmp(const void *p1, const void *p2) {
  const struct indexval *iv1 = (const struct indexval *)p1;
  const struct indexval *iv2 = (const struct indexval *)p2;
  return iv2->count - iv1->count;
}

301
302
303
304
/**
 * @brief Partition the given space into a number of connected regions.
 *
 * Split the space using METIS to derive a partitions using the
305
 * given edge and vertex weights. If no weights are given then an
Peter W. Draper's avatar
Peter W. Draper committed
306
 * unweighted partition is performed.
307
308
309
310
 *
 * @param s the space of cells to partition.
 * @param nregions the number of regions required in the partition.
 * @param vertexw weights for the cells, sizeof number of cells if used,
311
 *        NULL for unit weights. Need to be in the range of idx_t.
312
313
 * @param edgew weights for the graph edges between all cells, sizeof number
 *        of cells * 26 if used, NULL for unit weights. Need to be packed
314
315
 *        in CSR format, so same as adjncy array. Need to be in the range of
 *        idx_t.
316
317
318
 * @param celllist on exit this contains the ids of the selected regions,
 *        sizeof number of cells.
 */
319
320
static void pick_metis(struct space *s, int nregions, double *vertexw,
                       double *edgew, int *celllist) {
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356

  /* Total number of cells. */
  int ncells = s->cdim[0] * s->cdim[1] * s->cdim[2];

  /* Nothing much to do if only using a single partition. Also avoids METIS
   * bug that doesn't handle this case well. */
  if (nregions == 1) {
    for (int i = 0; i < ncells; i++) celllist[i] = 0;
    return;
  }

  /* Allocate weights and adjacency arrays . */
  idx_t *xadj;
  if ((xadj = (idx_t *)malloc(sizeof(idx_t) * (ncells + 1))) == NULL)
    error("Failed to allocate xadj buffer.");
  idx_t *adjncy;
  if ((adjncy = (idx_t *)malloc(sizeof(idx_t) * 26 * ncells)) == NULL)
    error("Failed to allocate adjncy array.");
  idx_t *weights_v = NULL;
  if (vertexw != NULL)
    if ((weights_v = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL)
      error("Failed to allocate vertex weights array");
  idx_t *weights_e = NULL;
  if (edgew != NULL)
    if ((weights_e = (idx_t *)malloc(26 * sizeof(idx_t) * ncells)) == NULL)
      error("Failed to allocate edge weights array");
  idx_t *regionid;
  if ((regionid = (idx_t *)malloc(sizeof(idx_t) * ncells)) == NULL)
    error("Failed to allocate regionid array");

  /* Define the cell graph. */
  graph_init_metis(s, adjncy, xadj);

  /* Init the vertex weights array. */
  if (vertexw != NULL) {
    for (int k = 0; k < ncells; k++) {
357
      if (vertexw[k] > 1) {
358
359
360
361
362
        weights_v[k] = vertexw[k];
      } else {
        weights_v[k] = 1;
      }
    }
363
364
365
366
367
368
369
370
371
372

#ifdef SWIFT_DEBUG_CHECKS
    /* Check weights are all in range. */
    int failed = 0;
    for (int k = 0; k < ncells; k++) {
      if ((idx_t)vertexw[k] < 0) {
        message("Input vertex weight out of range: %ld", (long)vertexw[k]);
        failed++;
      }
      if (weights_v[k] < 1) {
373
        message("Used vertex weight  out of range: %" PRIDX, weights_v[k]);
374
375
376
        failed++;
      }
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
377
    if (failed > 0) error("%d vertex weights are out of range", failed);
378
#endif
379
380
381
382
383
  }

  /* Init the edges weights array. */
  if (edgew != NULL) {
    for (int k = 0; k < 26 * ncells; k++) {
384
      if (edgew[k] > 1) {
385
386
387
388
389
        weights_e[k] = edgew[k];
      } else {
        weights_e[k] = 1;
      }
    }
390
391
392
393
394
395
396
397
398
399
400

#ifdef SWIFT_DEBUG_CHECKS
    /* Check weights are all in range. */
    int failed = 0;
    for (int k = 0; k < 26 * ncells; k++) {

      if ((idx_t)edgew[k] < 0) {
        message("Input edge weight out of range: %ld", (long)edgew[k]);
        failed++;
      }
      if (weights_e[k] < 1) {
401
        message("Used edge weight out of range: %" PRIDX, weights_e[k]);
402
403
404
        failed++;
      }
    }
Matthieu Schaller's avatar
Matthieu Schaller committed
405
    if (failed > 0) error("%d edge weights are out of range", failed);
406
#endif
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
  }

  /* Set the METIS options. */
  idx_t options[METIS_NOPTIONS];
  METIS_SetDefaultOptions(options);
  options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
  options[METIS_OPTION_NUMBERING] = 0;
  options[METIS_OPTION_CONTIG] = 1;
  options[METIS_OPTION_NCUTS] = 10;
  options[METIS_OPTION_NITER] = 20;

  /* Call METIS. */
  idx_t one = 1;
  idx_t idx_ncells = ncells;
  idx_t idx_nregions = nregions;
  idx_t objval;

  /* Dump graph in METIS format */
425
426
  /*dumpMETISGraph("metis_graph", idx_ncells, one, xadj, adjncy,
   *               weights_v, NULL, weights_e);
427
   */
428
  if (METIS_PartGraphKway(&idx_ncells, &one, xadj, adjncy, weights_v, NULL,
Matthieu Schaller's avatar
Matthieu Schaller committed
429
430
                          weights_e, &idx_nregions, NULL, NULL, options,
                          &objval, regionid) != METIS_OK)
431
432
433
434
435
436
437
    error("Call to METIS_PartGraphKway failed.");

  /* Check that the regionids are ok. */
  for (int k = 0; k < ncells; k++)
    if (regionid[k] < 0 || regionid[k] >= nregions)
      error("Got bad nodeID %" PRIDX " for cell %i.", regionid[k], k);

438
439
  /* We want a solution in which the current regions of the space are
   * preserved when possible, to avoid unneccesary particle movement.
Peter W. Draper's avatar
Peter W. Draper committed
440
441
442
443
444
445
   * So create a 2d-array of cells counts that are common to all pairs
   * of old and new ranks. Each element of the array has a cell count and
   * an unique index so we can sort into decreasing counts. */
  int indmax = nregions * nregions;
  struct indexval *ivs = malloc(sizeof(struct indexval) * indmax);
  bzero(ivs, sizeof(struct indexval) * indmax);
446
  for (int k = 0; k < ncells; k++) {
Peter W. Draper's avatar
Peter W. Draper committed
447
448
449
    int index = regionid[k] + nregions * s->cells_top[k].nodeID;
    ivs[index].count++;
    ivs[index].index = index;
450
  }
Peter W. Draper's avatar
Peter W. Draper committed
451
  qsort(ivs, indmax, sizeof(struct indexval), indexvalcmp);
452
453
454
455
456
457

  /* Go through the ivs using the largest counts first, these are the
   * regions with the most cells in common, old partition to new. */
  int *oldmap = malloc(sizeof(int) * nregions);
  int *newmap = malloc(sizeof(int) * nregions);
  for (int k = 0; k < nregions; k++) {
Peter W. Draper's avatar
Peter W. Draper committed
458
459
    oldmap[k] = -1;
    newmap[k] = -1;
460
  }
Peter W. Draper's avatar
Peter W. Draper committed
461
  for (int k = 0; k < indmax; k++) {
462
463
464
465
466
467
468
469
470
471
472
473
474

    /* Stop when all regions with common cells have been considered. */
    if (ivs[k].count == 0) break;

    /* Store old and new IDs, if not already used. */
    int oldregion = ivs[k].index / nregions;
    int newregion = ivs[k].index - oldregion * nregions;
    if (newmap[newregion] == -1 && oldmap[oldregion] == -1) {
      newmap[newregion] = oldregion;
      oldmap[oldregion] = newregion;
    }
  }

475
476
  /* Handle any regions that did not get selected by picking an unused rank
   * from oldmap and assigning to newmap. */
477
478
479
480
481
482
483
484
485
486
487
488
489
490
  int spare = 0;
  for (int k = 0; k < nregions; k++) {
    if (newmap[k] == -1) {
      for (int j = spare; j < nregions; j++) {
        if (oldmap[j] == -1) {
          newmap[k] = j;
          oldmap[j] = j;
          spare = j;
          break;
        }
      }
    }
  }

491
492
  /* Set the cell list to the region index. */
  for (int k = 0; k < ncells; k++) {
493
    celllist[k] = newmap[regionid[k]];
494
495
496
497
498
  }

  /* Clean up. */
  if (weights_v != NULL) free(weights_v);
  if (weights_e != NULL) free(weights_e);
499
500
501
  free(ivs);
  free(oldmap);
  free(newmap);
502
503
504
505
506
507
  free(xadj);
  free(adjncy);
  free(regionid);
}
#endif

Peter W. Draper's avatar
Peter W. Draper committed
508
#if defined(WITH_MPI) && defined(HAVE_METIS)
509
/**
510
511
 * @brief Repartition the cells amongst the nodes using task costs
 *        as edge weights and vertex weights also from task costs
512
513
514
515
516
 *        or particle cells counts.
 *
 * @param partweights whether particle counts will be used as vertex weights.
 * @param bothweights whether vertex and edge weights will be used, otherwise
 *                    only edge weights will be used.
517
 * @param timebins use timebins as edge weights.
Peter W. Draper's avatar
Peter W. Draper committed
518
519
520
521
522
 * @param nodeID our nodeID.
 * @param nr_nodes the number of nodes.
 * @param s the space of cells holding our local particles.
 * @param tasks the completed tasks from the last engine step for our node.
 * @param nr_tasks the number of tasks.
523
 */
524
525
526
static void repart_edge_metis(int partweights, int bothweights, int timebins,
                              int nodeID, int nr_nodes, struct space *s,
                              struct task *tasks, int nr_tasks) {
527
528

  /* Create weight arrays using task ticks for vertices and edges (edges
529
   * assume the same graph structure as used in the part_ calls). */
530
  int nr_cells = s->nr_cells;
531
  struct cell *cells = s->cells_top;
532
533
534
535
536
537
538
539
540

  /* Allocate and fill the adjncy indexing array defining the graph of
   * cells. */
  idx_t *inds;
  if ((inds = (idx_t *)malloc(sizeof(idx_t) * 26 * nr_cells)) == NULL)
    error("Failed to allocate the inds array");
  graph_init_metis(s, inds, NULL);

  /* Allocate and init weights. */
541
542
  double *weights_v = NULL;
  double *weights_e = NULL;
543
  if (bothweights) {
544
    if ((weights_v = (double *)malloc(sizeof(double) * nr_cells)) == NULL)
545
      error("Failed to allocate vertex weights arrays.");
546
    bzero(weights_v, sizeof(double) * nr_cells);
547
  }
548
  if ((weights_e = (double *)malloc(sizeof(double) * 26 * nr_cells)) == NULL)
Matthieu Schaller's avatar
Matthieu Schaller committed
549
    error("Failed to allocate edge weights arrays.");
550
  bzero(weights_e, sizeof(double) * 26 * nr_cells);
551
552
553
554
555
556
557
558
559
560

  /* Generate task weights for vertices. */
  int taskvweights = (bothweights && !partweights);

  /* Loop over the tasks... */
  for (int j = 0; j < nr_tasks; j++) {
    /* Get a pointer to the kth task. */
    struct task *t = &tasks[j];

    /* Skip un-interesting tasks. */
Matthieu Schaller's avatar
Matthieu Schaller committed
561
    if (t->cost == 0) continue;
562

563
    /* Get the task weight based on costs. */
Peter W. Draper's avatar
Peter W. Draper committed
564
    double w = (double)t->cost;
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579

    /* Get the top-level cells involved. */
    struct cell *ci, *cj;
    for (ci = t->ci; ci->parent != NULL; ci = ci->parent)
      ;
    if (t->cj != NULL)
      for (cj = t->cj; cj->parent != NULL; cj = cj->parent)
        ;
    else
      cj = NULL;

    /* Get the cell IDs. */
    int cid = ci - cells;

    /* Different weights for different tasks. */
Matthieu Schaller's avatar
Matthieu Schaller committed
580
    if (t->type == task_type_ghost || t->type == task_type_kick1 ||
Matthieu Schaller's avatar
Matthieu Schaller committed
581
        t->type == task_type_kick2 || t->type == task_type_timestep ||
582
        t->type == task_type_drift_part || t->type == task_type_drift_gpart) {
583

584
      /* Particle updates add only to vertex weight. */
Matthieu Schaller's avatar
Matthieu Schaller committed
585
      if (taskvweights) weights_v[cid] += w;
586
587
588
589
    }

    /* Self interaction? */
    else if ((t->type == task_type_self && ci->nodeID == nodeID) ||
590
591
             (t->type == task_type_sub_self && cj == NULL &&
              ci->nodeID == nodeID)) {
592
      /* Self interactions add only to vertex weight. */
Matthieu Schaller's avatar
Matthieu Schaller committed
593
      if (taskvweights) weights_v[cid] += w;
594
595
596
597

    }

    /* Pair? */
Matthieu Schaller's avatar
Matthieu Schaller committed
598
    else if (t->type == task_type_pair || (t->type == task_type_sub_pair)) {
599
600
601
      /* In-cell pair? */
      if (ci == cj) {
        /* Add weight to vertex for ci. */
Matthieu Schaller's avatar
Matthieu Schaller committed
602
        if (taskvweights) weights_v[cid] += w;
603
604
605

      }

606
607
      /* Distinct cells. */
      else {
608
609
610
        /* Index of the jth cell. */
        int cjd = cj - cells;

611
612
613
        /* Local cells add weight to vertices. */
        if (taskvweights && ci->nodeID == nodeID) {
          weights_v[cid] += 0.5 * w;
614
615
616
          if (cj->nodeID == nodeID) weights_v[cjd] += 0.5 * w;
        }

617
618
619
620
621
        if (timebins) {
          /* Add weights to edge for all cells based on the expected
           * interaction time (calculated as the time to the last expected
           * time) as we want to avoid having active cells on the edges, so we
           * cut for that. Note that weight is added to the local and remote
622
623
           * cells, as we want to keep both away from any cuts, this can
           * overflow int, so take care. */
624
625
          int dti = num_time_bins - get_time_bin(ci->ti_end_min);
          int dtj = num_time_bins - get_time_bin(cj->ti_end_min);
Peter W. Draper's avatar
Peter W. Draper committed
626
          double dt = (double)(1 << dti) + (double)(1 << dtj);
627
628
629

          /* ci */
          int kk;
Peter W. Draper's avatar
Peter W. Draper committed
630
631
          for (kk = 26 * cid; inds[kk] != cjd; kk++)
            ;
632
633
634
          weights_e[kk] += dt;

          /* cj */
Peter W. Draper's avatar
Peter W. Draper committed
635
636
          for (kk = 26 * cjd; inds[kk] != cid; kk++)
            ;
637
638
639
640
641
642
643
          weights_e[kk] += dt;
        } else {

          /* Add weights from task costs to the edge. */

          /* ci */
          int kk;
Peter W. Draper's avatar
Peter W. Draper committed
644
645
          for (kk = 26 * cid; inds[kk] != cjd; kk++)
            ;
646
647
648
          weights_e[kk] += w;

          /* cj */
Peter W. Draper's avatar
Peter W. Draper committed
649
650
          for (kk = 26 * cjd; inds[kk] != cid; kk++)
            ;
651
652
          weights_e[kk] += w;
        }
653
654
655
656
657
      }
    }
  }

  /* Re-calculate the vertices if using particle counts. */
658
  if (partweights && bothweights) accumulate_counts(s, weights_v);
659
660

  /* Merge the weights arrays across all nodes. */
661
  int res;
662
663
  if (bothweights) {
    if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_v, weights_v,
Peter W. Draper's avatar
Peter W. Draper committed
664
665
                          nr_cells, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD)) !=
        MPI_SUCCESS)
666
667
668
669
      mpi_error(res, "Failed to allreduce vertex weights.");
  }

  if ((res = MPI_Reduce((nodeID == 0) ? MPI_IN_PLACE : weights_e, weights_e,
Peter W. Draper's avatar
Peter W. Draper committed
670
671
                        26 * nr_cells, MPI_DOUBLE, MPI_SUM, 0,
                        MPI_COMM_WORLD)) != MPI_SUCCESS)
672
673
674
675
676
677
678
679
    mpi_error(res, "Failed to allreduce edge weights.");

  /* Allocate cell list for the partition. */
  int *celllist = (int *)malloc(sizeof(int) * s->nr_cells);
  if (celllist == NULL) error("Failed to allocate celllist");

  /* As of here, only one node needs to compute the partition. */
  if (nodeID == 0) {
680

681
    /* We need to rescale the weights into the range of an integer for METIS
Peter W. Draper's avatar
Peter W. Draper committed
682
683
     * (really range of idx_t). Also we would like the range of vertex and
     * edges weights to be similar so they balance. */
684
685
    double wminv = 0.0;
    double wmaxv = 0.0;
686
    if (bothweights) {
687
688
      wminv = weights_v[0];
      wmaxv = weights_v[0];
689
      for (int k = 0; k < nr_cells; k++) {
690
691
        wmaxv = weights_v[k] > wmaxv ? weights_v[k] : wmaxv;
        wminv = weights_v[k] < wminv ? weights_v[k] : wminv;
692
693
694
      }
    }

695
696
    double wmine = weights_e[0];
    double wmaxe = weights_e[0];
697
    for (int k = 0; k < 26 * nr_cells; k++) {
698
699
      wmaxe = weights_e[k] > wmaxe ? weights_e[k] : wmaxe;
      wmine = weights_e[k] < wmine ? weights_e[k] : wmine;
700
    }
701
702
703

    if (bothweights) {

704
705
706
707
708
709
710
      /* Make range the same in both weights systems. */
      if ((wmaxv - wminv) > (wmaxe - wmine)) {
        double wscale = (wmaxv - wminv) / (wmaxe - wmine);
        for (int k = 0; k < 26 * nr_cells; k++) {
          weights_e[k] = (weights_e[k] - wmine) * wscale + wminv;
        }
        wmine = wminv;
711
        wmaxe = wmaxv;
712

713
      } else {
714
715
716
717
718
        double wscale = (wmaxe - wmine) / (wmaxv - wminv);
        for (int k = 0; k < nr_cells; k++) {
          weights_v[k] = (weights_v[k] - wminv) * wscale + wmine;
        }
        wminv = wmine;
719
720
721
722
        wmaxv = wmaxe;
      }

      /* Scale to the METIS range. */
723
      double wscale = (metis_maxweight - 1.0) / (wmaxv - wminv);
724
      for (int k = 0; k < nr_cells; k++) {
725
        weights_v[k] = (weights_v[k] - wminv) * wscale + 1.0;
726
727
728
      }
    }

729
    /* Scale to the METIS range. */
730
    double wscale = (metis_maxweight - 1.0) / (wmaxe - wmine);
731
    for (int k = 0; k < 26 * nr_cells; k++) {
732
      weights_e[k] = (weights_e[k] - wmine) * wscale + 1.0;
733
    }
734
735
736

    /* And partition, use both weights or not as requested. */
    if (bothweights)
737
      pick_metis(s, nr_nodes, weights_v, weights_e, celllist);
738
    else
739
      pick_metis(s, nr_nodes, NULL, weights_e, celllist);
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760

    /* Check that all cells have good values. */
    for (int k = 0; k < nr_cells; k++)
      if (celllist[k] < 0 || celllist[k] >= nr_nodes)
        error("Got bad nodeID %d for cell %i.", celllist[k], k);

    /* Check that the partition is complete and all nodes have some work. */
    int present[nr_nodes];
    int failed = 0;
    for (int i = 0; i < nr_nodes; i++) present[i] = 0;
    for (int i = 0; i < nr_cells; i++) present[celllist[i]]++;
    for (int i = 0; i < nr_nodes; i++) {
      if (!present[i]) {
        failed = 1;
        message("Node %d is not present after repartition", i);
      }
    }

    /* If partition failed continue with the current one, but make this
     * clear. */
    if (failed) {
Matthieu Schaller's avatar
Matthieu Schaller committed
761
762
763
      message(
          "WARNING: METIS repartition has failed, continuing with "
          "the current partition, load balance will not be optimal");
764
765
766
767
768
769
770
771
772
773
      for (int k = 0; k < nr_cells; k++) celllist[k] = cells[k].nodeID;
    }
  }

  /* Distribute the celllist partition and apply. */
  if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) !=
      MPI_SUCCESS)
    mpi_error(res, "Failed to bcast the cell list");

  /* And apply to our cells */
774
  split_metis(s, nr_nodes, celllist);
775
776

  /* Clean up. */
777
  free(inds);
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
  if (bothweights) free(weights_v);
  free(weights_e);
  free(celllist);
}
#endif

/**
 * @brief Repartition the cells amongst the nodes using vertex weights
 *
 * @param s The space containing the local cells.
 * @param nodeID our MPI node id.
 * @param nr_nodes number of MPI nodes.
 */
#if defined(WITH_MPI) && defined(HAVE_METIS)
static void repart_vertex_metis(struct space *s, int nodeID, int nr_nodes) {

  /* Use particle counts as vertex weights. */
  /* Space for particles per cell counts, which will be used as weights. */
796
797
  double *weights = NULL;
  if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL)
798
799
800
801
802
803
804
    error("Failed to allocate weights buffer.");

  /* Check each particle and accumulate the counts per cell. */
  accumulate_counts(s, weights);

  /* Get all the counts from all the nodes. */
  int res;
805
806
  if ((res = MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE,
                           MPI_SUM, MPI_COMM_WORLD)) != MPI_SUCCESS)
807
808
809
810
811
812
    mpi_error(res, "Failed to allreduce particle cell weights.");

  /* Main node does the partition calculation. */
  int *celllist = (int *)malloc(sizeof(int) * s->nr_cells);
  if (celllist == NULL) error("Failed to allocate celllist");

Matthieu Schaller's avatar
Matthieu Schaller committed
813
  if (nodeID == 0) pick_metis(s, nr_nodes, weights, NULL, celllist);
814
815
816
817
818
819
820

  /* Distribute the celllist partition and apply. */
  if ((res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD)) !=
      MPI_SUCCESS)
    mpi_error(res, "Failed to bcast the cell list");

  /* And apply to our cells */
821
  split_metis(s, nr_nodes, celllist);
822
823
824
825
826
827
828
829
830
831
832
833

  free(weights);
  free(celllist);
}
#endif

/**
 * @brief Repartition the space using the given repartition type.
 *
 * Note that at the end of this process all the cells will be re-distributed
 * across the nodes, but the particles themselves will not be.
 *
834
 * @param reparttype #repartition struct
835
836
837
838
839
840
 * @param nodeID our nodeID.
 * @param nr_nodes the number of nodes.
 * @param s the space of cells holding our local particles.
 * @param tasks the completed tasks from the last engine step for our node.
 * @param nr_tasks the number of tasks.
 */
841
void partition_repartition(struct repartition *reparttype, int nodeID,
842
843
                           int nr_nodes, struct space *s, struct task *tasks,
                           int nr_tasks) {
844
845
846

#if defined(WITH_MPI) && defined(HAVE_METIS)

847
  if (reparttype->type == REPART_METIS_VERTEX_COSTS_EDGE_COSTS) {
848
    repart_edge_metis(0, 1, 0, nodeID, nr_nodes, s, tasks, nr_tasks);
849

850
851
  } else if (reparttype->type == REPART_METIS_EDGE_COSTS) {
    repart_edge_metis(0, 0, 0, nodeID, nr_nodes, s, tasks, nr_tasks);
852

853
  } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS_EDGE_COSTS) {
854
    repart_edge_metis(1, 1, 0, nodeID, nr_nodes, s, tasks, nr_tasks);
855

856
  } else if (reparttype->type == REPART_METIS_VERTEX_COSTS_EDGE_TIMEBINS) {
857
858
859
    repart_edge_metis(0, 1, 1, nodeID, nr_nodes, s, tasks, nr_tasks);

  } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS_EDGE_TIMEBINS) {
860
    repart_edge_metis(1, 1, 1, nodeID, nr_nodes, s, tasks, nr_tasks);
861

862
863
864
  } else if (reparttype->type == REPART_METIS_EDGE_TIMEBINS) {
    repart_edge_metis(0, 0, 1, nodeID, nr_nodes, s, tasks, nr_tasks);

865
  } else if (reparttype->type == REPART_METIS_VERTEX_COUNTS) {
866
867
    repart_vertex_metis(s, nodeID, nr_nodes);

868
869
870
  } else if (reparttype->type == REPART_NONE) {
    /* Doing nothing. */

871
  } else {
872
    error("Impossible repartition type");
873
874
875
876
877
878
  }
#else
  error("SWIFT was not compiled with METIS support.");
#endif
}

879
/**
Peter W. Draper's avatar
Peter W. Draper committed
880
 * @brief Initial partition of space cells.
881
 *
882
883
884
 * Cells are assigned to a node on the basis of various schemes, all of which
 * should attempt to distribute them in geometrically close regions to
 * minimise the movement of particles.
885
 *
886
887
888
889
 * Note that the partition type is a suggestion and will be ignored if that
 * scheme fails. In that case we fallback to a vectorised scheme, that is
 * guaranteed to work provided we have more cells than nodes.
 *
890
 * @param initial_partition the type of partitioning to try.
891
892
893
 * @param nodeID our nodeID.
 * @param nr_nodes the number of nodes.
 * @param s the space of cells.
894
 */
895
896
void partition_initial_partition(struct partition *initial_partition,
                                 int nodeID, int nr_nodes, struct space *s) {
897
898

  /* Geometric grid partitioning. */
899
  if (initial_partition->type == INITPART_GRID) {
900
901
902
903
904
    int j, k;
    int ind[3];
    struct cell *c;

    /* If we've got the wrong number of nodes, fail. */
905
906
907
    if (nr_nodes !=
        initial_partition->grid[0] * initial_partition->grid[1] *
            initial_partition->grid[2])
908
909
910
911
912
      error("Grid size does not match number of nodes.");

    /* Run through the cells and set their nodeID. */
    // message("s->dim = [%e,%e,%e]", s->dim[0], s->dim[1], s->dim[2]);
    for (k = 0; k < s->nr_cells; k++) {
913
      c = &s->cells_top[k];
914
      for (j = 0; j < 3; j++)
Matthieu Schaller's avatar
Matthieu Schaller committed
915
        ind[j] = c->loc[j] / s->dim[j] * initial_partition->grid[j];
916
917
918
      c->nodeID = ind[0] +
                  initial_partition->grid[0] *
                      (ind[1] + initial_partition->grid[1] * ind[2]);
919
920
921
      // message("cell at [%e,%e,%e]: ind = [%i,%i,%i], nodeID = %i", c->loc[0],
      // c->loc[1], c->loc[2], ind[0], ind[1], ind[2], c->nodeID);
    }
922

923
    /* The grid technique can fail, so check for this before proceeding. */
Peter W. Draper's avatar
Peter W. Draper committed
924
    if (!check_complete(s, (nodeID == 0), nr_nodes)) {
925
926
      if (nodeID == 0)
        message("Grid initial partition failed, using a vectorised partition");
927
928
      initial_partition->type = INITPART_VECTORIZE;
      partition_initial_partition(initial_partition, nodeID, nr_nodes, s);
929
930
      return;
    }
931

932
933
  } else if (initial_partition->type == INITPART_METIS_WEIGHT ||
             initial_partition->type == INITPART_METIS_NOWEIGHT) {
934
935
936
937
938
939
940
#if defined(WITH_MPI) && defined(HAVE_METIS)
    /* Simple k-way partition selected by METIS using cell particle counts as
     * weights or not. Should be best when starting with a inhomogeneous dist.
     */

    /* Space for particles per cell counts, which will be used as weights or
     * not. */
941
    double *weights = NULL;
942
    if (initial_partition->type == INITPART_METIS_WEIGHT) {
943
      if ((weights = (double *)malloc(sizeof(double) * s->nr_cells)) == NULL)
944
        error("Failed to allocate weights buffer.");
945
      bzero(weights, sizeof(double) * s->nr_cells);
946
947

      /* Check each particle and accumilate the counts per cell. */
Peter W. Draper's avatar
Peter W. Draper committed
948
      accumulate_counts(s, weights);
949

950
      /* Get all the counts from all the nodes. */
Peter W. Draper's avatar
Peter W. Draper committed
951
952
      if (MPI_Allreduce(MPI_IN_PLACE, weights, s->nr_cells, MPI_DOUBLE, MPI_SUM,
                        MPI_COMM_WORLD) != MPI_SUCCESS)
953
        error("Failed to allreduce particle cell weights.");
954
    }
955

956
957
958
    /* Main node does the partition calculation. */
    int *celllist = (int *)malloc(sizeof(int) * s->nr_cells);
    if (celllist == NULL) error("Failed to allocate celllist");
Matthieu Schaller's avatar
Matthieu Schaller committed
959
    if (nodeID == 0) pick_metis(s, nr_nodes, weights, NULL, celllist);
960
961
962
963
964
965
966
967

    /* Distribute the celllist partition and apply. */
    int res = MPI_Bcast(celllist, s->nr_cells, MPI_INT, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to bcast the cell list");

    /* And apply to our cells */
    split_metis(s, nr_nodes, celllist);

968
969
    /* It's not known if this can fail, but check for this before
     * proceeding. */
Peter W. Draper's avatar
Peter W. Draper committed
970
    if (!check_complete(s, (nodeID == 0), nr_nodes)) {
971
972
      if (nodeID == 0)
        message("METIS initial partition failed, using a vectorised partition");
973
974
      initial_partition->type = INITPART_VECTORIZE;
      partition_initial_partition(initial_partition, nodeID, nr_nodes, s);
975
    }
976

977
978
979
980
981
    if (weights != NULL) free(weights);
    free(celllist);
#else
    error("SWIFT was not compiled with METIS support");
#endif
982

983
  } else if (initial_partition->type == INITPART_VECTORIZE) {
984

985
986
987
988
989
#if defined(WITH_MPI)
    /* Vectorised selection, guaranteed to work for samples less than the
     * number of cells, but not very clumpy in the selection of regions. */
    int *samplecells = (int *)malloc(sizeof(int) * nr_nodes * 3);
    if (samplecells == NULL) error("Failed to allocate samplecells");
990

991
992
993
    if (nodeID == 0) {
      pick_vector(s, nr_nodes, samplecells);
    }
994

995
    /* Share the samplecells around all the nodes. */
Matthieu Schaller's avatar
Matthieu Schaller committed
996
    int res = MPI_Bcast(samplecells, nr_nodes * 3, MPI_INT, 0, MPI_COMM_WORLD);
997
998
    if (res != MPI_SUCCESS)
      mpi_error(res, "Failed to bcast the partition sample cells.");
999

1000
    /* And apply to our cells */
For faster browsing, not all history is shown. View entire blame