/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

/* This object's header. */
#include "task.h"

/* Local headers. */
#include "atomic.h"
#include "engine.h"
#include "error.h"
#include "inline.h"
#include "lock.h"

/* Task type names. */
51
52
53
54
55
56
57
58
59
60
61
62
63
const char *taskID_names[task_type_count] = {"none",
                                             "sort",
                                             "self",
                                             "pair",
                                             "sub_self",
                                             "sub_pair",
                                             "init_grav",
                                             "init_grav_out",
                                             "ghost_in",
                                             "ghost",
                                             "ghost_out",
                                             "extra_ghost",
                                             "drift_part",
64
                                             "drift_spart",
65
                                             "drift_bpart",
66
67
                                             "drift_gpart",
                                             "drift_gpart_out",
68
                                             "end_hydro_force",
69
70
71
                                             "kick1",
                                             "kick2",
                                             "timestep",
72
                                             "timestep_limiter",
73
74
75
76
77
78
79
                                             "send",
                                             "recv",
                                             "grav_long_range",
                                             "grav_mm",
                                             "grav_down_in",
                                             "grav_down",
                                             "grav_mesh",
80
                                             "grav_end_force",
81
82
                                             "cooling",
                                             "star_formation",
83
84
                                             "star_formation_in",
                                             "star_formation_out",
85
                                             "logger",
86
87
                                             "stars_in",
                                             "stars_out",
88
89
                                             "stars_ghost_in",
                                             "stars_ghost",
James Willis's avatar
James Willis committed
90
                                             "stars_ghost_out",
91
                                             "stars_sort",
92
93
                                             "bh_in",
                                             "bh_out",
94
95
96
                                             "bh_density_ghost",
                                             "bh_swallow_ghost1",
                                             "bh_swallow_ghost2",
97
                                             "bh_swallow_ghost3",
98
                                             "fof_self",
James Willis's avatar
James Willis committed
99
                                             "fof_pair"};
100

101
/* Sub-task type names. */
102
const char *subtaskID_names[task_subtype_count] = {
103
104
105
106
107
108
109
110
    "none",       "density",      "gradient",       "force",
    "limiter",    "grav",         "external_grav",  "tend_part",
    "tend_gpart", "tend_spart",   "tend_bpart",     "xv",
    "rho",        "part_swallow", "bpart_merger",   "gpart",
    "multipole",  "spart",        "stars_density",  "stars_feedback",
    "sf_count",   "bpart_rho",    "bpart_swallow",  "bpart_feedback",
    "bh_density", "bh_swallow",   "do_gas_swallow", "do_bh_swallow",
    "bh_feedback"};
111

112
113
114
115
116
#ifdef WITH_MPI
/* MPI communicators for the subtypes: one communicator per task subtype,
 * filled in by task_create_mpi_comms(). */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif

/**
 * @brief Computes the overlap between the parts array of two given cells.
 *
 * Expands to a static inline function task_cell_overlap_TYPE(ci, cj) that
 * compares the address ranges [ARRAY, ARRAY + COUNT) of the two cells. If
 * one range entirely contains the other, the element count of the contained
 * cell is returned. The function returns 0 if either cell is NULL or if
 * neither range contains the other.
 *
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart); it is used
 *        as the suffix of the generated function name.
 * @param ARRAY is the array of this specific type.
 * @param COUNT is the number of elements in the array.
 */
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                               \
  __attribute__((always_inline))                                            \
      INLINE static size_t task_cell_overlap_##TYPE(                        \
          const struct cell *restrict ci, const struct cell *restrict cj) { \
                                                                            \
    if (ci == NULL || cj == NULL) return 0;                                 \
                                                                            \
    if (ci->ARRAY <= cj->ARRAY &&                                           \
        ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT) {                   \
      return cj->COUNT;                                                     \
    } else if (cj->ARRAY <= ci->ARRAY &&                                    \
               cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT) {            \
      return ci->COUNT;                                                     \
    }                                                                       \
                                                                            \
    return 0;                                                               \
  }

/* Instantiate the overlap helpers task_cell_overlap_part(),
 * task_cell_overlap_gpart(), task_cell_overlap_spart() and
 * task_cell_overlap_bpart() for the four particle arrays of a cell. */
TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);
TASK_CELL_OVERLAP(bpart, black_holes.parts, black_holes.count);

/**
 * @brief Returns the #task_actions for a given task.
 *
 * @param t The #task.
 */
152
153
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {
154
155
156
157
158
159
160

  switch (t->type) {

    case task_type_none:
      return task_action_none;
      break;

161
    case task_type_drift_part:
162
163
    case task_type_sort:
    case task_type_ghost:
164
    case task_type_extra_ghost:
165
    case task_type_timestep_limiter:
Stefan Arridge's avatar
Stefan Arridge committed
166
    case task_type_cooling:
167
    case task_type_end_hydro_force:
168
169
170
      return task_action_part;
      break;

171
172
173
    case task_type_star_formation:
      return task_action_all;

174
    case task_type_drift_spart:
175
    case task_type_stars_ghost:
Loic Hausammann's avatar
Loic Hausammann committed
176
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
177
178
179
      return task_action_spart;
      break;

180
    case task_type_drift_bpart:
181
    case task_type_bh_density_ghost:
182
    case task_type_bh_swallow_ghost3:
183
184
185
      return task_action_bpart;
      break;

186
187
188
189
190
191
192
    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
193
        case task_subtype_gradient:
194
        case task_subtype_force:
195
        case task_subtype_limiter:
196
197
198
          return task_action_part;
          break;

199
        case task_subtype_stars_density:
Alexei Borissov's avatar
Alexei Borissov committed
200
        case task_subtype_stars_feedback:
201
202
          return task_action_all;
          break;
203

204
205
        case task_subtype_bh_density:
        case task_subtype_bh_feedback:
206
        case task_subtype_bh_swallow:
207
        case task_subtype_do_gas_swallow:
208
209
210
          return task_action_all;
          break;

211
212
213
214
        case task_subtype_do_bh_swallow:
          return task_action_bpart;
          break;

215
        case task_subtype_grav:
216
        case task_subtype_external_grav:
217
218
219
220
          return task_action_gpart;
          break;

        default:
221
222
223
224
#ifdef SWIFT_DEBUG_CHECKS
          error("Unknown task_action for task %s/%s", taskID_names[t->type],
                subtaskID_names[t->subtype]);
#endif
225
226
227
228
229
          return task_action_none;
          break;
      }
      break;

230
231
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
232
    case task_type_logger:
James Willis's avatar
James Willis committed
233
234
    case task_type_fof_self:
    case task_type_fof_pair:
235
    case task_type_timestep:
236
237
    case task_type_send:
    case task_type_recv:
238
      if (t->ci->hydro.count > 0 && t->ci->grav.count > 0)
239
        return task_action_all;
240
      else if (t->ci->hydro.count > 0)
241
        return task_action_part;
242
      else if (t->ci->grav.count > 0)
243
        return task_action_gpart;
244
245
      else {
#ifdef SWIFT_DEBUG_CHECKS
246
        error("Task without particles");
247
248
#endif
      }
249
250
      break;

251
    case task_type_init_grav:
252
    case task_type_grav_mm:
253
    case task_type_grav_long_range:
254
255
256
      return task_action_multipole;
      break;

257
    case task_type_drift_gpart:
258
    case task_type_grav_down:
259
    case task_type_end_grav_force:
260
    case task_type_grav_mesh:
261
      return task_action_gpart;
262
      break;
263

264
    default:
265
266
267
268
#ifdef SWIFT_DEBUG_CHECKS
      error("Unknown task_action for task %s/%s", taskID_names[t->type],
            subtaskID_names[t->subtype]);
#endif
269
270
271
      return task_action_none;
      break;
  }
272

273
274
275
276
277
#ifdef SWIFT_DEBUG_CHECKS
  error("Unknown task_action for task %s/%s", taskID_names[t->type],
        subtaskID_names[t->subtype]);
#endif
  /* Silence compiler warnings. We should never get here. */
278
  return task_action_none;
279
280
}

281
282
283
284
285
286
287
/**
 * @brief Compute the Jaccard similarity of the data used by two
 *        different tasks.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 */
288
289
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {
290
291
292
293
294
295

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

296
297
  /* First check if any of the two tasks are of a type that don't
     use cells. */
298
299
300
301
302
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
303
304
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
305
306
  const int ta_bpart =
      (ta_act == task_action_bpart || ta_act == task_action_all);
307
308
309
  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
310
311
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);
312
313
  const int tb_bpart =
      (tb_act == task_action_bpart || tb_act == task_action_all);
314
315
316
317
318
319

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
320
321
322
323
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;
324

325
    if (size_union == 0) return 0.f;
326

327
328
329
330
331
332
333
334
335
336
337
338
339
340
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
341
342
343
344
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;
345

346
347
    if (size_union == 0) return 0.f;

348
349
350
351
352
353
354
355
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }
356

Loic Hausammann's avatar
Loic Hausammann committed
357
358
359
360
361
  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
362
363
364
365
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;
Loic Hausammann's avatar
Loic Hausammann committed
366

367
    if (size_union == 0) return 0.f;
Loic Hausammann's avatar
Loic Hausammann committed
368

Loic Hausammann's avatar
Loic Hausammann committed
369
370
371
372
373
374
375
376
377
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
  /* In the case where both tasks act on bparts */
  else if (ta_bpart && tb_bpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->black_holes.count;
    if (ta->cj != NULL) size_union += ta->cj->black_holes.count;
    if (tb->ci != NULL) size_union += tb->ci->black_holes.count;
    if (tb->cj != NULL) size_union += tb->cj->black_holes.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_bpart(ta->ci, tb->ci) +
                                  task_cell_overlap_bpart(ta->ci, tb->cj) +
                                  task_cell_overlap_bpart(ta->cj, tb->ci) +
                                  task_cell_overlap_bpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

399
400
  /* Else, no overlap */
  return 0.f;
401
}
402

403
404
/**
 * @brief Unlock the cell held by this task.
405
 *
406
407
 * @param t The #task.
 */
408
409
void task_unlock(struct task *t) {

410
411
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
412
413
  struct cell *ci = t->ci, *cj = t->cj;

414
  /* Act based on task type. */
415
416
  switch (type) {

417
418
    case task_type_kick1:
    case task_type_kick2:
419
    case task_type_logger:
420
    case task_type_timestep:
421
422
423
      cell_unlocktree(ci);
      cell_gunlocktree(ci);
      break;
Matthieu Schaller's avatar
Matthieu Schaller committed
424

425
    case task_type_drift_part:
426
    case task_type_sort:
427
    case task_type_ghost:
428
    case task_type_end_hydro_force:
429
    case task_type_timestep_limiter:
430
431
432
      cell_unlocktree(ci);
      break;

433
    case task_type_drift_gpart:
434
    case task_type_grav_mesh:
435
    case task_type_end_grav_force:
436
437
438
      cell_gunlocktree(ci);
      break;

Loic Hausammann's avatar
Loic Hausammann committed
439
440
441
442
    case task_type_stars_sort:
      cell_sunlocktree(ci);
      break;

443
    case task_type_self:
444
    case task_type_sub_self:
445
446
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
447
        cell_munlocktree(ci);
448
449
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
450
451
        cell_sunlocktree(ci);
        cell_unlocktree(ci);
452
453
454
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
455
                 (subtype == task_subtype_do_gas_swallow)) {
456
457
        cell_bunlocktree(ci);
        cell_unlocktree(ci);
458
459
      } else if (subtype == task_subtype_do_bh_swallow) {
        cell_bunlocktree(ci);
460
461
462
      } else {
        cell_unlocktree(ci);
      }
463
      break;
464

465
    case task_type_pair:
466
    case task_type_sub_pair:
467
468
469
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
        cell_gunlocktree(cj);
470
471
        cell_munlocktree(ci);
        cell_munlocktree(cj);
472
473
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
474
475
476
477
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
478
479
480
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
481
                 (subtype == task_subtype_do_gas_swallow)) {
482
483
484
485
        cell_bunlocktree(ci);
        cell_bunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
486
487
488
      } else if (subtype == task_subtype_do_bh_swallow) {
        cell_bunlocktree(ci);
        cell_bunlocktree(cj);
489
490
491
492
493
494
      } else {
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      }
      break;

495
    case task_type_grav_down:
496
      cell_gunlocktree(ci);
497
498
499
      cell_munlocktree(ci);
      break;

500
    case task_type_grav_long_range:
501
      cell_munlocktree(ci);
502
      break;
503

504
505
506
507
508
    case task_type_grav_mm:
      cell_munlocktree(ci);
      cell_munlocktree(cj);
      break;

509
510
511
512
    case task_type_star_formation:
      cell_unlocktree(ci);
      cell_sunlocktree(ci);
      cell_gunlocktree(ci);
513
      break;
514

515
516
517
518
    default:
      break;
  }
}
519
520
521
522
523
524

/**
 * @brief Try to lock the cells associated with this task.
 *
 * @param t the #task.
 */
525
526
int task_lock(struct task *t) {

527
528
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
529
  struct cell *ci = t->ci, *cj = t->cj;
530
531
532
533
#ifdef WITH_MPI
  int res = 0, err = 0;
  MPI_Status stat;
#endif
534

535
  switch (type) {
536

537
538
539
    /* Communication task? */
    case task_type_recv:
    case task_type_send:
540
#ifdef WITH_MPI
541
542
543
544
545
      /* Check the status of the MPI request. */
      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
        char buff[MPI_MAX_ERROR_STRING];
        int len;
        MPI_Error_string(err, buff, &len);
546
547
548
549
        error(
            "Failed to test request on send/recv task (type=%s/%s tag=%lld, "
            "%s).",
            taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
550
551
      }
      return res;
552
#else
553
      error("SWIFT was not compiled with MPI support.");
554
#endif
555
      break;
556

557
558
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
559
    case task_type_logger:
560
    case task_type_timestep:
561
      if (ci->hydro.hold || ci->grav.phold) return 0;
562
563
      if (cell_locktree(ci) != 0) return 0;
      if (cell_glocktree(ci) != 0) {
Matthieu Schaller's avatar
Matthieu Schaller committed
564
565
        cell_unlocktree(ci);
        return 0;
566
567
568
      }
      break;

569
    case task_type_drift_part:
570
    case task_type_sort:
571
    case task_type_ghost:
572
    case task_type_end_hydro_force:
573
    case task_type_timestep_limiter:
574
      if (ci->hydro.hold) return 0;
575
576
      if (cell_locktree(ci) != 0) return 0;
      break;
577

Loic Hausammann's avatar
Loic Hausammann committed
578
579
580
581
582
    case task_type_stars_sort:
      if (ci->stars.hold) return 0;
      if (cell_slocktree(ci) != 0) return 0;
      break;

583
    case task_type_drift_gpart:
584
    case task_type_end_grav_force:
585
    case task_type_grav_mesh:
586
      if (ci->grav.phold) return 0;
587
588
589
      if (cell_glocktree(ci) != 0) return 0;
      break;

590
    case task_type_self:
591
    case task_type_sub_self:
592
      if (subtype == task_subtype_grav) {
593
        /* Lock the gparts and the m-pole */
594
        if (ci->grav.phold || ci->grav.mhold) return 0;
595
596
597
598
599
600
        if (cell_glocktree(ci) != 0)
          return 0;
        else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          return 0;
        }
601
602
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
603
604
605
606
607
608
609
        if (ci->stars.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
610
611
612
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
613
                 (subtype == task_subtype_do_gas_swallow)) {
614
615
616
617
618
619
620
        if (ci->black_holes.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
621
622
623
      } else if (subtype == task_subtype_do_bh_swallow) {
        if (ci->black_holes.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
Alexei Borissov's avatar
Alexei Borissov committed
624
      } else { /* subtype == hydro */
Loic Hausammann's avatar
Loic Hausammann committed
625
        if (ci->hydro.hold) return 0;
626
627
628
        if (cell_locktree(ci) != 0) return 0;
      }
      break;
629

630
    case task_type_pair:
631
    case task_type_sub_pair:
632
      if (subtype == task_subtype_grav) {
633
        /* Lock the gparts and the m-pole in both cells */
634
        if (ci->grav.phold || cj->grav.phold) return 0;
635
636
637
638
        if (cell_glocktree(ci) != 0) return 0;
        if (cell_glocktree(cj) != 0) {
          cell_gunlocktree(ci);
          return 0;
639
640
641
642
643
644
645
646
647
        } else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          return 0;
        } else if (cell_mlocktree(cj) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          cell_munlocktree(ci);
          return 0;
648
        }
649
650
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
        /* Lock the stars and the gas particles in both cells */
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
670
671
672
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
673
                 (subtype == task_subtype_do_gas_swallow)) {
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
        /* Lock the BHs and the gas particles in both cells */
        if (ci->black_holes.hold || cj->black_holes.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_blocktree(cj) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_bunlocktree(ci);
          cell_bunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_bunlocktree(ci);
          cell_bunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
693
694
695
696
697
698
699
      } else if (subtype == task_subtype_do_bh_swallow) {
        if (ci->black_holes.hold || cj->black_holes.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_blocktree(cj) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
Alexei Borissov's avatar
Alexei Borissov committed
700
      } else { /* subtype == hydro */
701
        /* Lock the parts in both cells */
702
        if (ci->hydro.hold || cj->hydro.hold) return 0;
703
704
705
706
707
708
709
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
      }
      break;
710

711
712
    case task_type_grav_down:
      /* Lock the gparts and the m-poles */
713
      if (ci->grav.phold || ci->grav.mhold) return 0;
714
715
716
717
718
719
720
721
      if (cell_glocktree(ci) != 0)
        return 0;
      else if (cell_mlocktree(ci) != 0) {
        cell_gunlocktree(ci);
        return 0;
      }
      break;

722
    case task_type_grav_long_range:
723
      /* Lock the m-poles */
724
      if (ci->grav.mhold) return 0;
725
      if (cell_mlocktree(ci) != 0) return 0;
Matthieu Schaller's avatar
Matthieu Schaller committed
726
727
      break;

728
729
    case task_type_grav_mm:
      /* Lock both m-poles */
730
      if (ci->grav.mhold || cj->grav.mhold) return 0;
731
732
733
734
735
      if (cell_mlocktree(ci) != 0) return 0;
      if (cell_mlocktree(cj) != 0) {
        cell_munlocktree(ci);
        return 0;
      }
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
      break;

    case task_type_star_formation:
      /* Lock the gas, gravity and star particles */
      if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_slocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        cell_sunlocktree(ci);
        return 0;
      }
751

752
753
    default:
      break;
754
755
756
757
758
  }

  /* If we made it this far, we've got a lock. */
  return 1;
}
759

760
761
762
763
764
765
766
767
768
769
770
/**
 * @brief Print basic information about a task.
 *
 * Emits, through message(), the type and subtype names of the task together
 * with its wait counter, its number of unlock tasks and its skip flag.
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d",
          taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
          t->nr_unlock_tasks, t->skip);
}

/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph.
 *
778
 * @param type The #task type.
779
 * @param subtype The #task subtype.
780
 * @param cluster (return) The group name (should be allocated)
781
 */
782
void task_get_group_name(int type, int subtype, char *cluster) {
783

784
785
  if (type == task_type_grav_long_range || type == task_type_grav_mm ||
      type == task_type_grav_mesh) {
786
787
788
789
790

    strcpy(cluster, "Gravity");
    return;
  }

791
  switch (subtype) {
792
793
794
795
    case task_subtype_density:
      strcpy(cluster, "Density");
      break;
    case task_subtype_gradient:
796
      if (type == task_type_send || type == task_type_recv) {
797
798
799
        strcpy(cluster, "None");
      } else {
        strcpy(cluster, "Gradient");
800
      }
801
802
803
804
805
806
807
      break;
    case task_subtype_force:
      strcpy(cluster, "Force");
      break;
    case task_subtype_grav:
      strcpy(cluster, "Gravity");
      break;
808
809
810
    case task_subtype_limiter:
      strcpy(cluster, "Timestep_limiter");
      break;
811
    case task_subtype_stars_density:
812
813
814
815
      strcpy(cluster, "StarsDensity");
      break;
    case task_subtype_stars_feedback:
      strcpy(cluster, "StarsFeedback");
816
      break;
817
818
819
    case task_subtype_bh_density:
      strcpy(cluster, "BHDensity");
      break;
820
821
822
    case task_subtype_bh_swallow:
      strcpy(cluster, "BHSwallow");
      break;
823
824
825
826
827
    case task_subtype_do_gas_swallow:
      strcpy(cluster, "DoGasSwallow");
      break;
    case task_subtype_do_bh_swallow:
      strcpy(cluster, "DoBHSwallow");
828
      break;
829
830
831
    case task_subtype_bh_feedback:
      strcpy(cluster, "BHFeedback");
      break;
832
833
834
835
836
837
838
839
840
841
842
843
844
    default:
      strcpy(cluster, "None");
      break;
  }
}

/**
 * @brief Generate the full name of a #task.
 *
 * @param type The #task type.
 * @param subtype The #task type.
 * @param name (return) The formatted string
 */
845
void task_get_full_name(int type, int subtype, char *name) {
846
847
848

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
849
  if (type >= task_type_count) error("Unknown task type %i", type);
850

851
  if (subtype >= task_subtype_count)
852
853
854
855
856
857
858
859
860
861
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
#endif

  /* Full task name */
  if (subtype == task_subtype_none)
    sprintf(name, "%s", taskID_names[type]);
  else
    sprintf(name, "%s_%s", taskID_names[type], subtaskID_names[subtype]);
}

862
863
864
865
866
867
868
869
870
871
#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 */
void task_create_mpi_comms(void) {
  for (int i = 0; i < task_subtype_count; i++) {
    MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]);
  }
}
#endif
872
873

/**
 * @brief dump all the tasks of all the known engines into a file for
 * postprocessing.
 *
 * Dumps the information to a file "thread_info-stepn.dat" where n is the
 * given step value, or "thread_info_MPI-stepn.dat", if we are running
 * under MPI. Note if running under MPI all the ranks are dumped into this
 * one file, which has an additional field to identify the rank.
 *
 * Implicit tasks and tasks whose toc is zero are not written. The function
 * does nothing unless compiled with SWIFT_DEBUG_TASKS.
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  /* Need this to convert ticks to seconds. */
  unsigned long long cpufreq = clocks_get_cpufreq();

#ifdef WITH_MPI
  /* Make sure output file is empty, only on one rank. */
  char dumpfile[35];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step);
  FILE *file_thread;
  if (engine_rank == 0) {
    file_thread = fopen(dumpfile, "w");
    if (file_thread == NULL)
      error("Could not create task dump file '%s'", dumpfile);
    fclose(file_thread);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int i = 0; i < e->nr_nodes; i++) {

    /* Rank 0 decides the index of the writing node, this happens
     * one-by-one. */
    int kk = i;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (i == engine_rank) {

      /* Open file and position at end. */
      file_thread = fopen(dumpfile, "a");
      if (file_thread == NULL)
        error("Could not open task dump file '%s' for appending", dumpfile);

      /* Add some information to help with the plots and conversion of ticks to
       * seconds. Values are cast to match the signed conversions used in the
       * format. */
      fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
              engine_rank, (long long int)e->tic_step,
              (long long int)e->toc_step, e->updates, e->g_updates,
              e->s_updates, (long long int)cpufreq);

      /* One line per task that is not implicit and has a non-zero toc. */
      for (int l = 0; l < e->sched.nr_tasks; l++) {
        if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
          fprintf(
              file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
              engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
              e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
              (long long int)e->sched.tasks[l].tic,
              (long long int)e->sched.tasks[l].toc,
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->hydro.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->hydro.count
                                             : 0,
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->grav.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->grav.count
                                             : 0,
              e->sched.tasks[l].flags, e->sched.tasks[l].sid);
        }
      }
      fclose(file_thread);
    }

    /* And we wait for all to synchronize. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else
  /* Non-MPI, so just a single engine's worth of tasks to dump. */
  char dumpfile[32];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step);
  FILE *file_thread;
  file_thread = fopen(dumpfile, "w");
  if (file_thread == NULL)
    error("Could not create task dump file '%s'", dumpfile);

  /* Add some information to help with the plots and conversion of ticks to
   * seconds. Values are cast to match the signed conversions used in the
   * format. */
  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
          -1, -1, 1, (long long int)e->tic_step, (long long int)e->toc_step,
          e->updates, e->g_updates, e->s_updates, 0, (long long int)cpufreq);

  /* One line per task that is not implicit and has a non-zero toc. */
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
      fprintf(
          file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
          e->sched.tasks[l].rid, e->sched.tasks[l].type,
          e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
          (long long int)e->sched.tasks[l].tic,
          (long long int)e->sched.tasks[l].toc,
          (e->sched.tasks[l].ci == NULL) ? 0
                                         : e->sched.tasks[l].ci->hydro.count,
          (e->sched.tasks[l].cj == NULL) ? 0
                                         : e->sched.tasks[l].cj->hydro.count,
          (e->sched.tasks[l].ci == NULL) ? 0 : e->sched.tasks[l].ci->grav.count,
          (e->sched.tasks[l].cj == NULL) ? 0 : e->sched.tasks[l].cj->grav.count,
          e->sched.tasks[l].sid);
    }
  }
  fclose(file_thread);
#endif  // WITH_MPI
#endif  // SWIFT_DEBUG_TASKS
}

/**
 * @brief Generate simple statistics about the times used by the tasks of
986
987
988
 *        all the engines and write these into two format, a human readable
 *        version for debugging and one intented for inclusion as the fixed
 *        costs for repartitioning.
989
 *
990
991
992
993
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of task of each type/subtype followed by the minimum, maximum, mean
 * and total time, in millisec and then the fixed costs value.
994
 *
995
996
997
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
998
 *
999
 * @param dumpfile name of the file for the output.
1000
 * @param e the #engine
1001
1002
1003
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 *                 one, only used if header is false.
1004
 */
Peter W. Draper's avatar
Peter W. Draper committed
1005
1006
void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                     int allranks) {
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045

  /* Need arrays for sum, min and max across all types and subtypes. */
  double sum[task_type_count][task_subtype_count];
  double min[task_type_count][task_subtype_count];
  double max[task_type_count][task_subtype_count];
  int count[task_type_count][task_subtype_count];

  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      sum[j][k] = 0.0;
      count[j][k] = 0;
      min[j][k] = DBL_MAX;
      max[j][k] = 0.0;
    }
  }

  double total[1] = {0.0};
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    int type = e->sched.tasks[l].type;

    /* Skip implicit tasks, tasks that didn't run and MPI send/recv as these
     * are not interesting (or meaningfully measured). */
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0 &&
        type != task_type_send && type != task_type_recv) {
      int subtype = e->sched.tasks[l].subtype;

      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
      sum[type][subtype] += dt;
      count[type][subtype] += 1;
      if (dt < min[type][subtype]) {
        min[type][subtype] = dt;
      }
      if (dt > max[type][subtype]) {
        max[type][subtype] = dt;
      }
      total[0] += dt;
    }
  }

1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
#ifdef WITH_MPI
  if (allranks || header) {
    /* Get these from all ranks for output from rank 0. Could wrap these into a
     * single operation. */
    size_t size = task_type_count * task_subtype_count;
    int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
                         MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
                     MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
                     MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
                     MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
                     MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
  }
1071

1072
  if (!allranks || (engine_rank == 0 && (allranks || header))) {
1073
1074
1075
#endif

    FILE *dfile = fopen(dumpfile, "w");
1076
1077
1078
1079
1080
1081
    if (header) {
      fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n");
      fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n");
    } else {
      fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
    }
1082
1083
1084
1085
1086
1087
1088

    for (int j = 0; j < task_type_count; j++) {
      const char *taskID = taskID_names[j];
      for (int k = 0; k < task_subtype_count; k++) {
        if (sum[j][k] > 0.0) {
          double mean = sum[j][k] / (double)count[j][k];
          double perc = 100.0 * sum[j][k] / total[0];
1089
1090
1091
1092

          /* Fixed cost is in .1ns as we want to compare between runs in
           * some absolute units. */
          int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f);
1093
1094
1095
1096
1097
1098
1099
1100
          if (header) {
            fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
                    k, fixed_cost, taskID, subtaskID_names[k]);
          } else {
            fprintf(dfile,
                    "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n",
                    taskID, subtaskID_names[k], count[j][k],
                    clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
Peter W. Draper's avatar
Peter W. Draper committed
1101
1102
                    clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), perc,
                    fixed_cost);
1103
          }
1104
1105
1106
1107
1108
1109
1110
        }
      }
    }
    fclose(dfile);
#ifdef WITH_MPI
  }
#endif
1111
}