task.c 46.6 KB
Newer Older
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
34

35
36
/* MPI headers. */
#ifdef WITH_MPI
37
#include <mpi.h>
38
39
#endif

40
41
42
/* This object's header. */
#include "task.h"

43
/* Local headers. */
Pedro Gonnet's avatar
Pedro Gonnet committed
44
#include "atomic.h"
45
#include "engine.h"
46
#include "error.h"
47
#include "inline.h"
48
#include "lock.h"
Peter W. Draper's avatar
Peter W. Draper committed
49
#include "mpiuse.h"
50
51

/* Task type names. */
52
53
54
55
56
57
58
59
60
61
62
63
64
const char *taskID_names[task_type_count] = {"none",
                                             "sort",
                                             "self",
                                             "pair",
                                             "sub_self",
                                             "sub_pair",
                                             "init_grav",
                                             "init_grav_out",
                                             "ghost_in",
                                             "ghost",
                                             "ghost_out",
                                             "extra_ghost",
                                             "drift_part",
65
                                             "drift_spart",
66
                                             "drift_bpart",
67
68
                                             "drift_gpart",
                                             "drift_gpart_out",
69
                                             "end_hydro_force",
70
71
72
                                             "kick1",
                                             "kick2",
                                             "timestep",
73
                                             "timestep_limiter",
74
                                             "timestep_sync",
75
76
77
78
79
80
81
                                             "send",
                                             "recv",
                                             "grav_long_range",
                                             "grav_mm",
                                             "grav_down_in",
                                             "grav_down",
                                             "grav_mesh",
82
                                             "grav_end_force",
83
84
                                             "cooling",
                                             "star_formation",
85
86
                                             "star_formation_in",
                                             "star_formation_out",
87
                                             "logger",
88
89
                                             "stars_in",
                                             "stars_out",
90
91
                                             "stars_ghost_in",
                                             "stars_ghost",
James Willis's avatar
James Willis committed
92
                                             "stars_ghost_out",
93
                                             "stars_sort",
94
                                             "stars_resort",
95
96
                                             "bh_in",
                                             "bh_out",
97
98
99
                                             "bh_density_ghost",
                                             "bh_swallow_ghost1",
                                             "bh_swallow_ghost2",
100
                                             "bh_swallow_ghost3",
101
                                             "fof_self",
James Willis's avatar
James Willis committed
102
                                             "fof_pair"};
103

104
/* Sub-task type names. */
105
const char *subtaskID_names[task_subtype_count] = {
106
107
108
109
110
111
112
113
    "none",       "density",      "gradient",       "force",
    "limiter",    "grav",         "external_grav",  "tend_part",
    "tend_gpart", "tend_spart",   "tend_bpart",     "xv",
    "rho",        "part_swallow", "bpart_merger",   "gpart",
    "multipole",  "spart",        "stars_density",  "stars_feedback",
    "sf_count",   "bpart_rho",    "bpart_swallow",  "bpart_feedback",
    "bh_density", "bh_swallow",   "do_gas_swallow", "do_bh_swallow",
    "bh_feedback"};
114

Matthieu Schaller's avatar
Matthieu Schaller committed
115
116
117
118
119
/* Human-readable names of the task categories, one string per entry up to
 * task_category_count. */
const char *task_category_names[task_category_count] = {
    "drift",
    "sort",
    "hydro",
    "gravity",
    "feedback",
    "black holes",
    "cooling",
    "star formation",
    "limiter",
    "time integration",
    "mpi",
    "fof",
    "others"};

120
121
122
123
124
#ifdef WITH_MPI
/* MPI communicators for the subtypes: the array holds task_subtype_count
 * entries and only exists when the code is compiled WITH_MPI.
 * NOTE(review): presumably indexed by enum task_subtypes; the communicators
 * are not created in the part of the file visible here -- confirm where they
 * are set up before use. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif

125
126
/**
 * @brief Generates a helper computing the overlap between the particle
 *        arrays of two given cells.
 *
 * The macro expands to an always-inlined function
 * task_cell_overlap_TYPE(ci, cj). Two cells only count as overlapping when
 * the array range [ARRAY, ARRAY + COUNT) of one of them fully contains the
 * range of the other; the function then returns the COUNT of the contained
 * cell. In every other situation, including either cell being NULL, it
 * returns 0.
 *
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart); it is only
 *        used as the suffix of the generated function name.
 * @param ARRAY is the array of this specific type (a member of #cell).
 * @param COUNT is the number of elements in the array (a member of #cell).
 */
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                               \
  __attribute__((always_inline))                                            \
      INLINE static size_t task_cell_overlap_##TYPE(                        \
          const struct cell *restrict ci, const struct cell *restrict cj) { \
                                                                            \
    /* A missing cell cannot share anything. */                             \
    if (ci == NULL || cj == NULL) return 0;                                 \
                                                                            \
    /* Is the range of cj entirely inside the range of ci? */               \
    const int cj_within_ci =                                                \
        (ci->ARRAY <= cj->ARRAY) &&                                         \
        (ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT);                   \
    if (cj_within_ci) return cj->COUNT;                                     \
                                                                            \
    /* Otherwise, is the range of ci entirely inside the range of cj? */    \
    const int ci_within_cj =                                                \
        (cj->ARRAY <= ci->ARRAY) &&                                         \
        (cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT);                   \
    if (ci_within_cj) return ci->COUNT;                                     \
                                                                            \
    /* Neither range contains the other. */                                 \
    return 0;                                                               \
  }

/* Instantiate the helper for each of the four particle arrays of a cell. */
TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);
TASK_CELL_OVERLAP(bpart, black_holes.parts, black_holes.count);
Loic Hausammann's avatar
Loic Hausammann committed
154

155
156
157
158
159
/**
 * @brief Returns the #task_actions for a given task.
 *
 * @param t The #task.
 */
160
161
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {
162
163
164
165
166
167
168

  switch (t->type) {

    case task_type_none:
      return task_action_none;
      break;

169
    case task_type_drift_part:
170
171
    case task_type_sort:
    case task_type_ghost:
172
    case task_type_extra_ghost:
Stefan Arridge's avatar
Stefan Arridge committed
173
    case task_type_cooling:
174
    case task_type_end_hydro_force:
175
176
177
      return task_action_part;
      break;

178
179
180
    case task_type_star_formation:
      return task_action_all;

181
    case task_type_drift_spart:
182
    case task_type_stars_ghost:
Loic Hausammann's avatar
Loic Hausammann committed
183
    case task_type_stars_sort:
184
    case task_type_stars_resort:
Loic Hausammann's avatar
Loic Hausammann committed
185
186
187
      return task_action_spart;
      break;

188
    case task_type_drift_bpart:
189
    case task_type_bh_density_ghost:
190
    case task_type_bh_swallow_ghost3:
191
192
193
      return task_action_bpart;
      break;

194
195
196
197
198
199
200
    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
201
        case task_subtype_gradient:
202
        case task_subtype_force:
203
        case task_subtype_limiter:
204
205
206
          return task_action_part;
          break;

207
        case task_subtype_stars_density:
Alexei Borissov's avatar
Alexei Borissov committed
208
        case task_subtype_stars_feedback:
209
210
          return task_action_all;
          break;
211

212
213
        case task_subtype_bh_density:
        case task_subtype_bh_feedback:
214
        case task_subtype_bh_swallow:
215
        case task_subtype_do_gas_swallow:
216
217
218
          return task_action_all;
          break;

219
220
221
222
        case task_subtype_do_bh_swallow:
          return task_action_bpart;
          break;

223
        case task_subtype_grav:
224
        case task_subtype_external_grav:
225
226
227
228
          return task_action_gpart;
          break;

        default:
229
230
231
232
#ifdef SWIFT_DEBUG_CHECKS
          error("Unknown task_action for task %s/%s", taskID_names[t->type],
                subtaskID_names[t->subtype]);
#endif
233
234
235
236
237
          return task_action_none;
          break;
      }
      break;

238
239
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
240
    case task_type_logger:
James Willis's avatar
James Willis committed
241
242
    case task_type_fof_self:
    case task_type_fof_pair:
243
    case task_type_timestep:
Matthieu Schaller's avatar
Matthieu Schaller committed
244
245
    case task_type_timestep_limiter:
    case task_type_timestep_sync:
246
247
    case task_type_send:
    case task_type_recv:
248
      if (t->ci->hydro.count > 0 && t->ci->grav.count > 0)
249
        return task_action_all;
250
      else if (t->ci->hydro.count > 0)
251
        return task_action_part;
252
      else if (t->ci->grav.count > 0)
253
        return task_action_gpart;
254
255
      else {
#ifdef SWIFT_DEBUG_CHECKS
256
        error("Task without particles");
257
258
#endif
      }
259
260
      break;

261
    case task_type_init_grav:
262
    case task_type_grav_mm:
263
    case task_type_grav_long_range:
264
265
266
      return task_action_multipole;
      break;

267
    case task_type_drift_gpart:
268
    case task_type_grav_down:
269
    case task_type_end_grav_force:
270
    case task_type_grav_mesh:
271
      return task_action_gpart;
272
      break;
273

274
    default:
275
276
277
278
#ifdef SWIFT_DEBUG_CHECKS
      error("Unknown task_action for task %s/%s", taskID_names[t->type],
            subtaskID_names[t->subtype]);
#endif
279
280
281
      return task_action_none;
      break;
  }
282

283
284
285
286
287
#ifdef SWIFT_DEBUG_CHECKS
  error("Unknown task_action for task %s/%s", taskID_names[t->type],
        subtaskID_names[t->subtype]);
#endif
  /* Silence compiler warnings. We should never get here. */
288
  return task_action_none;
289
290
}

291
292
293
294
295
296
297
/**
 * @brief Compute the Jaccard similarity of the data used by two
 *        different tasks.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 */
298
299
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {
300
301
302
303
304
305

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

306
307
  /* First check if any of the two tasks are of a type that don't
     use cells. */
308
309
310
311
312
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
313
314
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
315
316
  const int ta_bpart =
      (ta_act == task_action_bpart || ta_act == task_action_all);
317
318
319
  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
320
321
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);
322
323
  const int tb_bpart =
      (tb_act == task_action_bpart || tb_act == task_action_all);
324
325
326
327
328
329

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
330
331
332
333
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;
334

335
    if (size_union == 0) return 0.f;
336

337
338
339
340
341
342
343
344
345
346
347
348
349
350
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
351
352
353
354
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;
355

356
357
    if (size_union == 0) return 0.f;

358
359
360
361
362
363
364
365
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }
366

Loic Hausammann's avatar
Loic Hausammann committed
367
368
369
370
371
  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
372
373
374
375
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;
Loic Hausammann's avatar
Loic Hausammann committed
376

377
    if (size_union == 0) return 0.f;
Loic Hausammann's avatar
Loic Hausammann committed
378

Loic Hausammann's avatar
Loic Hausammann committed
379
380
381
382
383
384
385
386
387
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
  /* In the case where both tasks act on bparts */
  else if (ta_bpart && tb_bpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->black_holes.count;
    if (ta->cj != NULL) size_union += ta->cj->black_holes.count;
    if (tb->ci != NULL) size_union += tb->ci->black_holes.count;
    if (tb->cj != NULL) size_union += tb->cj->black_holes.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_bpart(ta->ci, tb->ci) +
                                  task_cell_overlap_bpart(ta->ci, tb->cj) +
                                  task_cell_overlap_bpart(ta->cj, tb->ci) +
                                  task_cell_overlap_bpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

409
410
  /* Else, no overlap */
  return 0.f;
411
}
412

413
414
/**
 * @brief Unlock the cell held by this task.
415
 *
416
417
 * @param t The #task.
 */
418
419
void task_unlock(struct task *t) {

420
421
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
422
423
  struct cell *ci = t->ci, *cj = t->cj;

424
  /* Act based on task type. */
425
426
  switch (type) {

427
428
    case task_type_kick1:
    case task_type_kick2:
429
    case task_type_logger:
430
    case task_type_timestep:
431
432
433
      cell_unlocktree(ci);
      cell_gunlocktree(ci);
      break;
Matthieu Schaller's avatar
Matthieu Schaller committed
434

435
    case task_type_drift_part:
436
    case task_type_sort:
437
    case task_type_ghost:
438
    case task_type_extra_ghost:
439
    case task_type_end_hydro_force:
440
    case task_type_timestep_limiter:
441
    case task_type_timestep_sync:
442
443
444
      cell_unlocktree(ci);
      break;

445
    case task_type_drift_gpart:
446
    case task_type_end_grav_force:
447
448
449
      cell_gunlocktree(ci);
      break;

Loic Hausammann's avatar
Loic Hausammann committed
450
    case task_type_stars_sort:
451
    case task_type_stars_resort:
Loic Hausammann's avatar
Loic Hausammann committed
452
453
454
      cell_sunlocktree(ci);
      break;

455
    case task_type_self:
456
    case task_type_sub_self:
457
      if (subtype == task_subtype_grav) {
458
459
460
461
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        cell_gunlocktree(ci);
        cell_munlocktree(ci);
#endif
462
463
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
464
465
        cell_sunlocktree(ci);
        cell_unlocktree(ci);
466
467
468
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
469
                 (subtype == task_subtype_do_gas_swallow)) {
470
471
        cell_bunlocktree(ci);
        cell_unlocktree(ci);
472
473
      } else if (subtype == task_subtype_do_bh_swallow) {
        cell_bunlocktree(ci);
474
      } else if (subtype == task_subtype_limiter) {
475
476
477
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        cell_unlocktree(ci);
#endif
478
      } else { /* hydro */
479
480
        cell_unlocktree(ci);
      }
481
      break;
482

483
    case task_type_pair:
484
    case task_type_sub_pair:
485
      if (subtype == task_subtype_grav) {
486
487
488
489
490
491
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        cell_gunlocktree(ci);
        cell_gunlocktree(cj);
        cell_munlocktree(ci);
        cell_munlocktree(cj);
#endif
492
493
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
494
495
496
497
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
498
499
500
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
501
                 (subtype == task_subtype_do_gas_swallow)) {
502
503
504
505
        cell_bunlocktree(ci);
        cell_bunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
506
507
508
      } else if (subtype == task_subtype_do_bh_swallow) {
        cell_bunlocktree(ci);
        cell_bunlocktree(cj);
509
      } else if (subtype == task_subtype_limiter) {
510
511
512
513
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        cell_unlocktree(ci);
        cell_unlocktree(cj);
#endif
514
      } else { /* hydro */
515
516
517
518
519
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      }
      break;

520
    case task_type_grav_down:
521
522
523
524
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      cell_gunlocktree(ci);
      cell_munlocktree(ci);
#endif
525
526
      break;

527
    case task_type_grav_long_range:
528
529
530
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      cell_munlocktree(ci);
#endif
531
      break;
532

533
    case task_type_grav_mm:
534
535
536
537
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      cell_munlocktree(ci);
      cell_munlocktree(cj);
#endif
538
539
      break;

540
541
542
543
544
545
    case task_type_grav_mesh:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      cell_gunlocktree(ci);
#endif
      break;

546
547
548
549
    case task_type_star_formation:
      cell_unlocktree(ci);
      cell_sunlocktree(ci);
      cell_gunlocktree(ci);
550
      break;
551

552
553
554
555
    default:
      break;
  }
}
556
557
558
559
560
561

/**
 * @brief Try to lock the cells associated with this task.
 *
 * @param t the #task.
 */
562
563
int task_lock(struct task *t) {

564
565
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
566
  struct cell *ci = t->ci, *cj = t->cj;
567
568
569
570
#ifdef WITH_MPI
  int res = 0, err = 0;
  MPI_Status stat;
#endif
571

572
  switch (type) {
573

574
575
576
    /* Communication task? */
    case task_type_recv:
    case task_type_send:
577
#ifdef WITH_MPI
578
579
580
581
582
      /* Check the status of the MPI request. */
      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
        char buff[MPI_MAX_ERROR_STRING];
        int len;
        MPI_Error_string(err, buff, &len);
583
584
585
586
        error(
            "Failed to test request on send/recv task (type=%s/%s tag=%lld, "
            "%s).",
            taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
587
      }
Peter W. Draper's avatar
Peter W. Draper committed
588
589

      /* And log deactivation, if logging enabled. */
590
591
592
      if (res) {
        mpiuse_log_allocation(t->type, t->subtype, &t->req, 0, 0, 0, 0);
      }
Peter W. Draper's avatar
Peter W. Draper committed
593

594
      return res;
595
#else
596
      error("SWIFT was not compiled with MPI support.");
597
#endif
598
      break;
599

600
601
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
602
    case task_type_logger:
603
    case task_type_timestep:
604
      if (ci->hydro.hold || ci->grav.phold) return 0;
605
606
      if (cell_locktree(ci) != 0) return 0;
      if (cell_glocktree(ci) != 0) {
Matthieu Schaller's avatar
Matthieu Schaller committed
607
608
        cell_unlocktree(ci);
        return 0;
609
610
611
      }
      break;

612
    case task_type_drift_part:
613
    case task_type_sort:
614
    case task_type_ghost:
615
    case task_type_extra_ghost:
616
    case task_type_end_hydro_force:
617
    case task_type_timestep_limiter:
618
    case task_type_timestep_sync:
619
      if (ci->hydro.hold) return 0;
620
621
      if (cell_locktree(ci) != 0) return 0;
      break;
622

Loic Hausammann's avatar
Loic Hausammann committed
623
    case task_type_stars_sort:
624
    case task_type_stars_resort:
Loic Hausammann's avatar
Loic Hausammann committed
625
626
627
628
      if (ci->stars.hold) return 0;
      if (cell_slocktree(ci) != 0) return 0;
      break;

629
    case task_type_drift_gpart:
630
    case task_type_end_grav_force:
631
      if (ci->grav.phold) return 0;
632
633
634
      if (cell_glocktree(ci) != 0) return 0;
      break;

635
    case task_type_self:
636
    case task_type_sub_self:
637
      if (subtype == task_subtype_grav) {
638
639
640
641
642
643
644
645
646
647
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        /* Lock the gparts and the m-pole */
        if (ci->grav.phold || ci->grav.mhold) return 0;
        if (cell_glocktree(ci) != 0)
          return 0;
        else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          return 0;
        }
#endif
648
649
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
650
651
652
653
654
655
656
        if (ci->stars.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
657
658
659
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
660
                 (subtype == task_subtype_do_gas_swallow)) {
661
662
663
664
665
666
667
        if (ci->black_holes.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
668
669
670
      } else if (subtype == task_subtype_do_bh_swallow) {
        if (ci->black_holes.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
671
      } else if (subtype == task_subtype_limiter) {
672
673
674
675
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        if (ci->hydro.hold) return 0;
        if (cell_locktree(ci) != 0) return 0;
#endif
Alexei Borissov's avatar
Alexei Borissov committed
676
      } else { /* subtype == hydro */
Loic Hausammann's avatar
Loic Hausammann committed
677
        if (ci->hydro.hold) return 0;
678
679
680
        if (cell_locktree(ci) != 0) return 0;
      }
      break;
681

682
    case task_type_pair:
683
    case task_type_sub_pair:
684
      if (subtype == task_subtype_grav) {
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        /* Lock the gparts and the m-pole in both cells */
        if (ci->grav.phold || cj->grav.phold) return 0;
        if (cell_glocktree(ci) != 0) return 0;
        if (cell_glocktree(cj) != 0) {
          cell_gunlocktree(ci);
          return 0;
        } else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          return 0;
        } else if (cell_mlocktree(cj) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          cell_munlocktree(ci);
          return 0;
        }
#endif
703
704
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
        /* Lock the stars and the gas particles in both cells */
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
724
725
726
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
727
                 (subtype == task_subtype_do_gas_swallow)) {
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
        /* Lock the BHs and the gas particles in both cells */
        if (ci->black_holes.hold || cj->black_holes.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_blocktree(cj) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_bunlocktree(ci);
          cell_bunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_bunlocktree(ci);
          cell_bunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
747
748
749
750
751
752
753
      } else if (subtype == task_subtype_do_bh_swallow) {
        if (ci->black_holes.hold || cj->black_holes.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_blocktree(cj) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
754
      } else if (subtype == task_subtype_limiter) {
755
756
757
758
759
760
761
762
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
#endif
Alexei Borissov's avatar
Alexei Borissov committed
763
      } else { /* subtype == hydro */
764
        /* Lock the parts in both cells */
765
        if (ci->hydro.hold || cj->hydro.hold) return 0;
766
767
768
769
770
771
772
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
      }
      break;
773

774
    case task_type_grav_down:
775
776
777
778
779
780
781
782
783
784
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      /* Lock the gparts and the m-poles */
      if (ci->grav.phold || ci->grav.mhold) return 0;
      if (cell_glocktree(ci) != 0)
        return 0;
      else if (cell_mlocktree(ci) != 0) {
        cell_gunlocktree(ci);
        return 0;
      }
#endif
785
786
      break;

787
    case task_type_grav_long_range:
788
789
790
791
792
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      /* Lock the m-poles */
      if (ci->grav.mhold) return 0;
      if (cell_mlocktree(ci) != 0) return 0;
#endif
Matthieu Schaller's avatar
Matthieu Schaller committed
793
794
      break;

795
    case task_type_grav_mm:
796
797
798
799
800
801
802
803
804
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      /* Lock both m-poles */
      if (ci->grav.mhold || cj->grav.mhold) return 0;
      if (cell_mlocktree(ci) != 0) return 0;
      if (cell_mlocktree(cj) != 0) {
        cell_munlocktree(ci);
        return 0;
      }
#endif
805
806
      break;

807
808
809
810
811
812
813
814
    case task_type_grav_mesh:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
      /* Lock the gparts */
      if (ci->grav.phold) return 0;
      if (cell_glocktree(ci) != 0) return 0;
#endif
      break;

815
816
817
818
819
820
821
822
823
824
825
826
827
    case task_type_star_formation:
      /* Lock the gas, gravity and star particles */
      if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_slocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        cell_sunlocktree(ci);
        return 0;
      }
828

829
830
    default:
      break;
831
832
833
834
835
  }

  /* If we made it this far, we've got a lock. */
  return 1;
}
836

837
838
839
840
841
842
843
844
845
846
847
/**
 * @brief Print basic information about a task.
 *
 * Writes one message line containing the names of the task's type and
 * subtype together with its wait, nr_unlock_tasks and skip fields.
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  /* Look up the printable labels first so the format call stays short. */
  const char *type_name = taskID_names[t->type];
  const char *subtype_name = subtaskID_names[t->subtype];

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d", type_name,
          subtype_name, t->wait, t->nr_unlock_tasks, t->skip);
}
848

849
850
851
852
853
854
/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph.
 *
855
 * @param type The #task type.
856
 * @param subtype The #task subtype.
857
 * @param cluster (return) The group name (should be allocated)
858
 */
859
void task_get_group_name(int type, int subtype, char *cluster) {
860

861
862
  if (type == task_type_grav_long_range || type == task_type_grav_mm ||
      type == task_type_grav_mesh) {
863
864
865
866
867

    strcpy(cluster, "Gravity");
    return;
  }

868
  switch (subtype) {
869
870
871
872
    case task_subtype_density:
      strcpy(cluster, "Density");
      break;
    case task_subtype_gradient:
873
      if (type == task_type_send || type == task_type_recv) {
874
875
876
        strcpy(cluster, "None");
      } else {
        strcpy(cluster, "Gradient");
877
      }
878
879
880
881
882
883
884
      break;
    case task_subtype_force:
      strcpy(cluster, "Force");
      break;
    case task_subtype_grav:
      strcpy(cluster, "Gravity");
      break;
885
    case task_subtype_limiter:
886
887
888
889
890
      if (type == task_type_send || type == task_type_recv) {
        strcpy(cluster, "None");
      } else {
        strcpy(cluster, "Timestep_limiter");
      }
891
      break;
892
    case task_subtype_stars_density:
893
894
895
896
      strcpy(cluster, "StarsDensity");
      break;
    case task_subtype_stars_feedback:
      strcpy(cluster, "StarsFeedback");
897
      break;
898
899
900
    case task_subtype_bh_density:
      strcpy(cluster, "BHDensity");
      break;
901
902
903
    case task_subtype_bh_swallow:
      strcpy(cluster, "BHSwallow");
      break;
904
905
906
907
908
    case task_subtype_do_gas_swallow:
      strcpy(cluster, "DoGasSwallow");
      break;
    case task_subtype_do_bh_swallow:
      strcpy(cluster, "DoBHSwallow");
909
      break;
910
911
912
    case task_subtype_bh_feedback:
      strcpy(cluster, "BHFeedback");
      break;
913
914
915
916
917
918
919
920
921
922
923
924
925
    default:
      strcpy(cluster, "None");
      break;
  }
}

/**
 * @brief Generate the full name of a #task.
 *
 * @param type The #task type.
 * @param subtype The #task type.
 * @param name (return) The formatted string
 */
926
void task_get_full_name(int type, int subtype, char *name) {
927
928
929

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
930
  if (type >= task_type_count) error("Unknown task type %i", type);
931

932
  if (subtype >= task_subtype_count)
933
934
935
936
937
938
939
940
941
942
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
#endif

  /* Full task name */
  if (subtype == task_subtype_none)
    sprintf(name, "%s", taskID_names[type]);
  else
    sprintf(name, "%s_%s", taskID_names[type], subtaskID_names[subtype]);
}

943
944
945
946
947
948
949
950
951
#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 *
 * One duplicate of MPI_COMM_WORLD is stored in subtaskMPI_comms for every
 * task subtype.
 */
void task_create_mpi_comms(void) {
  int subtype = 0;
  while (subtype < task_subtype_count) {
    MPI_Comm_dup(MPI_COMM_WORLD, subtaskMPI_comms + subtype);
    subtype++;
  }
}
952
953
954
955
956
957
958
959
/**
 * @brief Free the global communicators of each of the subtasks.
 *
 * Calls MPI_Comm_free on every entry of subtaskMPI_comms, one per task
 * subtype.
 */
void task_free_mpi_comms(void) {
  int subtype = 0;
  while (subtype < task_subtype_count) {
    MPI_Comm_free(subtaskMPI_comms + subtype);
    subtype++;
  }
}
960
#endif
961
962

/**
 * @brief dump all the tasks of all the known engines into a file for
 * postprocessing.
 *
 * Dumps the information to a file "thread_info-stepn.dat" where n is the
 * given step value, or "thread_info_MPI-stepn.dat", if we are running
 * under MPI. Note if running under MPI all the ranks are dumped into this
 * one file, which has an additional field to identify the rank.
 *
 * The first line written (per rank under MPI) is a header carrying the step's
 * tic/toc, the update counters and the CPU frequency. It is followed by one
 * line for every non-implicit task whose tic is later than the engine's
 * tic_step.
 *
 * The function does nothing unless SWIFT_DEBUG_TASKS is defined. It reports a
 * failure to open the output file through error().
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  const ticks tic = getticks();

  /* Need this to convert ticks to seconds. */
  const unsigned long long cpufreq = clocks_get_cpufreq();

#ifdef WITH_MPI
  /* Make sure output file is empty, only on one rank. */
  char dumpfile[35];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step);
  FILE *file_thread;
  if (engine_rank == 0) {
    file_thread = fopen(dumpfile, "w");
    if (file_thread == NULL) error("Could not create file '%s'.", dumpfile);
    fclose(file_thread);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int i = 0; i < e->nr_nodes; i++) {

    /* Rank 0 decides the index of the writing node, this happens
     * one-by-one. */
    int kk = i;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (i == engine_rank) {

      /* Open file and position at end. */
      file_thread = fopen(dumpfile, "a");
      if (file_thread == NULL)
        error("Could not open file '%s' for appending.", dumpfile);

      /* Add some information to help with the plots and conversion of ticks to
       * seconds. The casts make the arguments match the signed conversions. */
      fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
              engine_rank, (long long int)e->tic_step,
              (long long int)e->toc_step, e->updates, e->g_updates,
              e->s_updates, (long long int)cpufreq);

      for (int l = 0; l < e->sched.nr_tasks; l++) {
        const struct task *t = &e->sched.tasks[l];

        /* Only dump real tasks that started after the step began. */
        if (!t->implicit && t->tic > e->tic_step) {
          fprintf(
              file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
              engine_rank, t->rid, t->type, t->subtype, (t->cj == NULL),
              (long long int)t->tic, (long long int)t->toc,
              (t->ci != NULL) ? t->ci->hydro.count : 0,
              (t->cj != NULL) ? t->cj->hydro.count : 0,
              (t->ci != NULL) ? t->ci->grav.count : 0,
              (t->cj != NULL) ? t->cj->grav.count : 0, t->flags, t->sid);
        }
      }
      fclose(file_thread);
    }

    /* And we wait for all to synchronize. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else
  /* Non-MPI, so just a single engine's worth of tasks to dump. */
  char dumpfile[32];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step);
  FILE *file_thread = fopen(dumpfile, "w");
  if (file_thread == NULL) error("Could not create file '%s'.", dumpfile);

  /* Add some information to help with the plots and conversion of ticks to
   * seconds. The casts make the arguments match the signed conversions. */
  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
          -1, -1, 1, (long long int)e->tic_step, (long long int)e->toc_step,
          e->updates, e->g_updates, e->s_updates, 0, (long long int)cpufreq);

  for (int l = 0; l < e->sched.nr_tasks; l++) {
    const struct task *t = &e->sched.tasks[l];

    /* Only dump real tasks that started after the step began. */
    if (!t->implicit && t->tic > e->tic_step) {
      fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", t->rid,
              t->type, t->subtype, (t->cj == NULL), (long long int)t->tic,
              (long long int)t->toc, (t->ci == NULL) ? 0 : t->ci->hydro.count,
              (t->cj == NULL) ? 0 : t->cj->hydro.count,
              (t->ci == NULL) ? 0 : t->ci->grav.count,
              (t->cj == NULL) ? 0 : t->cj->grav.count, t->sid);
    }
  }
  fclose(file_thread);
#endif  // WITH_MPI

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
            clocks_getunit());
#endif  // SWIFT_DEBUG_TASKS
}

/**
 * @brief Generate simple statistics about the times used by the tasks of
1082
1083
1084
 *        all the engines and write these into two formats, a human readable
 *        version for debugging and one intended for inclusion as the fixed
 *        costs for repartitioning.
1085
 *
1086
1087
1088
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of task of each type/subtype followed by the minimum, maximum, mean
1089
1090
 * and total time taken and the same numbers for the start of the task,
 * in millisec and then the fixed costs value.
1091
 *
1092
1093
1094
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
1095
 *
1096
 * @param dumpfile name of the file for the output.
1097
 * @param e the #engine
1098
1099
1100
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 *                 one, only used if header is false.
1101
 */
Peter W. Draper's avatar
Peter W. Draper committed
1102
1103
void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                     int allranks) {
1104