/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
34

35
36
/* MPI headers. */
#ifdef WITH_MPI
37
#include <mpi.h>
38
39
#endif

40
41
42
/* This object's header. */
#include "task.h"

43
/* Local headers. */
Pedro Gonnet's avatar
Pedro Gonnet committed
44
#include "atomic.h"
45
#include "engine.h"
46
#include "error.h"
47
#include "inline.h"
48
#include "lock.h"
Peter W. Draper's avatar
Peter W. Draper committed
49
#include "mpiuse.h"
50
51

/* Task type names. */
52
53
54
55
56
57
58
59
60
61
62
63
64
const char *taskID_names[task_type_count] = {"none",
                                             "sort",
                                             "self",
                                             "pair",
                                             "sub_self",
                                             "sub_pair",
                                             "init_grav",
                                             "init_grav_out",
                                             "ghost_in",
                                             "ghost",
                                             "ghost_out",
                                             "extra_ghost",
                                             "drift_part",
65
                                             "drift_spart",
66
                                             "drift_bpart",
67
68
                                             "drift_gpart",
                                             "drift_gpart_out",
69
                                             "end_hydro_force",
70
71
72
                                             "kick1",
                                             "kick2",
                                             "timestep",
73
                                             "timestep_limiter",
74
75
76
77
78
79
80
                                             "send",
                                             "recv",
                                             "grav_long_range",
                                             "grav_mm",
                                             "grav_down_in",
                                             "grav_down",
                                             "grav_mesh",
81
                                             "grav_end_force",
82
83
                                             "cooling",
                                             "star_formation",
84
85
                                             "star_formation_in",
                                             "star_formation_out",
86
                                             "logger",
87
88
                                             "stars_in",
                                             "stars_out",
89
90
                                             "stars_ghost_in",
                                             "stars_ghost",
James Willis's avatar
James Willis committed
91
                                             "stars_ghost_out",
92
                                             "stars_sort",
93
                                             "stars_resort",
94
95
                                             "bh_in",
                                             "bh_out",
96
97
98
                                             "bh_density_ghost",
                                             "bh_swallow_ghost1",
                                             "bh_swallow_ghost2",
99
                                             "bh_swallow_ghost3",
100
                                             "fof_self",
James Willis's avatar
James Willis committed
101
                                             "fof_pair"};
102

103
/* Sub-task type names. */
104
const char *subtaskID_names[task_subtype_count] = {
105
106
107
108
109
110
111
112
    "none",       "density",      "gradient",       "force",
    "limiter",    "grav",         "external_grav",  "tend_part",
    "tend_gpart", "tend_spart",   "tend_bpart",     "xv",
    "rho",        "part_swallow", "bpart_merger",   "gpart",
    "multipole",  "spart",        "stars_density",  "stars_feedback",
    "sf_count",   "bpart_rho",    "bpart_swallow",  "bpart_feedback",
    "bh_density", "bh_swallow",   "do_gas_swallow", "do_bh_swallow",
    "bh_feedback"};
113

114
115
116
117
118
#ifdef WITH_MPI
/* MPI communicators for the subtypes: the array holds one entry per task
 * sub-type and only exists when the code is compiled with MPI support. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif

119
120
/**
 * @brief Generates a function computing the overlap between the particle
 *        arrays of two given cells.
 *
 * The generated function is called task_cell_overlap_TYPE(ci, cj). It returns
 * the element count of the cell whose array lies entirely within the array
 * of the other cell, and 0 if either cell is NULL or if neither array
 * contains the other.
 *
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart); it is used
 *        as the suffix of the generated function name.
 * @param ARRAY is the array of this specific type.
 * @param COUNT is the number of elements in the array.
 */
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                                  \
  __attribute__((always_inline)) INLINE static size_t                          \
      task_cell_overlap_##TYPE(const struct cell *restrict ci,                 \
                               const struct cell *restrict cj) {               \
                                                                               \
    /* A missing cell shares nothing with the other one. */                    \
    if (ci == NULL || cj == NULL) return 0;                                    \
                                                                               \
    /* Does the array of ci span the whole array of cj? */                     \
    const int cj_inside_ci =                                                   \
        (ci->ARRAY <= cj->ARRAY) &&                                            \
        (ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT);                      \
    if (cj_inside_ci) return cj->COUNT;                                        \
                                                                               \
    /* Or is it the other way around? */                                       \
    const int ci_inside_cj =                                                   \
        (cj->ARRAY <= ci->ARRAY) &&                                            \
        (cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT);                      \
    if (ci_inside_cj) return ci->COUNT;                                        \
                                                                               \
    /* Neither array contains the other. */                                    \
    return 0;                                                                  \
  }

/* One overlap function per particle type. */
TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);
TASK_CELL_OVERLAP(bpart, black_holes.parts, black_holes.count);
Loic Hausammann's avatar
Loic Hausammann committed
148

149
150
151
152
153
/**
 * @brief Returns the #task_actions for a given task.
 *
 * @param t The #task.
 */
154
155
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {
156
157
158
159
160
161
162

  switch (t->type) {

    case task_type_none:
      return task_action_none;
      break;

163
    case task_type_drift_part:
164
165
    case task_type_sort:
    case task_type_ghost:
166
    case task_type_extra_ghost:
167
    case task_type_timestep_limiter:
Stefan Arridge's avatar
Stefan Arridge committed
168
    case task_type_cooling:
169
    case task_type_end_hydro_force:
170
171
172
      return task_action_part;
      break;

173
174
175
    case task_type_star_formation:
      return task_action_all;

176
    case task_type_drift_spart:
177
    case task_type_stars_ghost:
Loic Hausammann's avatar
Loic Hausammann committed
178
    case task_type_stars_sort:
179
    case task_type_stars_resort:
Loic Hausammann's avatar
Loic Hausammann committed
180
181
182
      return task_action_spart;
      break;

183
    case task_type_drift_bpart:
184
    case task_type_bh_density_ghost:
185
    case task_type_bh_swallow_ghost3:
186
187
188
      return task_action_bpart;
      break;

189
190
191
192
193
194
195
    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
196
        case task_subtype_gradient:
197
        case task_subtype_force:
198
        case task_subtype_limiter:
199
200
201
          return task_action_part;
          break;

202
        case task_subtype_stars_density:
Alexei Borissov's avatar
Alexei Borissov committed
203
        case task_subtype_stars_feedback:
204
205
          return task_action_all;
          break;
206

207
208
        case task_subtype_bh_density:
        case task_subtype_bh_feedback:
209
        case task_subtype_bh_swallow:
210
        case task_subtype_do_gas_swallow:
211
212
213
          return task_action_all;
          break;

214
215
216
217
        case task_subtype_do_bh_swallow:
          return task_action_bpart;
          break;

218
        case task_subtype_grav:
219
        case task_subtype_external_grav:
220
221
222
223
          return task_action_gpart;
          break;

        default:
224
225
226
227
#ifdef SWIFT_DEBUG_CHECKS
          error("Unknown task_action for task %s/%s", taskID_names[t->type],
                subtaskID_names[t->subtype]);
#endif
228
229
230
231
232
          return task_action_none;
          break;
      }
      break;

233
234
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
235
    case task_type_logger:
James Willis's avatar
James Willis committed
236
237
    case task_type_fof_self:
    case task_type_fof_pair:
238
    case task_type_timestep:
239
240
    case task_type_send:
    case task_type_recv:
241
      if (t->ci->hydro.count > 0 && t->ci->grav.count > 0)
242
        return task_action_all;
243
      else if (t->ci->hydro.count > 0)
244
        return task_action_part;
245
      else if (t->ci->grav.count > 0)
246
        return task_action_gpart;
247
248
      else {
#ifdef SWIFT_DEBUG_CHECKS
249
        error("Task without particles");
250
251
#endif
      }
252
253
      break;

254
    case task_type_init_grav:
255
    case task_type_grav_mm:
256
    case task_type_grav_long_range:
257
258
259
      return task_action_multipole;
      break;

260
    case task_type_drift_gpart:
261
    case task_type_grav_down:
262
    case task_type_end_grav_force:
263
    case task_type_grav_mesh:
264
      return task_action_gpart;
265
      break;
266

267
    default:
268
269
270
271
#ifdef SWIFT_DEBUG_CHECKS
      error("Unknown task_action for task %s/%s", taskID_names[t->type],
            subtaskID_names[t->subtype]);
#endif
272
273
274
      return task_action_none;
      break;
  }
275

276
277
278
279
280
#ifdef SWIFT_DEBUG_CHECKS
  error("Unknown task_action for task %s/%s", taskID_names[t->type],
        subtaskID_names[t->subtype]);
#endif
  /* Silence compiler warnings. We should never get here. */
281
  return task_action_none;
282
283
}

284
285
286
287
288
289
290
/**
 * @brief Compute the Jaccard similarity of the data used by two
 *        different tasks.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 */
291
292
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {
293
294
295
296
297
298

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

299
300
  /* First check if any of the two tasks are of a type that don't
     use cells. */
301
302
303
304
305
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
306
307
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
308
309
  const int ta_bpart =
      (ta_act == task_action_bpart || ta_act == task_action_all);
310
311
312
  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
313
314
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);
315
316
  const int tb_bpart =
      (tb_act == task_action_bpart || tb_act == task_action_all);
317
318
319
320
321
322

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
323
324
325
326
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;
327

328
    if (size_union == 0) return 0.f;
329

330
331
332
333
334
335
336
337
338
339
340
341
342
343
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
344
345
346
347
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;
348

349
350
    if (size_union == 0) return 0.f;

351
352
353
354
355
356
357
358
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }
359

Loic Hausammann's avatar
Loic Hausammann committed
360
361
362
363
364
  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
365
366
367
368
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;
Loic Hausammann's avatar
Loic Hausammann committed
369

370
    if (size_union == 0) return 0.f;
Loic Hausammann's avatar
Loic Hausammann committed
371

Loic Hausammann's avatar
Loic Hausammann committed
372
373
374
375
376
377
378
379
380
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
  /* In the case where both tasks act on bparts */
  else if (ta_bpart && tb_bpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->black_holes.count;
    if (ta->cj != NULL) size_union += ta->cj->black_holes.count;
    if (tb->ci != NULL) size_union += tb->ci->black_holes.count;
    if (tb->cj != NULL) size_union += tb->cj->black_holes.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_bpart(ta->ci, tb->ci) +
                                  task_cell_overlap_bpart(ta->ci, tb->cj) +
                                  task_cell_overlap_bpart(ta->cj, tb->ci) +
                                  task_cell_overlap_bpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

402
403
  /* Else, no overlap */
  return 0.f;
404
}
405

406
407
/**
 * @brief Unlock the cell held by this task.
408
 *
409
410
 * @param t The #task.
 */
411
412
void task_unlock(struct task *t) {

413
414
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
415
416
  struct cell *ci = t->ci, *cj = t->cj;

417
  /* Act based on task type. */
418
419
  switch (type) {

420
421
    case task_type_kick1:
    case task_type_kick2:
422
    case task_type_logger:
423
    case task_type_timestep:
424
425
426
      cell_unlocktree(ci);
      cell_gunlocktree(ci);
      break;
Matthieu Schaller's avatar
Matthieu Schaller committed
427

428
    case task_type_drift_part:
429
    case task_type_sort:
430
    case task_type_ghost:
431
    case task_type_extra_ghost:
432
    case task_type_end_hydro_force:
433
    case task_type_timestep_limiter:
434
435
436
      cell_unlocktree(ci);
      break;

437
    case task_type_drift_gpart:
438
    case task_type_grav_mesh:
439
    case task_type_end_grav_force:
440
441
442
      cell_gunlocktree(ci);
      break;

Loic Hausammann's avatar
Loic Hausammann committed
443
    case task_type_stars_sort:
444
    case task_type_stars_resort:
Loic Hausammann's avatar
Loic Hausammann committed
445
446
447
      cell_sunlocktree(ci);
      break;

448
    case task_type_self:
449
    case task_type_sub_self:
450
451
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
452
        cell_munlocktree(ci);
453
454
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
455
456
        cell_sunlocktree(ci);
        cell_unlocktree(ci);
457
458
459
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
460
                 (subtype == task_subtype_do_gas_swallow)) {
461
462
        cell_bunlocktree(ci);
        cell_unlocktree(ci);
463
464
      } else if (subtype == task_subtype_do_bh_swallow) {
        cell_bunlocktree(ci);
465
466
467
      } else {
        cell_unlocktree(ci);
      }
468
      break;
469

470
    case task_type_pair:
471
    case task_type_sub_pair:
472
473
474
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
        cell_gunlocktree(cj);
475
476
        cell_munlocktree(ci);
        cell_munlocktree(cj);
477
478
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
479
480
481
482
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
483
484
485
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
486
                 (subtype == task_subtype_do_gas_swallow)) {
487
488
489
490
        cell_bunlocktree(ci);
        cell_bunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
491
492
493
      } else if (subtype == task_subtype_do_bh_swallow) {
        cell_bunlocktree(ci);
        cell_bunlocktree(cj);
494
495
496
497
498
499
      } else {
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      }
      break;

500
    case task_type_grav_down:
501
      cell_gunlocktree(ci);
502
503
504
      cell_munlocktree(ci);
      break;

505
    case task_type_grav_long_range:
506
      cell_munlocktree(ci);
507
      break;
508

509
510
511
512
513
    case task_type_grav_mm:
      cell_munlocktree(ci);
      cell_munlocktree(cj);
      break;

514
515
516
517
    case task_type_star_formation:
      cell_unlocktree(ci);
      cell_sunlocktree(ci);
      cell_gunlocktree(ci);
518
      break;
519

520
521
522
523
    default:
      break;
  }
}
524
525
526
527
528
529

/**
 * @brief Try to lock the cells associated with this task.
 *
 * @param t the #task.
 */
530
531
int task_lock(struct task *t) {

532
533
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
534
  struct cell *ci = t->ci, *cj = t->cj;
535
536
537
538
#ifdef WITH_MPI
  int res = 0, err = 0;
  MPI_Status stat;
#endif
539

540
  switch (type) {
541

542
543
544
    /* Communication task? */
    case task_type_recv:
    case task_type_send:
545
#ifdef WITH_MPI
546
547
548
549
550
      /* Check the status of the MPI request. */
      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
        char buff[MPI_MAX_ERROR_STRING];
        int len;
        MPI_Error_string(err, buff, &len);
551
552
553
554
        error(
            "Failed to test request on send/recv task (type=%s/%s tag=%lld, "
            "%s).",
            taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
555
      }
Peter W. Draper's avatar
Peter W. Draper committed
556
557
558
559

      /* And log deactivation, if logging enabled. */
      if (res) mpiuse_log_allocation(t->type, t->subtype, &t->req, 0, 0, 0, 0);

560
      return res;
561
#else
562
      error("SWIFT was not compiled with MPI support.");
563
#endif
564
      break;
565

566
567
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
568
    case task_type_logger:
569
    case task_type_timestep:
570
      if (ci->hydro.hold || ci->grav.phold) return 0;
571
572
      if (cell_locktree(ci) != 0) return 0;
      if (cell_glocktree(ci) != 0) {
Matthieu Schaller's avatar
Matthieu Schaller committed
573
574
        cell_unlocktree(ci);
        return 0;
575
576
577
      }
      break;

578
    case task_type_drift_part:
579
    case task_type_sort:
580
    case task_type_ghost:
581
    case task_type_extra_ghost:
582
    case task_type_end_hydro_force:
583
    case task_type_timestep_limiter:
584
      if (ci->hydro.hold) return 0;
585
586
      if (cell_locktree(ci) != 0) return 0;
      break;
587

Loic Hausammann's avatar
Loic Hausammann committed
588
    case task_type_stars_sort:
589
    case task_type_stars_resort:
Loic Hausammann's avatar
Loic Hausammann committed
590
591
592
593
      if (ci->stars.hold) return 0;
      if (cell_slocktree(ci) != 0) return 0;
      break;

594
    case task_type_drift_gpart:
595
    case task_type_end_grav_force:
596
    case task_type_grav_mesh:
597
      if (ci->grav.phold) return 0;
598
599
600
      if (cell_glocktree(ci) != 0) return 0;
      break;

601
    case task_type_self:
602
    case task_type_sub_self:
603
      if (subtype == task_subtype_grav) {
604
        /* Lock the gparts and the m-pole */
605
        if (ci->grav.phold || ci->grav.mhold) return 0;
606
607
608
609
610
611
        if (cell_glocktree(ci) != 0)
          return 0;
        else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          return 0;
        }
612
613
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
614
615
616
617
618
619
620
        if (ci->stars.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
621
622
623
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
624
                 (subtype == task_subtype_do_gas_swallow)) {
625
626
627
628
629
630
631
        if (ci->black_holes.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
632
633
634
      } else if (subtype == task_subtype_do_bh_swallow) {
        if (ci->black_holes.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
Alexei Borissov's avatar
Alexei Borissov committed
635
      } else { /* subtype == hydro */
Loic Hausammann's avatar
Loic Hausammann committed
636
        if (ci->hydro.hold) return 0;
637
638
639
        if (cell_locktree(ci) != 0) return 0;
      }
      break;
640

641
    case task_type_pair:
642
    case task_type_sub_pair:
643
      if (subtype == task_subtype_grav) {
644
        /* Lock the gparts and the m-pole in both cells */
645
        if (ci->grav.phold || cj->grav.phold) return 0;
646
647
648
649
        if (cell_glocktree(ci) != 0) return 0;
        if (cell_glocktree(cj) != 0) {
          cell_gunlocktree(ci);
          return 0;
650
651
652
653
654
655
656
657
658
        } else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          return 0;
        } else if (cell_mlocktree(cj) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          cell_munlocktree(ci);
          return 0;
659
        }
660
661
      } else if ((subtype == task_subtype_stars_density) ||
                 (subtype == task_subtype_stars_feedback)) {
Alexei Borissov's avatar
Alexei Borissov committed
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
        /* Lock the stars and the gas particles in both cells */
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
681
682
683
      } else if ((subtype == task_subtype_bh_density) ||
                 (subtype == task_subtype_bh_feedback) ||
                 (subtype == task_subtype_bh_swallow) ||
684
                 (subtype == task_subtype_do_gas_swallow)) {
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
        /* Lock the BHs and the gas particles in both cells */
        if (ci->black_holes.hold || cj->black_holes.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_blocktree(cj) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_bunlocktree(ci);
          cell_bunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_bunlocktree(ci);
          cell_bunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
704
705
706
707
708
709
710
      } else if (subtype == task_subtype_do_bh_swallow) {
        if (ci->black_holes.hold || cj->black_holes.hold) return 0;
        if (cell_blocktree(ci) != 0) return 0;
        if (cell_blocktree(cj) != 0) {
          cell_bunlocktree(ci);
          return 0;
        }
Alexei Borissov's avatar
Alexei Borissov committed
711
      } else { /* subtype == hydro */
712
        /* Lock the parts in both cells */
713
        if (ci->hydro.hold || cj->hydro.hold) return 0;
714
715
716
717
718
719
720
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
      }
      break;
721

722
723
    case task_type_grav_down:
      /* Lock the gparts and the m-poles */
724
      if (ci->grav.phold || ci->grav.mhold) return 0;
725
726
727
728
729
730
731
732
      if (cell_glocktree(ci) != 0)
        return 0;
      else if (cell_mlocktree(ci) != 0) {
        cell_gunlocktree(ci);
        return 0;
      }
      break;

733
    case task_type_grav_long_range:
734
      /* Lock the m-poles */
735
      if (ci->grav.mhold) return 0;
736
      if (cell_mlocktree(ci) != 0) return 0;
Matthieu Schaller's avatar
Matthieu Schaller committed
737
738
      break;

739
740
    case task_type_grav_mm:
      /* Lock both m-poles */
741
      if (ci->grav.mhold || cj->grav.mhold) return 0;
742
743
744
745
746
      if (cell_mlocktree(ci) != 0) return 0;
      if (cell_mlocktree(cj) != 0) {
        cell_munlocktree(ci);
        return 0;
      }
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
      break;

    case task_type_star_formation:
      /* Lock the gas, gravity and star particles */
      if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_slocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        cell_sunlocktree(ci);
        return 0;
      }
762

763
764
    default:
      break;
765
766
767
768
769
  }

  /* If we made it this far, we've got a lock. */
  return 1;
}
770

771
772
773
774
775
776
777
778
779
780
781
/**
 * @brief Print basic information about a task.
 *
 * Reports the names of the task type and sub-type together with the wait
 * counter, the number of unlock tasks and the skip flag.
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  const char *type_name = taskID_names[t->type];
  const char *subtype_name = subtaskID_names[t->subtype];

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d", type_name,
          subtype_name, t->wait, t->nr_unlock_tasks, t->skip);
}
782

783
784
785
786
787
788
/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph.
 *
789
 * @param type The #task type.
790
 * @param subtype The #task subtype.
791
 * @param cluster (return) The group name (should be allocated)
792
 */
793
void task_get_group_name(int type, int subtype, char *cluster) {
794

795
796
  if (type == task_type_grav_long_range || type == task_type_grav_mm ||
      type == task_type_grav_mesh) {
797
798
799
800
801

    strcpy(cluster, "Gravity");
    return;
  }

802
  switch (subtype) {
803
804
805
806
    case task_subtype_density:
      strcpy(cluster, "Density");
      break;
    case task_subtype_gradient:
807
      if (type == task_type_send || type == task_type_recv) {
808
809
810
        strcpy(cluster, "None");
      } else {
        strcpy(cluster, "Gradient");
811
      }
812
813
814
815
816
817
818
      break;
    case task_subtype_force:
      strcpy(cluster, "Force");
      break;
    case task_subtype_grav:
      strcpy(cluster, "Gravity");
      break;
819
820
821
    case task_subtype_limiter:
      strcpy(cluster, "Timestep_limiter");
      break;
822
    case task_subtype_stars_density:
823
824
825
826
      strcpy(cluster, "StarsDensity");
      break;
    case task_subtype_stars_feedback:
      strcpy(cluster, "StarsFeedback");
827
      break;
828
829
830
    case task_subtype_bh_density:
      strcpy(cluster, "BHDensity");
      break;
831
832
833
    case task_subtype_bh_swallow:
      strcpy(cluster, "BHSwallow");
      break;
834
835
836
837
838
    case task_subtype_do_gas_swallow:
      strcpy(cluster, "DoGasSwallow");
      break;
    case task_subtype_do_bh_swallow:
      strcpy(cluster, "DoBHSwallow");
839
      break;
840
841
842
    case task_subtype_bh_feedback:
      strcpy(cluster, "BHFeedback");
      break;
843
844
845
846
847
848
849
850
851
852
853
854
855
    default:
      strcpy(cluster, "None");
      break;
  }
}

/**
 * @brief Generate the full name of a #task.
 *
 * @param type The #task type.
 * @param subtype The #task type.
 * @param name (return) The formatted string
 */
856
void task_get_full_name(int type, int subtype, char *name) {
857
858
859

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
860
  if (type >= task_type_count) error("Unknown task type %i", type);
861

862
  if (subtype >= task_subtype_count)
863
864
865
866
867
868
869
870
871
872
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
#endif

  /* Full task name */
  if (subtype == task_subtype_none)
    sprintf(name, "%s", taskID_names[type]);
  else
    sprintf(name, "%s_%s", taskID_names[type], subtaskID_names[subtype]);
}

873
874
875
876
877
878
879
880
881
882
#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 */
void task_create_mpi_comms(void) {
  for (int i = 0; i < task_subtype_count; i++) {
    MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]);
  }
}
#endif
883
884

/**
 * @brief dump all the tasks of all the known engines into a file for
 * postprocessing.
 *
 * Dumps the information to a file "thread_info-stepn.dat" where n is the
 * given step value, or "thread_info_MPI-stepn.dat", if we are running
 * under MPI. Note if running under MPI all the ranks are dumped into this
 * one file, which has an additional field to identify the rank.
 *
 * The first line written (per rank under MPI) carries the step tic and toc,
 * the update counters of the engine and the CPU frequency. It is followed
 * by one line for each task that is not implicit and has a non-zero toc.
 *
 * This function does nothing unless SWIFT_DEBUG_TASKS is defined. Failure
 * to open the output file is reported through error().
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  /* Need this to convert ticks to seconds. Converted to the signed type
   * expected by the format strings below. */
  const long long cpufreq = (long long)clocks_get_cpufreq();

#ifdef WITH_MPI
  /* Make sure output file is empty, only on one rank. */
  char dumpfile[35];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step);
  FILE *file_thread;
  if (engine_rank == 0) {
    file_thread = fopen(dumpfile, "w");
    if (file_thread == NULL)
      error("Could not create task dump file '%s'.", dumpfile);
    fclose(file_thread);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int i = 0; i < e->nr_nodes; i++) {

    /* Rank 0 decides the index of the writing node, this happens
     * one-by-one. */
    int kk = i;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (i == engine_rank) {

      /* Open file and position at end. */
      file_thread = fopen(dumpfile, "a");
      if (file_thread == NULL)
        error("Could not open task dump file '%s' for appending.", dumpfile);

      /* Add some information to help with the plots and conversion of ticks to
       * seconds. */
      fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
              engine_rank, (long long int)e->tic_step,
              (long long int)e->toc_step, e->updates, e->g_updates,
              e->s_updates, cpufreq);

      for (int l = 0; l < e->sched.nr_tasks; l++) {
        const struct task *t = &e->sched.tasks[l];

        /* Skip implicit tasks and tasks with no recorded toc. */
        if (t->implicit || t->toc == 0) continue;

        /* Particle counts are reported as 0 when a cell is absent. */
        fprintf(file_thread,
                " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
                engine_rank, t->rid, t->type, t->subtype, (t->cj == NULL),
                (long long int)t->tic, (long long int)t->toc,
                (t->ci != NULL) ? t->ci->hydro.count : 0,
                (t->cj != NULL) ? t->cj->hydro.count : 0,
                (t->ci != NULL) ? t->ci->grav.count : 0,
                (t->cj != NULL) ? t->cj->grav.count : 0, t->flags, t->sid);
      }
      fclose(file_thread);
    }

    /* And we wait for all to synchronize. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else
  /* Non-MPI, so just a single engine's worth of tasks to dump. */
  char dumpfile[32];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step);
  FILE *file_thread = fopen(dumpfile, "w");
  if (file_thread == NULL)
    error("Could not create task dump file '%s'.", dumpfile);

  /* Add some information to help with the plots and conversion of ticks to
   * seconds. */
  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
          -1, -1, 1, (long long int)e->tic_step, (long long int)e->toc_step,
          e->updates, e->g_updates, e->s_updates, 0, cpufreq);

  for (int l = 0; l < e->sched.nr_tasks; l++) {
    const struct task *t = &e->sched.tasks[l];

    /* Skip implicit tasks and tasks with no recorded toc. */
    if (t->implicit || t->toc == 0) continue;

    /* Particle counts are reported as 0 when a cell is absent. */
    fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", t->rid,
            t->type, t->subtype, (t->cj == NULL), (long long int)t->tic,
            (long long int)t->toc, (t->ci == NULL) ? 0 : t->ci->hydro.count,
            (t->cj == NULL) ? 0 : t->cj->hydro.count,
            (t->ci == NULL) ? 0 : t->ci->grav.count,
            (t->cj == NULL) ? 0 : t->cj->grav.count, t->sid);
  }
  fclose(file_thread);
#endif  // WITH_MPI

#endif  // SWIFT_DEBUG_TASKS
}

/**
 * @brief Generate simple statistics about the times used by the tasks of
997
998
999
 *        all the engines and write these into two formats, a human readable
 *        version for debugging and one intended for inclusion as the fixed
 *        costs for repartitioning.
1000
 *
1001
1002
1003
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of tasks of each type/subtype followed by the minimum, maximum, mean
1004
1005
 * and total time taken and the same numbers for the start of the task,
 * in millisec and then the fixed costs value.
1006
 *
1007
1008
1009
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
1010
 *
1011
 * @param dumpfile name of the file for the output.
1012
 * @param e the #engine
1013
1014
1015
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 *                 one, only used if header is false.
1016
 */
Peter W. Draper's avatar
Peter W. Draper committed
1017
1018
void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                     int allranks) {
1019
1020
1021

  /* Need arrays for sum, min and max across all types and subtypes. */
  double sum[task_type_count][task_subtype_count];
1022
  double tsum[task_type_count][task_subtype_count];
1023
  double min[task_type_count][task_subtype_count];
1024
  double tmin[task_type_count][task_subtype_count];
1025
  double max[task_type_count][task_subtype_count];
1026
  double tmax[task_type_count][task_subtype_count];
1027
1028
1029
1030
1031
  int count[task_type_count][task_subtype_count];

  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      sum[j][k] = 0.0;
1032
      tsum[j][k] = 0.0;
1033
1034
      count[j][k] = 0;
      min[j][k] = DBL_MAX;
1035
      tmin[j][k] = DBL_MAX;
1036
      max[j][k] = 0.0;
1037
      tmax[j][k] = 0.0;
1038
1039
1040
1041
1042
1043
1044
    }
  }

  double total[1] = {0.0};
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    int type = e->sched.tasks[l].type;

1045
1046
    /* Skip implicit tasks, tasks that didn't run. */
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
1047
1048
1049
1050
      int subtype = e->sched.tasks[l].subtype;

      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
      sum[type][subtype] += dt;
1051
1052
1053

      double tic = (double)e->sched.tasks[l].tic;
      tsum[type][subtype] += tic;
1054
1055
1056
1057
      count[type][subtype] += 1;
      if (dt < min[type][subtype]) {
        min[type][subtype] = dt;
      }
1058
1059
1060
      if (tic < tmin[type][subtype]) {
        tmin[type][subtype] = tic;
      }
1061
1062
1063
      if (dt > max[type][subtype]) {
        max[type][subtype] = dt;
      }
1064
1065
      if (tic > tmax[type][subtype]) {
        tmax[type][subtype] = tic;
1066
1067
1068
1069
1070
      }
      total[0] += dt;
    }
  }

1071
1072
1073
1074
1075
1076
1077
1078
1079
#ifdef WITH_MPI
  if (allranks || header) {
    /* Get these from all ranks for output from rank 0. Could wrap these into a
     * single operation. */
    size_t size = task_type_count * task_subtype_count;
    int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
                         MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");

1080
1081
1082
1083
    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : tsum), tsum, size,
                     MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task tsums");

1084
1085
1086
1087
1088
1089
1090
1091
    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
                     MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
                     MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");