task.c 32.8 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
5
6
7
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
8
 *
9
10
11
12
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
13
 *
14
15
16
17
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
18
 *
19
20
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 *
22
23
24
25
26
27
28
29
30
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
34

35
36
/* MPI headers. */
#ifdef WITH_MPI
37
#include <mpi.h>
38
39
#endif

40
41
42
/* This object's header. */
#include "task.h"

43
/* Local headers. */
Pedro Gonnet's avatar
Pedro Gonnet committed
44
#include "atomic.h"
45
#include "engine.h"
46
#include "error.h"
47
#include "inline.h"
48
#include "lock.h"
49
50

/* Task type names. */
51
52
53
54
55
56
57
58
59
60
61
62
63
const char *taskID_names[task_type_count] = {"none",
                                             "sort",
                                             "self",
                                             "pair",
                                             "sub_self",
                                             "sub_pair",
                                             "init_grav",
                                             "init_grav_out",
                                             "ghost_in",
                                             "ghost",
                                             "ghost_out",
                                             "extra_ghost",
                                             "drift_part",
64
                                             "drift_spart",
65
66
                                             "drift_gpart",
                                             "drift_gpart_out",
67
                                             "end_hydro_force",
68
69
70
                                             "kick1",
                                             "kick2",
                                             "timestep",
71
                                             "timestep_limiter",
72
73
74
75
76
77
78
                                             "send",
                                             "recv",
                                             "grav_long_range",
                                             "grav_mm",
                                             "grav_down_in",
                                             "grav_down",
                                             "grav_mesh",
79
                                             "grav_end_force",
80
81
                                             "cooling",
                                             "star_formation",
82
83
                                             "star_formation_in",
                                             "star_formation_out",
84
                                             "logger",
85
86
                                             "stars_in",
                                             "stars_out",
87
88
                                             "stars_ghost_in",
                                             "stars_ghost",
Loic Hausammann's avatar
Loic Hausammann committed
89
                                             "stars_ghost_out",
90
                                             "stars_sort"};
91

92
/* Sub-task type names. */
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
const char *subtaskID_names[task_subtype_count] = {"none",
                                                   "density",
                                                   "gradient",
                                                   "force",
                                                   "limiter",
                                                   "grav",
                                                   "external_grav",
                                                   "tend_part",
                                                   "tend_gpart",
                                                   "tend_spart",
                                                   "xv",
                                                   "rho",
                                                   "gpart",
                                                   "multipole",
                                                   "spart",
                                                   "stars_density",
                                                   "stars_feedback"};
110

111
112
113
114
115
#ifdef WITH_MPI
/* MPI communicators for the subtypes. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif

116
117
/**
 * @brief Computes the overlap between the parts array of two given cells.
118
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
119
120
121
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart)
 * @param ARRAY is the array of this specific type.
 * @param COUNT is the number of elements in the array.
122
 */
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                               \
  __attribute__((always_inline))                                            \
      INLINE static size_t task_cell_overlap_##TYPE(                        \
          const struct cell *restrict ci, const struct cell *restrict cj) { \
                                                                            \
    if (ci == NULL || cj == NULL) return 0;                                 \
                                                                            \
    if (ci->ARRAY <= cj->ARRAY &&                                           \
        ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT) {                   \
      return cj->COUNT;                                                     \
    } else if (cj->ARRAY <= ci->ARRAY &&                                    \
               cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT) {            \
      return ci->COUNT;                                                     \
    }                                                                       \
                                                                            \
    return 0;                                                               \
  }
140

141
TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
142
143
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);
Loic Hausammann's avatar
Loic Hausammann committed
144

145
146
147
148
149
/**
 * @brief Returns the #task_actions for a given task.
 *
 * @param t The #task.
 */
150
151
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {
152
153
154
155
156
157
158

  switch (t->type) {

    case task_type_none:
      return task_action_none;
      break;

159
    case task_type_drift_part:
160
161
    case task_type_sort:
    case task_type_ghost:
162
    case task_type_extra_ghost:
163
    case task_type_timestep_limiter:
Stefan Arridge's avatar
Stefan Arridge committed
164
    case task_type_cooling:
165
    case task_type_end_hydro_force:
166
167
168
      return task_action_part;
      break;

169
170
171
    case task_type_star_formation:
      return task_action_all;

172
    case task_type_drift_spart:
173
    case task_type_stars_ghost:
174
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
175
176
177
      return task_action_spart;
      break;

178
179
180
181
182
183
184
    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
185
        case task_subtype_gradient:
186
        case task_subtype_force:
187
        case task_subtype_limiter:
188
189
190
          return task_action_part;
          break;

191
        case task_subtype_stars_density:
Alexei Borissov's avatar
Alexei Borissov committed
192
        case task_subtype_stars_feedback:
193
194
          return task_action_all;
          break;
195

196
        case task_subtype_grav:
197
        case task_subtype_external_grav:
198
199
200
201
          return task_action_gpart;
          break;

        default:
202
203
204
205
#ifdef SWIFT_DEBUG_CHECKS
          error("Unknown task_action for task %s/%s", taskID_names[t->type],
                subtaskID_names[t->subtype]);
#endif
206
207
208
209
210
          return task_action_none;
          break;
      }
      break;

211
212
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
213
    case task_type_logger:
214
    case task_type_timestep:
215
216
    case task_type_send:
    case task_type_recv:
217
      if (t->ci->hydro.count > 0 && t->ci->grav.count > 0)
218
        return task_action_all;
219
      else if (t->ci->hydro.count > 0)
220
        return task_action_part;
221
      else if (t->ci->grav.count > 0)
222
        return task_action_gpart;
223
224
      else {
#ifdef SWIFT_DEBUG_CHECKS
225
        error("Task without particles");
226
227
#endif
      }
228
229
      break;

230
    case task_type_init_grav:
231
    case task_type_grav_mm:
232
    case task_type_grav_long_range:
233
234
235
      return task_action_multipole;
      break;

236
    case task_type_drift_gpart:
237
    case task_type_grav_down:
238
    case task_type_end_grav_force:
239
    case task_type_grav_mesh:
240
      return task_action_gpart;
241
      break;
242

243
    default:
244
245
246
247
#ifdef SWIFT_DEBUG_CHECKS
      error("Unknown task_action for task %s/%s", taskID_names[t->type],
            subtaskID_names[t->subtype]);
#endif
248
249
250
      return task_action_none;
      break;
  }
251

252
253
254
255
256
#ifdef SWIFT_DEBUG_CHECKS
  error("Unknown task_action for task %s/%s", taskID_names[t->type],
        subtaskID_names[t->subtype]);
#endif
  /* Silence compiler warnings. We should never get here. */
257
  return task_action_none;
258
259
}

260
261
262
263
264
265
266
/**
 * @brief Compute the Jaccard similarity of the data used by two
 *        different tasks.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 */
267
268
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {
269
270
271
272
273
274

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

275
276
  /* First check if any of the two tasks are of a type that don't
     use cells. */
277
278
279
280
281
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
282
283
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
284
285
286
  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
287
288
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);
289
290
291
292
293
294

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
295
296
297
298
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;
299

300
    if (size_union == 0) return 0.f;
301

302
303
304
305
306
307
308
309
310
311
312
313
314
315
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
316
317
318
319
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;
320

321
322
    if (size_union == 0) return 0.f;

323
324
325
326
327
328
329
330
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }
331

Loic Hausammann's avatar
Loic Hausammann committed
332
333
334
335
336
  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
337
338
339
340
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;
Loic Hausammann's avatar
Loic Hausammann committed
341

342
    if (size_union == 0) return 0.f;
Loic Hausammann's avatar
Loic Hausammann committed
343

Loic Hausammann's avatar
Loic Hausammann committed
344
345
346
347
348
349
350
351
352
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

353
354
  /* Else, no overlap */
  return 0.f;
355
}
356

357
358
/**
 * @brief Unlock the cell held by this task.
359
 *
360
361
 * @param t The #task.
 */
362
363
void task_unlock(struct task *t) {

364
365
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
366
367
  struct cell *ci = t->ci, *cj = t->cj;

368
  /* Act based on task type. */
369
370
  switch (type) {

371
372
    case task_type_kick1:
    case task_type_kick2:
373
    case task_type_logger:
374
    case task_type_timestep:
375
376
377
      cell_unlocktree(ci);
      cell_gunlocktree(ci);
      break;
Matthieu Schaller's avatar
Matthieu Schaller committed
378

379
    case task_type_drift_part:
380
    case task_type_sort:
381
    case task_type_ghost:
382
    case task_type_end_hydro_force:
383
    case task_type_timestep_limiter:
384
385
386
      cell_unlocktree(ci);
      break;

387
    case task_type_drift_gpart:
388
    case task_type_grav_mesh:
389
    case task_type_end_grav_force:
390
391
392
      cell_gunlocktree(ci);
      break;

393
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
394
395
396
      cell_sunlocktree(ci);
      break;

397
    case task_type_self:
398
    case task_type_sub_self:
399
400
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
401
        cell_munlocktree(ci);
Loic Hausammann's avatar
Loic Hausammann committed
402
403
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
Alexei Borissov's avatar
Alexei Borissov committed
404
405
406
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_unlocktree(ci);
407
408
409
      } else {
        cell_unlocktree(ci);
      }
410
      break;
411

412
    case task_type_pair:
413
    case task_type_sub_pair:
414
415
416
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
        cell_gunlocktree(cj);
417
418
        cell_munlocktree(ci);
        cell_munlocktree(cj);
Loic Hausammann's avatar
Loic Hausammann committed
419
420
421
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
Alexei Borissov's avatar
Alexei Borissov committed
422
423
424
425
426
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
427
428
429
430
431
432
      } else {
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      }
      break;

433
    case task_type_grav_down:
434
      cell_gunlocktree(ci);
435
436
437
      cell_munlocktree(ci);
      break;

438
    case task_type_grav_long_range:
439
      cell_munlocktree(ci);
440
      break;
441

442
443
444
445
446
    case task_type_grav_mm:
      cell_munlocktree(ci);
      cell_munlocktree(cj);
      break;

447
448
449
450
    case task_type_star_formation:
      cell_unlocktree(ci);
      cell_sunlocktree(ci);
      cell_gunlocktree(ci);
451
      break;
452

453
454
455
456
    default:
      break;
  }
}
457
458
459
460
461
462

/**
 * @brief Try to lock the cells associated with this task.
 *
 * @param t the #task.
 */
463
464
int task_lock(struct task *t) {

465
466
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
467
  struct cell *ci = t->ci, *cj = t->cj;
468
469
470
471
#ifdef WITH_MPI
  int res = 0, err = 0;
  MPI_Status stat;
#endif
472

473
  switch (type) {
474

475
476
477
    /* Communication task? */
    case task_type_recv:
    case task_type_send:
478
#ifdef WITH_MPI
479
480
481
482
483
      /* Check the status of the MPI request. */
      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
        char buff[MPI_MAX_ERROR_STRING];
        int len;
        MPI_Error_string(err, buff, &len);
484
485
486
487
        error(
            "Failed to test request on send/recv task (type=%s/%s tag=%lld, "
            "%s).",
            taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
488
489
      }
      return res;
490
#else
491
      error("SWIFT was not compiled with MPI support.");
492
#endif
493
      break;
494

495
496
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
497
    case task_type_logger:
498
    case task_type_timestep:
499
      if (ci->hydro.hold || ci->grav.phold) return 0;
500
501
      if (cell_locktree(ci) != 0) return 0;
      if (cell_glocktree(ci) != 0) {
Matthieu Schaller's avatar
Matthieu Schaller committed
502
503
        cell_unlocktree(ci);
        return 0;
504
505
506
      }
      break;

507
    case task_type_drift_part:
508
    case task_type_sort:
509
    case task_type_ghost:
510
    case task_type_end_hydro_force:
511
    case task_type_timestep_limiter:
512
      if (ci->hydro.hold) return 0;
513
514
      if (cell_locktree(ci) != 0) return 0;
      break;
515

516
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
517
518
519
520
      if (ci->stars.hold) return 0;
      if (cell_slocktree(ci) != 0) return 0;
      break;

521
    case task_type_drift_gpart:
522
    case task_type_end_grav_force:
523
    case task_type_grav_mesh:
524
      if (ci->grav.phold) return 0;
525
526
527
      if (cell_glocktree(ci) != 0) return 0;
      break;

528
    case task_type_self:
529
    case task_type_sub_self:
530
      if (subtype == task_subtype_grav) {
531
        /* Lock the gparts and the m-pole */
532
        if (ci->grav.phold || ci->grav.mhold) return 0;
533
534
535
536
537
538
        if (cell_glocktree(ci) != 0)
          return 0;
        else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          return 0;
        }
Loic Hausammann's avatar
Loic Hausammann committed
539
540
541
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
Alexei Borissov's avatar
Alexei Borissov committed
542
543
544
545
546
547
548
549
550
      } else if (subtype == task_subtype_stars_feedback) {
        if (ci->stars.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
Loic Hausammann's avatar
Loic Hausammann committed
551
        if (ci->hydro.hold) return 0;
552
553
554
        if (cell_locktree(ci) != 0) return 0;
      }
      break;
555

556
    case task_type_pair:
557
    case task_type_sub_pair:
558
      if (subtype == task_subtype_grav) {
559
        /* Lock the gparts and the m-pole in both cells */
560
        if (ci->grav.phold || cj->grav.phold) return 0;
561
562
563
564
        if (cell_glocktree(ci) != 0) return 0;
        if (cell_glocktree(cj) != 0) {
          cell_gunlocktree(ci);
          return 0;
565
566
567
568
569
570
571
572
573
        } else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          return 0;
        } else if (cell_mlocktree(cj) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          cell_munlocktree(ci);
          return 0;
574
        }
Loic Hausammann's avatar
Loic Hausammann committed
575
576
577
578
579
580
581
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
Alexei Borissov's avatar
Alexei Borissov committed
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
      } else if (subtype == task_subtype_stars_feedback) {
        /* Lock the stars and the gas particles in both cells */
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
603
        /* Lock the parts in both cells */
604
        if (ci->hydro.hold || cj->hydro.hold) return 0;
605
606
607
608
609
610
611
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
      }
      break;
612

613
614
    case task_type_grav_down:
      /* Lock the gparts and the m-poles */
615
      if (ci->grav.phold || ci->grav.mhold) return 0;
616
617
618
619
620
621
622
623
      if (cell_glocktree(ci) != 0)
        return 0;
      else if (cell_mlocktree(ci) != 0) {
        cell_gunlocktree(ci);
        return 0;
      }
      break;

624
    case task_type_grav_long_range:
625
      /* Lock the m-poles */
626
      if (ci->grav.mhold) return 0;
627
      if (cell_mlocktree(ci) != 0) return 0;
Matthieu Schaller's avatar
Matthieu Schaller committed
628
629
      break;

630
631
    case task_type_grav_mm:
      /* Lock both m-poles */
632
      if (ci->grav.mhold || cj->grav.mhold) return 0;
633
634
635
636
637
      if (cell_mlocktree(ci) != 0) return 0;
      if (cell_mlocktree(cj) != 0) {
        cell_munlocktree(ci);
        return 0;
      }
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
      break;

    case task_type_star_formation:
      /* Lock the gas, gravity and star particles */
      if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_slocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        cell_sunlocktree(ci);
        return 0;
      }
653

654
655
    default:
      break;
656
657
658
659
660
  }

  /* If we made it this far, we've got a lock. */
  return 1;
}
661

662
663
664
665
666
667
668
669
670
671
672
/**
 * @brief Print basic information about a task.
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d",
          taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
          t->nr_unlock_tasks, t->skip);
}
673

674
675
676
677
678
679
/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph.
 *
680
 * @param type The #task type.
681
 * @param subtype The #task subtype.
682
 * @param cluster (return) The group name (should be allocated)
683
 */
684
void task_get_group_name(int type, int subtype, char *cluster) {
685

686
687
  if (type == task_type_grav_long_range || type == task_type_grav_mm ||
      type == task_type_grav_mesh) {
688
689
690
691
692

    strcpy(cluster, "Gravity");
    return;
  }

693
  switch (subtype) {
694
695
696
697
    case task_subtype_density:
      strcpy(cluster, "Density");
      break;
    case task_subtype_gradient:
698
      if (type == task_type_send || type == task_type_recv) {
699
700
701
        strcpy(cluster, "None");
      } else {
        strcpy(cluster, "Gradient");
702
      }
703
704
      break;
    case task_subtype_force:
705
      strcpy(cluster, "Force");
706
707
708
709
      break;
    case task_subtype_grav:
      strcpy(cluster, "Gravity");
      break;
710
711
712
    case task_subtype_limiter:
      strcpy(cluster, "Timestep_limiter");
      break;
713
    case task_subtype_stars_density:
714
715
716
717
      strcpy(cluster, "StarsDensity");
      break;
    case task_subtype_stars_feedback:
      strcpy(cluster, "StarsFeedback");
718
719
720
721
722
723
724
725
726
727
728
729
730
731
      break;
    default:
      strcpy(cluster, "None");
      break;
  }
}

/**
 * @brief Generate the full name of a #task.
 *
 * @param type The #task type.
 * @param subtype The #task type.
 * @param name (return) The formatted string
 */
732
void task_get_full_name(int type, int subtype, char *name) {
733
734
735

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
736
  if (type >= task_type_count) error("Unknown task type %i", type);
737

738
  if (subtype >= task_subtype_count)
739
740
741
742
743
744
745
746
747
748
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
#endif

  /* Full task name */
  if (subtype == task_subtype_none)
    sprintf(name, "%s", taskID_names[type]);
  else
    sprintf(name, "%s_%s", taskID_names[type], subtaskID_names[subtype]);
}

749
750
751
752
753
754
755
756
757
758
#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 */
void task_create_mpi_comms(void) {
  for (int i = 0; i < task_subtype_count; i++) {
    MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]);
  }
}
#endif
759
760

/**
761
762
 * @brief dump all the tasks of all the known engines into a file for
 * postprocessing.
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
 *
 * Dumps the information to a file "thread_info-stepn.dat" where n is the
 * given step value, or "thread_info_MPI-stepn.dat", if we are running
 * under MPI. Note if running under MPIU all the ranks are dumped into this
 * one file, which has an additional field to identify the rank.
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  /* Need this to convert ticks to seconds. */
  unsigned long long cpufreq = clocks_get_cpufreq();

#ifdef WITH_MPI
  /* Make sure output file is empty, only on one rank. */
  char dumpfile[35];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step);
  FILE *file_thread;
  if (engine_rank == 0) {
    file_thread = fopen(dumpfile, "w");
    fclose(file_thread);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int i = 0; i < e->nr_nodes; i++) {

    /* Rank 0 decides the index of the writing node, this happens
     * one-by-one. */
    int kk = i;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (i == engine_rank) {

      /* Open file and position at end. */
      file_thread = fopen(dumpfile, "a");

      /* Add some information to help with the plots and conversion of ticks to
       * seconds. */
      fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
Josh Borrow's avatar
Josh Borrow committed
805
806
              engine_rank, (long long int)e->tic_step,
              (long long int)e->toc_step, e->updates, e->g_updates,
807
808
809
810
811
812
813
814
              e->s_updates, cpufreq);
      int count = 0;
      for (int l = 0; l < e->sched.nr_tasks; l++) {
        if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
          fprintf(
              file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
              engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
              e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
Josh Borrow's avatar
Josh Borrow committed
815
816
              (long long int)e->sched.tasks[l].tic,
              (long long int)e->sched.tasks[l].toc,
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->hydro.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->hydro.count
                                             : 0,
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->grav.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->grav.count
                                             : 0,
              e->sched.tasks[l].flags, e->sched.tasks[l].sid);
        }
        count++;
      }
      fclose(file_thread);
    }

    /* And we wait for all to synchronize. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else
  /* Non-MPI, so just a single engine's worth of tasks to dump. */
  char dumpfile[32];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step);
  FILE *file_thread;
  file_thread = fopen(dumpfile, "w");

  /* Add some information to help with the plots and conversion of ticks to
   * seconds. */
  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
846
847
          -1, -1, 1, (unsigned long long)e->tic_step,
          (unsigned long long)e->toc_step, e->updates, e->g_updates,
848
849
850
851
852
853
854
          e->s_updates, 0, cpufreq);
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
      fprintf(
          file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
          e->sched.tasks[l].rid, e->sched.tasks[l].type,
          e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
Josh Borrow's avatar
Josh Borrow committed
855
856
          (unsigned long long)e->sched.tasks[l].tic,
          (unsigned long long)e->sched.tasks[l].toc,
857
858
859
860
861
862
863
864
865
866
867
          (e->sched.tasks[l].ci == NULL) ? 0
                                         : e->sched.tasks[l].ci->hydro.count,
          (e->sched.tasks[l].cj == NULL) ? 0
                                         : e->sched.tasks[l].cj->hydro.count,
          (e->sched.tasks[l].ci == NULL) ? 0 : e->sched.tasks[l].ci->grav.count,
          (e->sched.tasks[l].cj == NULL) ? 0 : e->sched.tasks[l].cj->grav.count,
          e->sched.tasks[l].sid);
    }
  }
  fclose(file_thread);
#endif  // WITH_MPI
868
869
870
871
872
#endif  // SWIFT_DEBUG_TASKS
}

/**
 * @brief Generate simple statistics about the times used by the tasks of
873
874
875
 *        all the engines and write these into two format, a human readable
 *        version for debugging and one intented for inclusion as the fixed
 *        costs for repartitioning.
876
 *
877
878
879
880
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of task of each type/subtype followed by the minimum, maximum, mean
 * and total time, in millisec and then the fixed costs value.
881
 *
882
883
884
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
885
 *
886
 * @param dumpfile name of the file for the output.
887
 * @param e the #engine
888
889
890
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 *                 one, only used if header is false.
891
 */
Peter W. Draper's avatar
Peter W. Draper committed
892
893
void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                     int allranks) {
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932

  /* Need arrays for sum, min and max across all types and subtypes. */
  double sum[task_type_count][task_subtype_count];
  double min[task_type_count][task_subtype_count];
  double max[task_type_count][task_subtype_count];
  int count[task_type_count][task_subtype_count];

  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      sum[j][k] = 0.0;
      count[j][k] = 0;
      min[j][k] = DBL_MAX;
      max[j][k] = 0.0;
    }
  }

  double total[1] = {0.0};
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    int type = e->sched.tasks[l].type;

    /* Skip implicit tasks, tasks that didn't run and MPI send/recv as these
     * are not interesting (or meaningfully measured). */
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0 &&
        type != task_type_send && type != task_type_recv) {
      int subtype = e->sched.tasks[l].subtype;

      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
      sum[type][subtype] += dt;
      count[type][subtype] += 1;
      if (dt < min[type][subtype]) {
        min[type][subtype] = dt;
      }
      if (dt > max[type][subtype]) {
        max[type][subtype] = dt;
      }
      total[0] += dt;
    }
  }

933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
#ifdef WITH_MPI
  if (allranks || header) {
    /* Get these from all ranks for output from rank 0. Could wrap these into a
     * single operation. */
    size_t size = task_type_count * task_subtype_count;
    int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
                         MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
                     MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
                     MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
                     MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
                     MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
  }
958

959
  if (!allranks || (engine_rank == 0 && (allranks || header))) {
960
961
962
#endif

    FILE *dfile = fopen(dumpfile, "w");
963
964
965
966
967
968
    if (header) {
      fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n");
      fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n");
    } else {
      fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
    }
969
970
971
972
973
974
975

    for (int j = 0; j < task_type_count; j++) {
      const char *taskID = taskID_names[j];
      for (int k = 0; k < task_subtype_count; k++) {
        if (sum[j][k] > 0.0) {
          double mean = sum[j][k] / (double)count[j][k];
          double perc = 100.0 * sum[j][k] / total[0];
976
977
978
979

          /* Fixed cost is in .1ns as we want to compare between runs in
           * some absolute units. */
          int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f);
980
981
982
983
984
985
986
987
          if (header) {
            fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
                    k, fixed_cost, taskID, subtaskID_names[k]);
          } else {
            fprintf(dfile,
                    "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n",
                    taskID, subtaskID_names[k], count[j][k],
                    clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
Peter W. Draper's avatar
Peter W. Draper committed
988
989
                    clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), perc,
                    fixed_cost);
990
          }
991
992
993
994
995
996
997
        }
      }
    }
    fclose(dfile);
#ifdef WITH_MPI
  }
#endif
998
}