/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
34

35
36
/* MPI headers. */
#ifdef WITH_MPI
37
#include <mpi.h>
38
39
#endif

40
41
42
/* This object's header. */
#include "task.h"

43
/* Local headers. */
Pedro Gonnet's avatar
Pedro Gonnet committed
44
#include "atomic.h"
45
#include "engine.h"
46
#include "error.h"
47
#include "inline.h"
48
#include "lock.h"
49
50

/* Task type names. */
51
52
53
54
55
56
57
58
59
60
61
62
63
const char *taskID_names[task_type_count] = {"none",
                                             "sort",
                                             "self",
                                             "pair",
                                             "sub_self",
                                             "sub_pair",
                                             "init_grav",
                                             "init_grav_out",
                                             "ghost_in",
                                             "ghost",
                                             "ghost_out",
                                             "extra_ghost",
                                             "drift_part",
64
                                             "drift_spart",
65
66
                                             "drift_gpart",
                                             "drift_gpart_out",
67
                                             "end_hydro_force",
68
69
70
                                             "kick1",
                                             "kick2",
                                             "timestep",
71
                                             "timestep_limiter",
72
73
74
75
76
77
78
                                             "send",
                                             "recv",
                                             "grav_long_range",
                                             "grav_mm",
                                             "grav_down_in",
                                             "grav_down",
                                             "grav_mesh",
79
                                             "grav_end_force",
80
81
                                             "cooling",
                                             "star_formation",
82
83
                                             "star_formation_in",
                                             "star_formation_out",
84
                                             "logger",
85
86
                                             "stars_in",
                                             "stars_out",
87
88
                                             "stars_ghost_in",
                                             "stars_ghost",
Loic Hausammann's avatar
Loic Hausammann committed
89
                                             "stars_ghost_out",
90
                                             "stars_sort"};
91

92
/* Sub-task type names. */
93
const char *subtaskID_names[task_subtype_count] = {
94
95
96
97
    "none",    "density",       "gradient",      "force",
    "limiter", "grav",          "external_grav", "tend",
    "xv",      "rho",           "gpart",         "multipole",
    "spart",   "stars_density", "stars_feedback"};
98

99
100
101
102
103
#ifdef WITH_MPI
/* MPI communicators for the subtypes: one entry per task subtype, indexed by
 * the subtype value. The entries are filled in by task_create_mpi_comms(),
 * which duplicates MPI_COMM_WORLD into each slot. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif

104
105
/**
 * @brief Computes the overlap between the parts array of two given cells.
 *
 * Expands to a function task_cell_overlap_TYPE(ci, cj). The generated
 * function only detects full containment: if the array range of one cell
 * lies entirely inside the array range of the other, the count of the
 * contained cell is returned. In every other situation (disjoint or only
 * partially overlapping ranges, or either cell being NULL) it returns 0.
 *
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart)
 * @param ARRAY is the array of this specific type.
 * @param COUNT is the number of elements in the array.
 */
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                               \
  __attribute__((always_inline))                                            \
      INLINE static size_t task_cell_overlap_##TYPE(                        \
          const struct cell *restrict ci, const struct cell *restrict cj) { \
                                                                            \
    if (ci == NULL || cj == NULL) return 0;                                 \
                                                                            \
    if (ci->ARRAY <= cj->ARRAY &&                                           \
        ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT) {                   \
      return cj->COUNT;                                                     \
    } else if (cj->ARRAY <= ci->ARRAY &&                                    \
               cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT) {            \
      return ci->COUNT;                                                     \
    }                                                                       \
                                                                            \
    return 0;                                                               \
  }

/* Instantiate the overlap function for the gas, gravity and star particle
 * arrays of a cell. */
TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);
Loic Hausammann's avatar
Loic Hausammann committed
132

133
134
135
136
137
/**
 * @brief Returns the #task_actions for a given task.
 *
 * @param t The #task.
 */
138
139
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {
140
141
142
143
144
145
146

  switch (t->type) {

    case task_type_none:
      return task_action_none;
      break;

147
    case task_type_drift_part:
148
149
    case task_type_sort:
    case task_type_ghost:
150
    case task_type_extra_ghost:
151
    case task_type_timestep_limiter:
Stefan Arridge's avatar
Stefan Arridge committed
152
    case task_type_cooling:
153
    case task_type_end_hydro_force:
154
155
156
      return task_action_part;
      break;

157
158
159
    case task_type_star_formation:
      return task_action_all;

160
    case task_type_drift_spart:
161
    case task_type_stars_ghost:
162
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
163
164
165
      return task_action_spart;
      break;

166
167
168
169
170
171
172
    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
173
        case task_subtype_gradient:
174
        case task_subtype_force:
175
        case task_subtype_limiter:
176
177
178
          return task_action_part;
          break;

179
        case task_subtype_stars_density:
Alexei Borissov's avatar
Alexei Borissov committed
180
        case task_subtype_stars_feedback:
181
182
          return task_action_all;
          break;
183

184
        case task_subtype_grav:
185
        case task_subtype_external_grav:
186
187
188
189
          return task_action_gpart;
          break;

        default:
190
191
192
193
#ifdef SWIFT_DEBUG_CHECKS
          error("Unknown task_action for task %s/%s", taskID_names[t->type],
                subtaskID_names[t->subtype]);
#endif
194
195
196
197
198
          return task_action_none;
          break;
      }
      break;

199
200
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
201
    case task_type_logger:
202
    case task_type_timestep:
203
204
    case task_type_send:
    case task_type_recv:
205
      if (t->ci->hydro.count > 0 && t->ci->grav.count > 0)
206
        return task_action_all;
207
      else if (t->ci->hydro.count > 0)
208
        return task_action_part;
209
      else if (t->ci->grav.count > 0)
210
        return task_action_gpart;
211
212
      else {
#ifdef SWIFT_DEBUG_CHECKS
213
        error("Task without particles");
214
215
#endif
      }
216
217
      break;

218
    case task_type_init_grav:
219
    case task_type_grav_mm:
220
    case task_type_grav_long_range:
221
222
223
      return task_action_multipole;
      break;

224
    case task_type_drift_gpart:
225
    case task_type_grav_down:
226
    case task_type_end_grav_force:
227
    case task_type_grav_mesh:
228
      return task_action_gpart;
229
      break;
230

231
    default:
232
233
234
235
#ifdef SWIFT_DEBUG_CHECKS
      error("Unknown task_action for task %s/%s", taskID_names[t->type],
            subtaskID_names[t->subtype]);
#endif
236
237
238
      return task_action_none;
      break;
  }
239

240
241
242
243
244
#ifdef SWIFT_DEBUG_CHECKS
  error("Unknown task_action for task %s/%s", taskID_names[t->type],
        subtaskID_names[t->subtype]);
#endif
  /* Silence compiler warnings. We should never get here. */
245
  return task_action_none;
246
247
}

248
249
250
251
252
253
254
/**
 * @brief Compute the Jaccard similarity of the data used by two
 *        different tasks.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 */
255
256
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {
257
258
259
260
261
262

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

263
264
  /* First check if any of the two tasks are of a type that don't
     use cells. */
265
266
267
268
269
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
270
271
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
272
273
274
  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
275
276
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);
277
278
279
280
281
282

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
283
284
285
286
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;
287

288
    if (size_union == 0) return 0.f;
289

290
291
292
293
294
295
296
297
298
299
300
301
302
303
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
304
305
306
307
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;
308

309
310
    if (size_union == 0) return 0.f;

311
312
313
314
315
316
317
318
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }
319

Loic Hausammann's avatar
Loic Hausammann committed
320
321
322
323
324
  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
325
326
327
328
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;
Loic Hausammann's avatar
Loic Hausammann committed
329

330
    if (size_union == 0) return 0.f;
Loic Hausammann's avatar
Loic Hausammann committed
331

Loic Hausammann's avatar
Loic Hausammann committed
332
333
334
335
336
337
338
339
340
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

341
342
  /* Else, no overlap */
  return 0.f;
343
}
344

345
346
/**
 * @brief Unlock the cell held by this task.
347
 *
348
349
 * @param t The #task.
 */
350
351
void task_unlock(struct task *t) {

352
353
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
354
355
  struct cell *ci = t->ci, *cj = t->cj;

356
  /* Act based on task type. */
357
358
  switch (type) {

359
360
    case task_type_kick1:
    case task_type_kick2:
361
    case task_type_logger:
362
    case task_type_timestep:
363
364
365
      cell_unlocktree(ci);
      cell_gunlocktree(ci);
      break;
Matthieu Schaller's avatar
Matthieu Schaller committed
366

367
    case task_type_drift_part:
368
    case task_type_sort:
369
    case task_type_ghost:
370
    case task_type_end_hydro_force:
371
    case task_type_timestep_limiter:
372
373
374
      cell_unlocktree(ci);
      break;

375
    case task_type_drift_gpart:
376
    case task_type_grav_mesh:
377
    case task_type_end_grav_force:
378
379
380
      cell_gunlocktree(ci);
      break;

381
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
382
383
384
      cell_sunlocktree(ci);
      break;

385
    case task_type_self:
386
    case task_type_sub_self:
387
388
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
389
        cell_munlocktree(ci);
Loic Hausammann's avatar
Loic Hausammann committed
390
391
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
Alexei Borissov's avatar
Alexei Borissov committed
392
393
394
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_unlocktree(ci);
395
396
397
      } else {
        cell_unlocktree(ci);
      }
398
      break;
399

400
    case task_type_pair:
401
    case task_type_sub_pair:
402
403
404
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
        cell_gunlocktree(cj);
405
406
        cell_munlocktree(ci);
        cell_munlocktree(cj);
Loic Hausammann's avatar
Loic Hausammann committed
407
408
409
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
Alexei Borissov's avatar
Alexei Borissov committed
410
411
412
413
414
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
415
416
417
418
419
420
      } else {
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      }
      break;

421
    case task_type_grav_down:
422
      cell_gunlocktree(ci);
423
424
425
      cell_munlocktree(ci);
      break;

426
    case task_type_grav_long_range:
427
      cell_munlocktree(ci);
428
      break;
429

430
431
432
433
434
    case task_type_grav_mm:
      cell_munlocktree(ci);
      cell_munlocktree(cj);
      break;

435
436
437
438
    case task_type_star_formation:
      cell_unlocktree(ci);
      cell_sunlocktree(ci);
      cell_gunlocktree(ci);
439
      break;
440

441
442
443
444
    default:
      break;
  }
}
445
446
447
448
449
450

/**
 * @brief Try to lock the cells associated with this task.
 *
 * @param t the #task.
 */
451
452
int task_lock(struct task *t) {

453
454
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
455
  struct cell *ci = t->ci, *cj = t->cj;
456
457
458
459
#ifdef WITH_MPI
  int res = 0, err = 0;
  MPI_Status stat;
#endif
460

461
  switch (type) {
462

463
464
465
    /* Communication task? */
    case task_type_recv:
    case task_type_send:
466
#ifdef WITH_MPI
467
468
469
470
471
      /* Check the status of the MPI request. */
      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
        char buff[MPI_MAX_ERROR_STRING];
        int len;
        MPI_Error_string(err, buff, &len);
472
473
474
475
        error(
            "Failed to test request on send/recv task (type=%s/%s tag=%lld, "
            "%s).",
            taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
476
477
      }
      return res;
478
#else
479
      error("SWIFT was not compiled with MPI support.");
480
#endif
481
      break;
482

483
484
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
485
    case task_type_logger:
486
    case task_type_timestep:
487
      if (ci->hydro.hold || ci->grav.phold) return 0;
488
489
      if (cell_locktree(ci) != 0) return 0;
      if (cell_glocktree(ci) != 0) {
Matthieu Schaller's avatar
Matthieu Schaller committed
490
491
        cell_unlocktree(ci);
        return 0;
492
493
494
      }
      break;

495
    case task_type_drift_part:
496
    case task_type_sort:
497
    case task_type_ghost:
498
    case task_type_end_hydro_force:
499
    case task_type_timestep_limiter:
500
      if (ci->hydro.hold) return 0;
501
502
      if (cell_locktree(ci) != 0) return 0;
      break;
503

504
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
505
506
507
508
      if (ci->stars.hold) return 0;
      if (cell_slocktree(ci) != 0) return 0;
      break;

509
    case task_type_drift_gpart:
510
    case task_type_end_grav_force:
511
    case task_type_grav_mesh:
512
      if (ci->grav.phold) return 0;
513
514
515
      if (cell_glocktree(ci) != 0) return 0;
      break;

516
    case task_type_self:
517
    case task_type_sub_self:
518
      if (subtype == task_subtype_grav) {
519
        /* Lock the gparts and the m-pole */
520
        if (ci->grav.phold || ci->grav.mhold) return 0;
521
522
523
524
525
526
        if (cell_glocktree(ci) != 0)
          return 0;
        else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          return 0;
        }
Loic Hausammann's avatar
Loic Hausammann committed
527
528
529
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
Alexei Borissov's avatar
Alexei Borissov committed
530
531
532
533
534
535
536
537
538
      } else if (subtype == task_subtype_stars_feedback) {
        if (ci->stars.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
Loic Hausammann's avatar
Loic Hausammann committed
539
        if (ci->hydro.hold) return 0;
540
541
542
        if (cell_locktree(ci) != 0) return 0;
      }
      break;
543

544
    case task_type_pair:
545
    case task_type_sub_pair:
546
      if (subtype == task_subtype_grav) {
547
        /* Lock the gparts and the m-pole in both cells */
548
        if (ci->grav.phold || cj->grav.phold) return 0;
549
550
551
552
        if (cell_glocktree(ci) != 0) return 0;
        if (cell_glocktree(cj) != 0) {
          cell_gunlocktree(ci);
          return 0;
553
554
555
556
557
558
559
560
561
        } else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          return 0;
        } else if (cell_mlocktree(cj) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          cell_munlocktree(ci);
          return 0;
562
        }
Loic Hausammann's avatar
Loic Hausammann committed
563
564
565
566
567
568
569
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
Alexei Borissov's avatar
Alexei Borissov committed
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
      } else if (subtype == task_subtype_stars_feedback) {
        /* Lock the stars and the gas particles in both cells */
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
591
        /* Lock the parts in both cells */
592
        if (ci->hydro.hold || cj->hydro.hold) return 0;
593
594
595
596
597
598
599
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
      }
      break;
600

601
602
    case task_type_grav_down:
      /* Lock the gparts and the m-poles */
603
      if (ci->grav.phold || ci->grav.mhold) return 0;
604
605
606
607
608
609
610
611
      if (cell_glocktree(ci) != 0)
        return 0;
      else if (cell_mlocktree(ci) != 0) {
        cell_gunlocktree(ci);
        return 0;
      }
      break;

612
    case task_type_grav_long_range:
613
      /* Lock the m-poles */
614
      if (ci->grav.mhold) return 0;
615
      if (cell_mlocktree(ci) != 0) return 0;
Matthieu Schaller's avatar
Matthieu Schaller committed
616
617
      break;

618
619
    case task_type_grav_mm:
      /* Lock both m-poles */
620
      if (ci->grav.mhold || cj->grav.mhold) return 0;
621
622
623
624
625
      if (cell_mlocktree(ci) != 0) return 0;
      if (cell_mlocktree(cj) != 0) {
        cell_munlocktree(ci);
        return 0;
      }
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
      break;

    case task_type_star_formation:
      /* Lock the gas, gravity and star particles */
      if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_slocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        cell_sunlocktree(ci);
        return 0;
      }
641

642
643
    default:
      break;
644
645
646
647
648
  }

  /* If we made it this far, we've got a lock. */
  return 1;
}
649

650
651
652
653
654
655
656
657
658
659
660
/**
 * @brief Print basic information about a task.
 *
 * Emits a single message with the names of the task's type and subtype, its
 * wait counter, the number of tasks it unlocks and its skip flag.
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  /* Translate the type and subtype into their printable names. */
  const char *type_name = taskID_names[t->type];
  const char *subtype_name = subtaskID_names[t->subtype];

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d", type_name,
          subtype_name, t->wait, t->nr_unlock_tasks, t->skip);
}
661

662
663
664
665
666
667
/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph.
 *
668
 * @param type The #task type.
669
 * @param subtype The #task subtype.
670
 * @param cluster (return) The group name (should be allocated)
671
 */
672
void task_get_group_name(int type, int subtype, char *cluster) {
673

674
675
  if (type == task_type_grav_long_range || type == task_type_grav_mm ||
      type == task_type_grav_mesh) {
676
677
678
679
680

    strcpy(cluster, "Gravity");
    return;
  }

681
  switch (subtype) {
682
683
684
685
    case task_subtype_density:
      strcpy(cluster, "Density");
      break;
    case task_subtype_gradient:
686
      if (type == task_type_send || type == task_type_recv) {
687
688
689
        strcpy(cluster, "None");
      } else {
        strcpy(cluster, "Gradient");
690
      }
691
692
      break;
    case task_subtype_force:
693
      strcpy(cluster, "Force");
694
695
696
697
      break;
    case task_subtype_grav:
      strcpy(cluster, "Gravity");
      break;
698
699
700
    case task_subtype_limiter:
      strcpy(cluster, "Timestep_limiter");
      break;
701
    case task_subtype_stars_density:
702
703
704
705
      strcpy(cluster, "StarsDensity");
      break;
    case task_subtype_stars_feedback:
      strcpy(cluster, "StarsFeedback");
706
707
708
709
710
711
712
713
714
715
716
717
718
719
      break;
    default:
      strcpy(cluster, "None");
      break;
  }
}

/**
 * @brief Generate the full name of a #task.
 *
 * @param type The #task type.
 * @param subtype The #task type.
 * @param name (return) The formatted string
 */
720
void task_get_full_name(int type, int subtype, char *name) {
721
722
723

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
724
  if (type >= task_type_count) error("Unknown task type %i", type);
725

726
  if (subtype >= task_subtype_count)
727
728
729
730
731
732
733
734
735
736
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
#endif

  /* Full task name */
  if (subtype == task_subtype_none)
    sprintf(name, "%s", taskID_names[type]);
  else
    sprintf(name, "%s_%s", taskID_names[type], subtaskID_names[subtype]);
}

737
738
739
740
741
742
743
744
745
746
#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 *
 * Fills every entry of subtaskMPI_comms with a duplicate of MPI_COMM_WORLD.
 * A failure to duplicate the communicator is reported through error(), in
 * the same way as a failed MPI_Test() in task_lock().
 */
void task_create_mpi_comms(void) {
  for (int i = 0; i < task_subtype_count; i++) {
    const int err = MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]);
    if (err != MPI_SUCCESS) {
      char buff[MPI_MAX_ERROR_STRING];
      int len;
      MPI_Error_string(err, buff, &len);
      error("Failed to duplicate MPI_COMM_WORLD for subtype %s (%s).",
            subtaskID_names[i], buff);
    }
  }
}
#endif
747
748

/**
 * @brief dump all the tasks of all the known engines into a file for
 * postprocessing.
 *
 * Dumps the information to a file "thread_info-stepn.dat" where n is the
 * given step value, or "thread_info_MPI-stepn.dat", if we are running
 * under MPI. Note if running under MPI all the ranks are dumped into this
 * one file, which has an additional field to identify the rank.
 *
 * Only tasks that are not implicit and have a non-zero toc are written. The
 * function does nothing unless SWIFT_DEBUG_TASKS is defined. A failure to
 * open the output file is reported through error().
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  /* Need this to convert ticks to seconds. */
  const unsigned long long cpufreq = clocks_get_cpufreq();

#ifdef WITH_MPI
  /* Make sure output file is empty, only on one rank. */
  char dumpfile[35];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step);
  FILE *file_thread;
  if (engine_rank == 0) {
    file_thread = fopen(dumpfile, "w");
    if (file_thread == NULL)
      error("Could not create task dump file '%s'.", dumpfile);
    fclose(file_thread);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int i = 0; i < e->nr_nodes; i++) {

    /* Rank 0 decides the index of the writing node, this happens
     * one-by-one. */
    int kk = i;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (i == engine_rank) {

      /* Open file and position at end. */
      file_thread = fopen(dumpfile, "a");
      if (file_thread == NULL)
        error("Could not open task dump file '%s'.", dumpfile);

      /* Add some information to help with the plots and conversion of ticks to
       * seconds. The casts make the arguments match the %lld conversions. */
      fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
              engine_rank, (long long int)e->tic_step,
              (long long int)e->toc_step, e->updates, e->g_updates,
              e->s_updates, (long long int)cpufreq);

      for (int l = 0; l < e->sched.nr_tasks; l++) {
        const struct task *t = &e->sched.tasks[l];

        /* Skip implicit tasks and tasks without a recorded end time. */
        if (t->implicit || t->toc == 0) continue;

        fprintf(
            file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
            engine_rank, t->rid, t->type, t->subtype, (t->cj == NULL),
            (long long int)t->tic, (long long int)t->toc,
            (t->ci != NULL) ? t->ci->hydro.count : 0,
            (t->cj != NULL) ? t->cj->hydro.count : 0,
            (t->ci != NULL) ? t->ci->grav.count : 0,
            (t->cj != NULL) ? t->cj->grav.count : 0, t->flags, t->sid);
      }
      fclose(file_thread);
    }

    /* And we wait for all to synchronize. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else
  /* Non-MPI, so just a single engine's worth of tasks to dump. */
  char dumpfile[32];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step);
  FILE *file_thread = fopen(dumpfile, "w");
  if (file_thread == NULL)
    error("Could not create task dump file '%s'.", dumpfile);

  /* Add some information to help with the plots and conversion of ticks to
   * seconds. The casts make the arguments match the %lld conversions. */
  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
          -1, -1, 1, (long long int)e->tic_step, (long long int)e->toc_step,
          e->updates, e->g_updates, e->s_updates, 0, (long long int)cpufreq);

  for (int l = 0; l < e->sched.nr_tasks; l++) {
    const struct task *t = &e->sched.tasks[l];

    /* Skip implicit tasks and tasks without a recorded end time. */
    if (t->implicit || t->toc == 0) continue;

    fprintf(file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n", t->rid,
            t->type, t->subtype, (t->cj == NULL), (long long int)t->tic,
            (long long int)t->toc, (t->ci == NULL) ? 0 : t->ci->hydro.count,
            (t->cj == NULL) ? 0 : t->cj->hydro.count,
            (t->ci == NULL) ? 0 : t->ci->grav.count,
            (t->cj == NULL) ? 0 : t->cj->grav.count, t->sid);
  }
  fclose(file_thread);
#endif  // WITH_MPI

#endif  // SWIFT_DEBUG_TASKS
}

/**
 * @brief Generate simple statistics about the times used by the tasks of
861
862
863
 *        all the engines and write these into two format, a human readable
 *        version for debugging and one intented for inclusion as the fixed
 *        costs for repartitioning.
864
 *
865
866
867
868
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of task of each type/subtype followed by the minimum, maximum, mean
 * and total time, in millisec and then the fixed costs value.
869
 *
870
871
872
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
873
 *
874
 * @param dumpfile name of the file for the output.
875
 * @param e the #engine
876
877
878
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 *                 one, only used if header is false.
879
 */
Peter W. Draper's avatar
Peter W. Draper committed
880
881
void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                     int allranks) {
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920

  /* Need arrays for sum, min and max across all types and subtypes. */
  double sum[task_type_count][task_subtype_count];
  double min[task_type_count][task_subtype_count];
  double max[task_type_count][task_subtype_count];
  int count[task_type_count][task_subtype_count];

  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      sum[j][k] = 0.0;
      count[j][k] = 0;
      min[j][k] = DBL_MAX;
      max[j][k] = 0.0;
    }
  }

  double total[1] = {0.0};
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    int type = e->sched.tasks[l].type;

    /* Skip implicit tasks, tasks that didn't run and MPI send/recv as these
     * are not interesting (or meaningfully measured). */
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0 &&
        type != task_type_send && type != task_type_recv) {
      int subtype = e->sched.tasks[l].subtype;

      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
      sum[type][subtype] += dt;
      count[type][subtype] += 1;
      if (dt < min[type][subtype]) {
        min[type][subtype] = dt;
      }
      if (dt > max[type][subtype]) {
        max[type][subtype] = dt;
      }
      total[0] += dt;
    }
  }

921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
#ifdef WITH_MPI
  if (allranks || header) {
    /* Get these from all ranks for output from rank 0. Could wrap these into a
     * single operation. */
    size_t size = task_type_count * task_subtype_count;
    int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
                         MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
                     MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
                     MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
                     MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
                     MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
  }
946

947
  if (!allranks || (engine_rank == 0 && (allranks || header))) {
948
949
950
#endif

    FILE *dfile = fopen(dumpfile, "w");
951
952
953
954
955
956
    if (header) {
      fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n");
      fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n");
    } else {
      fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
    }
957
958
959
960
961
962
963

    for (int j = 0; j < task_type_count; j++) {
      const char *taskID = taskID_names[j];
      for (int k = 0; k < task_subtype_count; k++) {
        if (sum[j][k] > 0.0) {
          double mean = sum[j][k] / (double)count[j][k];
          double perc = 100.0 * sum[j][k] / total[0];
964
965
966
967

          /* Fixed cost is in .1ns as we want to compare between runs in
           * some absolute units. */
          int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f);
968
969
970
971
972
973
974
975
          if (header) {
            fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
                    k, fixed_cost, taskID, subtaskID_names[k]);
          } else {
            fprintf(dfile,
                    "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n",
                    taskID, subtaskID_names[k], count[j][k],
                    clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
Peter W. Draper's avatar
Peter W. Draper committed
976
977
                    clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), perc,
                    fixed_cost);
978
          }
979
980
981
982
983
984
985
        }
      }
    }
    fclose(dfile);
#ifdef WITH_MPI
  }
#endif
986
}