task.c 32.7 KB
Newer Older
1
/*******************************************************************************
2
 * This file is part of SWIFT.
3
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
4
5
6
7
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
8
 *
9
10
11
12
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
13
 *
14
15
16
17
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
18
 *
19
20
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
 *
22
23
24
25
26
27
28
29
30
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
31
32
33
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
34

35
36
/* MPI headers. */
#ifdef WITH_MPI
37
#include <mpi.h>
38
39
#endif

40
41
42
/* This object's header. */
#include "task.h"

43
/* Local headers. */
Pedro Gonnet's avatar
Pedro Gonnet committed
44
#include "atomic.h"
45
#include "engine.h"
46
#include "error.h"
47
#include "inline.h"
48
#include "lock.h"
49
50

/* Task type names. */
51
52
53
54
55
56
57
58
59
60
61
62
63
const char *taskID_names[task_type_count] = {"none",
                                             "sort",
                                             "self",
                                             "pair",
                                             "sub_self",
                                             "sub_pair",
                                             "init_grav",
                                             "init_grav_out",
                                             "ghost_in",
                                             "ghost",
                                             "ghost_out",
                                             "extra_ghost",
                                             "drift_part",
64
                                             "drift_spart",
65
                                             "drift_bpart",
66
67
                                             "drift_gpart",
                                             "drift_gpart_out",
68
                                             "end_hydro_force",
69
70
71
                                             "kick1",
                                             "kick2",
                                             "timestep",
72
                                             "timestep_limiter",
73
74
75
76
77
78
79
                                             "send",
                                             "recv",
                                             "grav_long_range",
                                             "grav_mm",
                                             "grav_down_in",
                                             "grav_down",
                                             "grav_mesh",
80
                                             "grav_end_force",
81
82
                                             "cooling",
                                             "star_formation",
83
84
                                             "star_formation_in",
                                             "star_formation_out",
85
                                             "logger",
86
87
                                             "stars_in",
                                             "stars_out",
88
89
                                             "stars_ghost_in",
                                             "stars_ghost",
Loic Hausammann's avatar
Loic Hausammann committed
90
                                             "stars_ghost_out",
91
                                             "stars_sort",
92
93
                                             "bh_in",
                                             "bh_out",
94
                                             "bh_ghost"};
95

96
/* Sub-task type names. */
97
const char *subtaskID_names[task_subtype_count] = {
98
99
100
101
102
    "none",          "density",        "gradient",      "force",
    "limiter",       "grav",           "external_grav", "tend_part",
    "tend_gpart",    "tend_spart",     "tend_bpart",    "xv",
    "rho",           "gpart",          "multipole",     "spart",
    "stars_density", "stars_feedback", "bh_density",    "bh_feedback"};
103

104
105
106
107
108
#ifdef WITH_MPI
/* MPI communicators for the subtypes. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif

109
110
/**
 * @brief Computes the overlap between the parts array of two given cells.
111
 *
Matthieu Schaller's avatar
Matthieu Schaller committed
112
113
114
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart)
 * @param ARRAY is the array of this specific type.
 * @param COUNT is the number of elements in the array.
115
 */
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                               \
  __attribute__((always_inline))                                            \
      INLINE static size_t task_cell_overlap_##TYPE(                        \
          const struct cell *restrict ci, const struct cell *restrict cj) { \
                                                                            \
    if (ci == NULL || cj == NULL) return 0;                                 \
                                                                            \
    if (ci->ARRAY <= cj->ARRAY &&                                           \
        ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT) {                   \
      return cj->COUNT;                                                     \
    } else if (cj->ARRAY <= ci->ARRAY &&                                    \
               cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT) {            \
      return ci->COUNT;                                                     \
    }                                                                       \
                                                                            \
    return 0;                                                               \
  }
133

134
TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
135
136
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);
Loic Hausammann's avatar
Loic Hausammann committed
137

138
139
140
141
142
/**
 * @brief Returns the #task_actions for a given task.
 *
 * @param t The #task.
 */
143
144
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {
145
146
147
148
149
150
151

  switch (t->type) {

    case task_type_none:
      return task_action_none;
      break;

152
    case task_type_drift_part:
153
154
    case task_type_sort:
    case task_type_ghost:
155
    case task_type_extra_ghost:
156
    case task_type_timestep_limiter:
Stefan Arridge's avatar
Stefan Arridge committed
157
    case task_type_cooling:
158
    case task_type_end_hydro_force:
159
160
161
      return task_action_part;
      break;

162
163
164
    case task_type_star_formation:
      return task_action_all;

165
    case task_type_drift_spart:
166
    case task_type_stars_ghost:
167
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
168
169
170
      return task_action_spart;
      break;

171
172
173
174
175
    case task_type_drift_bpart:
    case task_type_bh_ghost:
      return task_action_bpart;
      break;

176
177
178
179
180
181
182
    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
183
        case task_subtype_gradient:
184
        case task_subtype_force:
185
        case task_subtype_limiter:
186
187
188
          return task_action_part;
          break;

189
        case task_subtype_stars_density:
Alexei Borissov's avatar
Alexei Borissov committed
190
        case task_subtype_stars_feedback:
191
192
          return task_action_all;
          break;
193

194
195
196
197
198
        case task_subtype_bh_density:
        case task_subtype_bh_feedback:
          return task_action_all;
          break;

199
        case task_subtype_grav:
200
        case task_subtype_external_grav:
201
202
203
204
          return task_action_gpart;
          break;

        default:
205
206
207
208
#ifdef SWIFT_DEBUG_CHECKS
          error("Unknown task_action for task %s/%s", taskID_names[t->type],
                subtaskID_names[t->subtype]);
#endif
209
210
211
212
213
          return task_action_none;
          break;
      }
      break;

214
215
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
216
    case task_type_logger:
217
    case task_type_timestep:
218
219
    case task_type_send:
    case task_type_recv:
220
      if (t->ci->hydro.count > 0 && t->ci->grav.count > 0)
221
        return task_action_all;
222
      else if (t->ci->hydro.count > 0)
223
        return task_action_part;
224
      else if (t->ci->grav.count > 0)
225
        return task_action_gpart;
226
227
      else {
#ifdef SWIFT_DEBUG_CHECKS
228
        error("Task without particles");
229
230
#endif
      }
231
232
      break;

233
    case task_type_init_grav:
234
    case task_type_grav_mm:
235
    case task_type_grav_long_range:
236
237
238
      return task_action_multipole;
      break;

239
    case task_type_drift_gpart:
240
    case task_type_grav_down:
241
    case task_type_end_grav_force:
242
    case task_type_grav_mesh:
243
      return task_action_gpart;
244
      break;
245

246
    default:
247
248
249
250
#ifdef SWIFT_DEBUG_CHECKS
      error("Unknown task_action for task %s/%s", taskID_names[t->type],
            subtaskID_names[t->subtype]);
#endif
251
252
253
      return task_action_none;
      break;
  }
254

255
256
257
258
259
#ifdef SWIFT_DEBUG_CHECKS
  error("Unknown task_action for task %s/%s", taskID_names[t->type],
        subtaskID_names[t->subtype]);
#endif
  /* Silence compiler warnings. We should never get here. */
260
  return task_action_none;
261
262
}

263
264
265
266
267
268
269
/**
 * @brief Compute the Jaccard similarity of the data used by two
 *        different tasks.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 */
270
271
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {
272
273
274
275
276
277

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

278
279
  /* First check if any of the two tasks are of a type that don't
     use cells. */
280
281
282
283
284
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
285
286
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
287
288
289
  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
290
291
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);
292
293
294
295
296
297

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
298
299
300
301
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;
302

303
    if (size_union == 0) return 0.f;
304

305
306
307
308
309
310
311
312
313
314
315
316
317
318
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
319
320
321
322
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;
323

324
325
    if (size_union == 0) return 0.f;

326
327
328
329
330
331
332
333
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }
334

Loic Hausammann's avatar
Loic Hausammann committed
335
336
337
338
339
  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
340
341
342
343
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;
Loic Hausammann's avatar
Loic Hausammann committed
344

345
    if (size_union == 0) return 0.f;
Loic Hausammann's avatar
Loic Hausammann committed
346

Loic Hausammann's avatar
Loic Hausammann committed
347
348
349
350
351
352
353
354
355
    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

356
357
  /* Else, no overlap */
  return 0.f;
358
}
359

360
361
/**
 * @brief Unlock the cell held by this task.
362
 *
363
364
 * @param t The #task.
 */
365
366
void task_unlock(struct task *t) {

367
368
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
369
370
  struct cell *ci = t->ci, *cj = t->cj;

371
  /* Act based on task type. */
372
373
  switch (type) {

374
375
    case task_type_kick1:
    case task_type_kick2:
376
    case task_type_logger:
377
    case task_type_timestep:
378
379
380
      cell_unlocktree(ci);
      cell_gunlocktree(ci);
      break;
Matthieu Schaller's avatar
Matthieu Schaller committed
381

382
    case task_type_drift_part:
383
    case task_type_sort:
384
    case task_type_ghost:
385
    case task_type_end_hydro_force:
386
    case task_type_timestep_limiter:
387
388
389
      cell_unlocktree(ci);
      break;

390
    case task_type_drift_gpart:
391
    case task_type_grav_mesh:
392
    case task_type_end_grav_force:
393
394
395
      cell_gunlocktree(ci);
      break;

396
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
397
398
399
      cell_sunlocktree(ci);
      break;

400
    case task_type_self:
401
    case task_type_sub_self:
402
403
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
404
        cell_munlocktree(ci);
Loic Hausammann's avatar
Loic Hausammann committed
405
406
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
Alexei Borissov's avatar
Alexei Borissov committed
407
408
409
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_unlocktree(ci);
410
411
412
      } else {
        cell_unlocktree(ci);
      }
413
      break;
414

415
    case task_type_pair:
416
    case task_type_sub_pair:
417
418
419
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
        cell_gunlocktree(cj);
420
421
        cell_munlocktree(ci);
        cell_munlocktree(cj);
Loic Hausammann's avatar
Loic Hausammann committed
422
423
424
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
Alexei Borissov's avatar
Alexei Borissov committed
425
426
427
428
429
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
430
431
432
433
434
435
      } else {
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      }
      break;

436
    case task_type_grav_down:
437
      cell_gunlocktree(ci);
438
439
440
      cell_munlocktree(ci);
      break;

441
    case task_type_grav_long_range:
442
      cell_munlocktree(ci);
443
      break;
444

445
446
447
448
449
    case task_type_grav_mm:
      cell_munlocktree(ci);
      cell_munlocktree(cj);
      break;

450
451
452
453
    case task_type_star_formation:
      cell_unlocktree(ci);
      cell_sunlocktree(ci);
      cell_gunlocktree(ci);
454
      break;
455

456
457
458
459
    default:
      break;
  }
}
460
461
462
463
464
465

/**
 * @brief Try to lock the cells associated with this task.
 *
 * @param t the #task.
 */
466
467
int task_lock(struct task *t) {

468
469
  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
470
  struct cell *ci = t->ci, *cj = t->cj;
471
472
473
474
#ifdef WITH_MPI
  int res = 0, err = 0;
  MPI_Status stat;
#endif
475

476
  switch (type) {
477

478
479
480
    /* Communication task? */
    case task_type_recv:
    case task_type_send:
481
#ifdef WITH_MPI
482
483
484
485
486
      /* Check the status of the MPI request. */
      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
        char buff[MPI_MAX_ERROR_STRING];
        int len;
        MPI_Error_string(err, buff, &len);
487
488
489
490
        error(
            "Failed to test request on send/recv task (type=%s/%s tag=%lld, "
            "%s).",
            taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
491
492
      }
      return res;
493
#else
494
      error("SWIFT was not compiled with MPI support.");
495
#endif
496
      break;
497

498
499
    case task_type_kick1:
    case task_type_kick2:
Loikki's avatar
Loikki committed
500
    case task_type_logger:
501
    case task_type_timestep:
502
      if (ci->hydro.hold || ci->grav.phold) return 0;
503
504
      if (cell_locktree(ci) != 0) return 0;
      if (cell_glocktree(ci) != 0) {
Matthieu Schaller's avatar
Matthieu Schaller committed
505
506
        cell_unlocktree(ci);
        return 0;
507
508
509
      }
      break;

510
    case task_type_drift_part:
511
    case task_type_sort:
512
    case task_type_ghost:
513
    case task_type_end_hydro_force:
514
    case task_type_timestep_limiter:
515
      if (ci->hydro.hold) return 0;
516
517
      if (cell_locktree(ci) != 0) return 0;
      break;
518

519
    case task_type_stars_sort:
Loic Hausammann's avatar
Loic Hausammann committed
520
521
522
523
      if (ci->stars.hold) return 0;
      if (cell_slocktree(ci) != 0) return 0;
      break;

524
    case task_type_drift_gpart:
525
    case task_type_end_grav_force:
526
    case task_type_grav_mesh:
527
      if (ci->grav.phold) return 0;
528
529
530
      if (cell_glocktree(ci) != 0) return 0;
      break;

531
    case task_type_self:
532
    case task_type_sub_self:
533
      if (subtype == task_subtype_grav) {
534
        /* Lock the gparts and the m-pole */
535
        if (ci->grav.phold || ci->grav.mhold) return 0;
536
537
538
539
540
541
        if (cell_glocktree(ci) != 0)
          return 0;
        else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          return 0;
        }
Loic Hausammann's avatar
Loic Hausammann committed
542
543
544
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
Alexei Borissov's avatar
Alexei Borissov committed
545
546
547
548
549
550
551
552
553
      } else if (subtype == task_subtype_stars_feedback) {
        if (ci->stars.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
Loic Hausammann's avatar
Loic Hausammann committed
554
        if (ci->hydro.hold) return 0;
555
556
557
        if (cell_locktree(ci) != 0) return 0;
      }
      break;
558

559
    case task_type_pair:
560
    case task_type_sub_pair:
561
      if (subtype == task_subtype_grav) {
562
        /* Lock the gparts and the m-pole in both cells */
563
        if (ci->grav.phold || cj->grav.phold) return 0;
564
565
566
567
        if (cell_glocktree(ci) != 0) return 0;
        if (cell_glocktree(cj) != 0) {
          cell_gunlocktree(ci);
          return 0;
568
569
570
571
572
573
574
575
576
        } else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          return 0;
        } else if (cell_mlocktree(cj) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          cell_munlocktree(ci);
          return 0;
577
        }
Loic Hausammann's avatar
Loic Hausammann committed
578
579
580
581
582
583
584
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
Alexei Borissov's avatar
Alexei Borissov committed
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
      } else if (subtype == task_subtype_stars_feedback) {
        /* Lock the stars and the gas particles in both cells */
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
606
        /* Lock the parts in both cells */
607
        if (ci->hydro.hold || cj->hydro.hold) return 0;
608
609
610
611
612
613
614
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
      }
      break;
615

616
617
    case task_type_grav_down:
      /* Lock the gparts and the m-poles */
618
      if (ci->grav.phold || ci->grav.mhold) return 0;
619
620
621
622
623
624
625
626
      if (cell_glocktree(ci) != 0)
        return 0;
      else if (cell_mlocktree(ci) != 0) {
        cell_gunlocktree(ci);
        return 0;
      }
      break;

627
    case task_type_grav_long_range:
628
      /* Lock the m-poles */
629
      if (ci->grav.mhold) return 0;
630
      if (cell_mlocktree(ci) != 0) return 0;
Matthieu Schaller's avatar
Matthieu Schaller committed
631
632
      break;

633
634
    case task_type_grav_mm:
      /* Lock both m-poles */
635
      if (ci->grav.mhold || cj->grav.mhold) return 0;
636
637
638
639
640
      if (cell_mlocktree(ci) != 0) return 0;
      if (cell_mlocktree(cj) != 0) {
        cell_munlocktree(ci);
        return 0;
      }
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
      break;

    case task_type_star_formation:
      /* Lock the gas, gravity and star particles */
      if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_slocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        cell_sunlocktree(ci);
        return 0;
      }
656

657
658
    default:
      break;
659
660
661
662
663
  }

  /* If we made it this far, we've got a lock. */
  return 1;
}
664

665
666
667
668
669
670
671
672
673
674
675
/**
 * @brief Print basic information about a task.
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d",
          taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
          t->nr_unlock_tasks, t->skip);
}
676

677
678
679
680
681
682
/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph.
 *
683
 * @param type The #task type.
684
 * @param subtype The #task subtype.
685
 * @param cluster (return) The group name (should be allocated)
686
 */
687
void task_get_group_name(int type, int subtype, char *cluster) {
688

689
690
  if (type == task_type_grav_long_range || type == task_type_grav_mm ||
      type == task_type_grav_mesh) {
691
692
693
694
695

    strcpy(cluster, "Gravity");
    return;
  }

696
  switch (subtype) {
697
698
699
700
    case task_subtype_density:
      strcpy(cluster, "Density");
      break;
    case task_subtype_gradient:
701
      if (type == task_type_send || type == task_type_recv) {
702
703
704
        strcpy(cluster, "None");
      } else {
        strcpy(cluster, "Gradient");
705
      }
706
707
      break;
    case task_subtype_force:
708
      strcpy(cluster, "Force");
709
710
711
712
      break;
    case task_subtype_grav:
      strcpy(cluster, "Gravity");
      break;
713
714
715
    case task_subtype_limiter:
      strcpy(cluster, "Timestep_limiter");
      break;
716
    case task_subtype_stars_density:
717
718
719
720
      strcpy(cluster, "StarsDensity");
      break;
    case task_subtype_stars_feedback:
      strcpy(cluster, "StarsFeedback");
721
      break;
722
723
724
725
726
727
    case task_subtype_bh_density:
      strcpy(cluster, "BHDensity");
      break;
    case task_subtype_bh_feedback:
      strcpy(cluster, "BHFeedback");
      break;
728
729
730
731
732
733
734
735
736
737
738
739
740
    default:
      strcpy(cluster, "None");
      break;
  }
}

/**
 * @brief Generate the full name of a #task.
 *
 * @param type The #task type.
 * @param subtype The #task type.
 * @param name (return) The formatted string
 */
741
void task_get_full_name(int type, int subtype, char *name) {
742
743
744

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
745
  if (type >= task_type_count) error("Unknown task type %i", type);
746

747
  if (subtype >= task_subtype_count)
748
749
750
751
752
753
754
755
756
757
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
#endif

  /* Full task name */
  if (subtype == task_subtype_none)
    sprintf(name, "%s", taskID_names[type]);
  else
    sprintf(name, "%s_%s", taskID_names[type], subtaskID_names[subtype]);
}

758
759
760
761
762
763
764
765
766
767
#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 */
void task_create_mpi_comms(void) {
  for (int i = 0; i < task_subtype_count; i++) {
    MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]);
  }
}
#endif
768
769

/**
770
771
 * @brief dump all the tasks of all the known engines into a file for
 * postprocessing.
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
 *
 * Dumps the information to a file "thread_info-stepn.dat" where n is the
 * given step value, or "thread_info_MPI-stepn.dat", if we are running
 * under MPI. Note if running under MPIU all the ranks are dumped into this
 * one file, which has an additional field to identify the rank.
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  /* Need this to convert ticks to seconds. */
  unsigned long long cpufreq = clocks_get_cpufreq();

#ifdef WITH_MPI
  /* Make sure output file is empty, only on one rank. */
  char dumpfile[35];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step);
  FILE *file_thread;
  if (engine_rank == 0) {
    file_thread = fopen(dumpfile, "w");
    fclose(file_thread);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int i = 0; i < e->nr_nodes; i++) {

    /* Rank 0 decides the index of the writing node, this happens
     * one-by-one. */
    int kk = i;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (i == engine_rank) {

      /* Open file and position at end. */
      file_thread = fopen(dumpfile, "a");

      /* Add some information to help with the plots and conversion of ticks to
       * seconds. */
      fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
Josh Borrow's avatar
Josh Borrow committed
814
815
              engine_rank, (long long int)e->tic_step,
              (long long int)e->toc_step, e->updates, e->g_updates,
816
817
818
819
820
821
822
823
              e->s_updates, cpufreq);
      int count = 0;
      for (int l = 0; l < e->sched.nr_tasks; l++) {
        if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
          fprintf(
              file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
              engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
              e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
Josh Borrow's avatar
Josh Borrow committed
824
825
              (long long int)e->sched.tasks[l].tic,
              (long long int)e->sched.tasks[l].toc,
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->hydro.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->hydro.count
                                             : 0,
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->grav.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->grav.count
                                             : 0,
              e->sched.tasks[l].flags, e->sched.tasks[l].sid);
        }
        count++;
      }
      fclose(file_thread);
    }

    /* And we wait for all to synchronize. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else
  /* Non-MPI, so just a single engine's worth of tasks to dump. */
  char dumpfile[32];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step);
  FILE *file_thread;
  file_thread = fopen(dumpfile, "w");

  /* Add some information to help with the plots and conversion of ticks to
   * seconds. */
  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
855
856
          -1, -1, 1, (unsigned long long)e->tic_step,
          (unsigned long long)e->toc_step, e->updates, e->g_updates,
857
858
859
860
861
862
863
          e->s_updates, 0, cpufreq);
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
      fprintf(
          file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
          e->sched.tasks[l].rid, e->sched.tasks[l].type,
          e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
Josh Borrow's avatar
Josh Borrow committed
864
865
          (unsigned long long)e->sched.tasks[l].tic,
          (unsigned long long)e->sched.tasks[l].toc,
866
867
868
869
870
871
872
873
874
875
876
          (e->sched.tasks[l].ci == NULL) ? 0
                                         : e->sched.tasks[l].ci->hydro.count,
          (e->sched.tasks[l].cj == NULL) ? 0
                                         : e->sched.tasks[l].cj->hydro.count,
          (e->sched.tasks[l].ci == NULL) ? 0 : e->sched.tasks[l].ci->grav.count,
          (e->sched.tasks[l].cj == NULL) ? 0 : e->sched.tasks[l].cj->grav.count,
          e->sched.tasks[l].sid);
    }
  }
  fclose(file_thread);
#endif  // WITH_MPI
877
878
879
880
881
#endif  // SWIFT_DEBUG_TASKS
}

/**
 * @brief Generate simple statistics about the times used by the tasks of
882
883
884
 *        all the engines and write these into two format, a human readable
 *        version for debugging and one intented for inclusion as the fixed
 *        costs for repartitioning.
885
 *
886
887
888
889
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of task of each type/subtype followed by the minimum, maximum, mean
 * and total time, in millisec and then the fixed costs value.
890
 *
891
892
893
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
894
 *
895
 * @param dumpfile name of the file for the output.
896
 * @param e the #engine
897
898
899
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 *                 one, only used if header is false.
900
 */
Peter W. Draper's avatar
Peter W. Draper committed
901
902
void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                     int allranks) {
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941

  /* Need arrays for sum, min and max across all types and subtypes. */
  double sum[task_type_count][task_subtype_count];
  double min[task_type_count][task_subtype_count];
  double max[task_type_count][task_subtype_count];
  int count[task_type_count][task_subtype_count];

  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      sum[j][k] = 0.0;
      count[j][k] = 0;
      min[j][k] = DBL_MAX;
      max[j][k] = 0.0;
    }
  }

  double total[1] = {0.0};
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    int type = e->sched.tasks[l].type;

    /* Skip implicit tasks, tasks that didn't run and MPI send/recv as these
     * are not interesting (or meaningfully measured). */
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0 &&
        type != task_type_send && type != task_type_recv) {
      int subtype = e->sched.tasks[l].subtype;

      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
      sum[type][subtype] += dt;
      count[type][subtype] += 1;
      if (dt < min[type][subtype]) {
        min[type][subtype] = dt;
      }
      if (dt > max[type][subtype]) {
        max[type][subtype] = dt;
      }
      total[0] += dt;
    }
  }

942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
#ifdef WITH_MPI
  if (allranks || header) {
    /* Get these from all ranks for output from rank 0. Could wrap these into a
     * single operation. */
    size_t size = task_type_count * task_subtype_count;
    int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
                         MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
                     MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
                     MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
                     MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
                     MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
  }
967

968
  if (!allranks || (engine_rank == 0 && (allranks || header))) {
969
970
971
#endif

    FILE *dfile = fopen(dumpfile, "w");
972
973
974
975
976
977
    if (header) {
      fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n");
      fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n");
    } else {
      fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
    }
978
979
980
981
982
983
984

    for (int j = 0; j < task_type_count; j++) {
      const char *taskID = taskID_names[j];
      for (int k = 0; k < task_subtype_count; k++) {
        if (sum[j][k] > 0.0) {
          double mean = sum[j][k] / (double)count[j][k];
          double perc = 100.0 * sum[j][k] / total[0];
985
986
987
988

          /* Fixed cost is in .1ns as we want to compare between runs in
           * some absolute units. */
          int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f);
989
990
991
992
993
994
995
996
          if (header) {
            fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
                    k, fixed_cost, taskID, subtaskID_names[k]);
          } else {
            fprintf(dfile,
                    "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n",
                    taskID, subtaskID_names[k], count[j][k],
                    clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
Peter W. Draper's avatar
Peter W. Draper committed
997
998
                    clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), perc,
                    fixed_cost);
999
          }
1000
1001
1002
1003
1004
1005
1006
        }
      }
    }
    fclose(dfile);
#ifdef WITH_MPI
  }
#endif
1007
}