Skip to content
Snippets Groups Projects
task.c 30.63 KiB
/*******************************************************************************
 * This file is part of SWIFT.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *                    Matthieu Schaller (matthieu.schaller@durham.ac.uk)
 *               2015 Peter W. Draper (p.w.draper@durham.ac.uk)
 *               2016 John A. Regan (john.a.regan@durham.ac.uk)
 *                    Tom Theuns (tom.theuns@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Config parameters. */
#include "../config.h"

/* Some standard headers. */
#include <float.h>
#include <limits.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* MPI headers. */
#ifdef WITH_MPI
#include <mpi.h>
#endif

/* This object's header. */
#include "task.h"

/* Local headers. */
#include "atomic.h"
#include "engine.h"
#include "error.h"
#include "inline.h"
#include "lock.h"

/* Task type names. */
const char *taskID_names[task_type_count] = {"none",
                                             "sort",
                                             "self",
                                             "pair",
                                             "sub_self",
                                             "sub_pair",
                                             "init_grav",
                                             "init_grav_out",
                                             "ghost_in",
                                             "ghost",
                                             "ghost_out",
                                             "extra_ghost",
                                             "drift_part",
                                             "drift_gpart",
                                             "drift_gpart_out",
                                             "end_force",
                                             "kick1",
                                             "kick2",
                                             "timestep",
                                             "timestep_limiter",
                                             "send",
                                             "recv",
                                             "grav_long_range",
                                             "grav_mm",
                                             "grav_down_in",
                                             "grav_down",
                                             "grav_mesh",
                                             "cooling",
                                             "star_formation",
                                             "logger",
                                             "stars_ghost_in",
                                             "stars_ghost",
                                             "stars_ghost_out",
                                             "stars_sort"};

/* Sub-task type names. */
const char *subtaskID_names[task_subtype_count] = {
    "none",    "density",       "gradient",      "force",
    "limiter", "grav",          "external_grav", "tend",
    "xv",      "rho",           "gpart",         "multipole",
    "spart",   "stars_density", "stars_feedback"};

#ifdef WITH_MPI
/* MPI communicators for the subtypes. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif

/**
 * @brief Computes the overlap between the parts array of two given cells.
 *
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart)
 * @param ARRAY is the array of this specific type.
 * @param COUNT is the number of elements in the array.
 */
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                               \
  __attribute__((always_inline))                                            \
      INLINE static size_t task_cell_overlap_##TYPE(                        \
          const struct cell *restrict ci, const struct cell *restrict cj) { \
                                                                            \
    if (ci == NULL || cj == NULL) return 0;                                 \
                                                                            \
    if (ci->ARRAY <= cj->ARRAY &&                                           \
        ci->ARRAY + ci->COUNT >= cj->ARRAY + cj->COUNT) {                   \
      return cj->COUNT;                                                     \
    } else if (cj->ARRAY <= ci->ARRAY &&                                    \
               cj->ARRAY + cj->COUNT >= ci->ARRAY + ci->COUNT) {            \
      return ci->COUNT;                                                     \
    }                                                                       \
                                                                            \
    return 0;                                                               \
  }

TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);

/**
 * @brief Returns the #task_actions for a given task.
 *
 * @param t The #task.
 */
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {

  switch (t->type) {

    case task_type_none:
      return task_action_none;
      break;
    case task_type_drift_part:
    case task_type_sort:
    case task_type_ghost:
    case task_type_extra_ghost:
    case task_type_timestep_limiter:
    case task_type_cooling:
      return task_action_part;
      break;

    case task_type_star_formation:
      return task_action_all;

    case task_type_stars_ghost:
    case task_type_stars_sort:
      return task_action_spart;
      break;

    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
        case task_subtype_gradient:
        case task_subtype_force:
        case task_subtype_limiter:
          return task_action_part;
          break;

        case task_subtype_stars_density:
        case task_subtype_stars_feedback:
          return task_action_all;
          break;

        case task_subtype_grav:
        case task_subtype_external_grav:
          return task_action_gpart;
          break;

        default:
          error("Unknow task_action for task");
          return task_action_none;
          break;
      }
      break;

    case task_type_end_force:
    case task_type_kick1:
    case task_type_kick2:
    case task_type_logger:
    case task_type_timestep:
    case task_type_send:
    case task_type_recv:
      if (t->ci->hydro.count > 0 && t->ci->grav.count > 0)
        return task_action_all;
      else if (t->ci->hydro.count > 0)
        return task_action_part;
      else if (t->ci->grav.count > 0)
        return task_action_gpart;
      else
        error("Task without particles");
      break;

    case task_type_init_grav:
    case task_type_grav_mm:
    case task_type_grav_long_range:
      return task_action_multipole;
      break;
    case task_type_drift_gpart:
    case task_type_grav_down:
    case task_type_grav_mesh:
      return task_action_gpart;
      break;

    default:
      error("Unknown task_action for task");
      return task_action_none;
      break;
  }

  /* Silence compiler warnings */
  error("Unknown task_action for task");
  return task_action_none;
}

/**
 * @brief Compute the Jaccard similarity of the data used by two
 *        different tasks.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 */
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

  /* First check if any of the two tasks are of a type that don't
     use cells. */
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* Else, no overlap */
  return 0.f;
}

/**
 * @brief Unlock the cell held by this task.
 *
 * @param t The #task.
 */
void task_unlock(struct task *t) {

  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
  struct cell *ci = t->ci, *cj = t->cj;

  /* Act based on task type. */
  switch (type) {

    case task_type_end_force:
    case task_type_kick1:
    case task_type_kick2:
    case task_type_logger:
    case task_type_timestep:
      cell_unlocktree(ci);
      cell_gunlocktree(ci);
      break;

    case task_type_drift_part:
    case task_type_sort:
    case task_type_ghost:
    case task_type_timestep_limiter:
      cell_unlocktree(ci);
      break;

    case task_type_drift_gpart:
    case task_type_grav_mesh:
      cell_gunlocktree(ci);
      break;

    case task_type_stars_sort:
      cell_sunlocktree(ci);
      break;

    case task_type_self:
    case task_type_sub_self:
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
        cell_munlocktree(ci);
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_unlocktree(ci);
      } else {
        cell_unlocktree(ci);
      }
      break;

    case task_type_pair:
    case task_type_sub_pair:
      if (subtype == task_subtype_grav) {
        cell_gunlocktree(ci);
        cell_gunlocktree(cj);
        cell_munlocktree(ci);
        cell_munlocktree(cj);
      } else if (subtype == task_subtype_stars_density) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
      } else if (subtype == task_subtype_stars_feedback) {
        cell_sunlocktree(ci);
        cell_sunlocktree(cj);
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      } else {
        cell_unlocktree(ci);
        cell_unlocktree(cj);
      }
      break;

    case task_type_grav_down:
      cell_gunlocktree(ci);
      cell_munlocktree(ci);
      break;

    case task_type_grav_long_range:
      cell_munlocktree(ci);
      break;

    case task_type_grav_mm:
      cell_munlocktree(ci);
      cell_munlocktree(cj);
      break;

    case task_type_star_formation:
      cell_unlocktree(ci);
      cell_sunlocktree(ci);
      cell_gunlocktree(ci);
      break;

    default:
      break;
  }
}

/**
 * @brief Try to lock the cells associated with this task.
 *
 * @param t the #task.
 */
int task_lock(struct task *t) {

  const enum task_types type = t->type;
  const enum task_subtypes subtype = t->subtype;
  struct cell *ci = t->ci, *cj = t->cj;
#ifdef WITH_MPI
  int res = 0, err = 0;
  MPI_Status stat;
#endif

  switch (type) {

    /* Communication task? */
    case task_type_recv:
    case task_type_send:
#ifdef WITH_MPI
      /* Check the status of the MPI request. */
      if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
        char buff[MPI_MAX_ERROR_STRING];
        int len;
        MPI_Error_string(err, buff, &len);
        error(
            "Failed to test request on send/recv task (type=%s/%s tag=%lld, "
            "%s).",
            taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
      }
      return res;
#else
      error("SWIFT was not compiled with MPI support.");
#endif
      break;

    case task_type_end_force:
    case task_type_kick1:
    case task_type_kick2:
    case task_type_logger:
    case task_type_timestep:
      if (ci->hydro.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      break;

    case task_type_drift_part:
    case task_type_sort:
    case task_type_ghost:
    case task_type_timestep_limiter:
      if (ci->hydro.hold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      break;

    case task_type_stars_sort:
      if (ci->stars.hold) return 0;
      if (cell_slocktree(ci) != 0) return 0;
      break;

    case task_type_drift_gpart:
    case task_type_grav_mesh:
      if (ci->grav.phold) return 0;
      if (cell_glocktree(ci) != 0) return 0;
      break;

    case task_type_self:
    case task_type_sub_self:
      if (subtype == task_subtype_grav) {
        /* Lock the gparts and the m-pole */
        if (ci->grav.phold || ci->grav.mhold) return 0;
        if (cell_glocktree(ci) != 0)
          return 0;
        else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          return 0;
        }
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
      } else if (subtype == task_subtype_stars_feedback) {
        if (ci->stars.hold) return 0;
        if (ci->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
        if (ci->hydro.hold) return 0;
        if (cell_locktree(ci) != 0) return 0;
      }
      break;

    case task_type_pair:
    case task_type_sub_pair:
      if (subtype == task_subtype_grav) {
        /* Lock the gparts and the m-pole in both cells */
        if (ci->grav.phold || cj->grav.phold) return 0;
        if (cell_glocktree(ci) != 0) return 0;
        if (cell_glocktree(cj) != 0) {
          cell_gunlocktree(ci);
          return 0;
        } else if (cell_mlocktree(ci) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          return 0;
        } else if (cell_mlocktree(cj) != 0) {
          cell_gunlocktree(ci);
          cell_gunlocktree(cj);
          cell_munlocktree(ci);
          return 0;
        }
      } else if (subtype == task_subtype_stars_density) {
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
      } else if (subtype == task_subtype_stars_feedback) {
        /* Lock the stars and the gas particles in both cells */
        if (ci->stars.hold || cj->stars.hold) return 0;
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_slocktree(ci) != 0) return 0;
        if (cell_slocktree(cj) != 0) {
          cell_sunlocktree(ci);
          return 0;
        }
        if (cell_locktree(ci) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          return 0;
        }
        if (cell_locktree(cj) != 0) {
          cell_sunlocktree(ci);
          cell_sunlocktree(cj);
          cell_unlocktree(ci);
          return 0;
        }
      } else { /* subtype == hydro */
        /* Lock the parts in both cells */
        if (ci->hydro.hold || cj->hydro.hold) return 0;
        if (cell_locktree(ci) != 0) return 0;
        if (cell_locktree(cj) != 0) {
          cell_unlocktree(ci);
          return 0;
        }
      }
      break;

    case task_type_grav_down:
      /* Lock the gparts and the m-poles */
      if (ci->grav.phold || ci->grav.mhold) return 0;
      if (cell_glocktree(ci) != 0)
        return 0;
      else if (cell_mlocktree(ci) != 0) {
        cell_gunlocktree(ci);
        return 0;
      }
      break;

    case task_type_grav_long_range:
      /* Lock the m-poles */
      if (ci->grav.mhold) return 0;
      if (cell_mlocktree(ci) != 0) return 0;
      break;

    case task_type_grav_mm:
      /* Lock both m-poles */
      if (ci->grav.mhold || cj->grav.mhold) return 0;
      if (cell_mlocktree(ci) != 0) return 0;
      if (cell_mlocktree(cj) != 0) {
        cell_munlocktree(ci);
        return 0;
      }
      break;

    case task_type_star_formation:
      /* Lock the gas, gravity and star particles */
      if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
      if (cell_locktree(ci) != 0) return 0;
      if (cell_slocktree(ci) != 0) {
        cell_unlocktree(ci);
        return 0;
      }
      if (cell_glocktree(ci) != 0) {
        cell_unlocktree(ci);
        cell_sunlocktree(ci);
        return 0;
      }

    default:
      break;
  }

  /* If we made it this far, we've got a lock. */
  return 1;
}

/**
 * @brief Print basic information about a task.
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d",
          taskID_names[t->type], subtaskID_names[t->subtype], t->wait,
          t->nr_unlock_tasks, t->skip);
}

/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph.
 *
 * @param type The #task type.
 * @param subtype The #task subtype.
 * @param cluster (return) The group name (should be allocated)
 */
void task_get_group_name(int type, int subtype, char *cluster) {

  if (type == task_type_grav_long_range || type == task_type_grav_mm ||
      type == task_type_grav_mesh) {

    strcpy(cluster, "Gravity");
    return;
  }

  switch (subtype) {
    case task_subtype_density:
      strcpy(cluster, "Density");
      break;
    case task_subtype_gradient:
      strcpy(cluster, "Gradient");
      break;
    case task_subtype_force:
      strcpy(cluster, "Force");
      break;
    case task_subtype_grav:
      strcpy(cluster, "Gravity");
      break;
    case task_subtype_limiter:
      strcpy(cluster, "Timestep_limiter");
      break;
    case task_subtype_stars_density:
      strcpy(cluster, "Stars");
      break;
    default:
      strcpy(cluster, "None");
      break;
  }
}

/**
 * @brief Generate the full name of a #task.
 *
 * @param type The #task type.
 * @param subtype The #task type.
 * @param name (return) The formatted string
 */
void task_get_full_name(int type, int subtype, char *name) {

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
  if (type >= task_type_count) error("Unknown task type %i", type);

  if (subtype >= task_subtype_count)
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
#endif

  /* Full task name */
  if (subtype == task_subtype_none)
    sprintf(name, "%s", taskID_names[type]);
  else
    sprintf(name, "%s_%s", taskID_names[type], subtaskID_names[subtype]);
}

#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 */
void task_create_mpi_comms(void) {
  for (int i = 0; i < task_subtype_count; i++) {
    MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[i]);
  }
}
#endif

/**
 * @brief dump all the tasks of all the known engines into a file for
 * postprocessing.
 *
 * Dumps the information to a file "thread_info-stepn.dat" where n is the
 * given step value, or "thread_info_MPI-stepn.dat", if we are running
 * under MPI. Note if running under MPIU all the ranks are dumped into this
 * one file, which has an additional field to identify the rank.
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  /* Need this to convert ticks to seconds. */
  unsigned long long cpufreq = clocks_get_cpufreq();

#ifdef WITH_MPI
  /* Make sure output file is empty, only on one rank. */
  char dumpfile[35];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info_MPI-step%d.dat", step);
  FILE *file_thread;
  if (engine_rank == 0) {
    file_thread = fopen(dumpfile, "w");
    fclose(file_thread);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int i = 0; i < e->nr_nodes; i++) {

    /* Rank 0 decides the index of the writing node, this happens
     * one-by-one. */
    int kk = i;
    MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (i == engine_rank) {

      /* Open file and position at end. */
      file_thread = fopen(dumpfile, "a");

      /* Add some information to help with the plots and conversion of ticks to
       * seconds. */
      fprintf(file_thread, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
              engine_rank, e->tic_step, e->toc_step, e->updates, e->g_updates,
              e->s_updates, cpufreq);
      int count = 0;
      for (int l = 0; l < e->sched.nr_tasks; l++) {
        if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
          fprintf(
              file_thread, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
              engine_rank, e->sched.tasks[l].rid, e->sched.tasks[l].type,
              e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
              e->sched.tasks[l].tic, e->sched.tasks[l].toc,
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->hydro.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->hydro.count
                                             : 0,
              (e->sched.tasks[l].ci != NULL) ? e->sched.tasks[l].ci->grav.count
                                             : 0,
              (e->sched.tasks[l].cj != NULL) ? e->sched.tasks[l].cj->grav.count
                                             : 0,
              e->sched.tasks[l].flags, e->sched.tasks[l].sid);
        }
        count++;
      }
      fclose(file_thread);
    }

    /* And we wait for all to synchronize. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else
  /* Non-MPI, so just a single engine's worth of tasks to dump. */
  char dumpfile[32];
  snprintf(dumpfile, sizeof(dumpfile), "thread_info-step%d.dat", step);
  FILE *file_thread;
  file_thread = fopen(dumpfile, "w");

  /* Add some information to help with the plots and conversion of ticks to
   * seconds. */
  fprintf(file_thread, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2,
          -1, -1, 1, e->tic_step, e->toc_step, e->updates, e->g_updates,
          e->s_updates, 0, cpufreq);
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0) {
      fprintf(
          file_thread, " %i %i %i %i %lli %lli %i %i %i %i %i\n",
          e->sched.tasks[l].rid, e->sched.tasks[l].type,
          e->sched.tasks[l].subtype, (e->sched.tasks[l].cj == NULL),
          e->sched.tasks[l].tic, e->sched.tasks[l].toc,
          (e->sched.tasks[l].ci == NULL) ? 0
                                         : e->sched.tasks[l].ci->hydro.count,
          (e->sched.tasks[l].cj == NULL) ? 0
                                         : e->sched.tasks[l].cj->hydro.count,
          (e->sched.tasks[l].ci == NULL) ? 0 : e->sched.tasks[l].ci->grav.count,
          (e->sched.tasks[l].cj == NULL) ? 0 : e->sched.tasks[l].cj->grav.count,
          e->sched.tasks[l].sid);
    }
  }
  fclose(file_thread);
#endif  // WITH_MPI
#endif  // SWIFT_DEBUG_TASKS
}

/**
 * @brief Generate simple statistics about the times used by the tasks of
 *        all the engines and write these into two format, a human readable
 *        version for debugging and one intented for inclusion as the fixed
 *        costs for repartitioning.
 *
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of task of each type/subtype followed by the minimum, maximum, mean
 * and total time, in millisec and then the fixed costs value.
 *
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
 *
 * @param dumpfile name of the file for the output.
 * @param e the #engine
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 *                 one, only used if header is false.
 */
void task_dump_stats(const char *dumpfile, struct engine *e, int header,
                     int allranks) {

  /* Need arrays for sum, min and max across all types and subtypes. */
  double sum[task_type_count][task_subtype_count];
  double min[task_type_count][task_subtype_count];
  double max[task_type_count][task_subtype_count];
  int count[task_type_count][task_subtype_count];

  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      sum[j][k] = 0.0;
      count[j][k] = 0;
      min[j][k] = DBL_MAX;
      max[j][k] = 0.0;
    }
  }

  double total[1] = {0.0};
  for (int l = 0; l < e->sched.nr_tasks; l++) {
    int type = e->sched.tasks[l].type;

    /* Skip implicit tasks, tasks that didn't run and MPI send/recv as these
     * are not interesting (or meaningfully measured). */
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0 &&
        type != task_type_send && type != task_type_recv) {
      int subtype = e->sched.tasks[l].subtype;

      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
      sum[type][subtype] += dt;
      count[type][subtype] += 1;
      if (dt < min[type][subtype]) {
        min[type][subtype] = dt;
      }
      if (dt > max[type][subtype]) {
        max[type][subtype] = dt;
      }
      total[0] += dt;
    }
  }

#ifdef WITH_MPI
  if (allranks || header) {
    /* Get these from all ranks for output from rank 0. Could wrap these into a
     * single operation. */
    size_t size = task_type_count * task_subtype_count;
    int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
                         MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
                     MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
                     MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
                     MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");

    res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
                     MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
  }

  if (!allranks || (engine_rank == 0 && (allranks || header))) {
#endif
    FILE *dfile = fopen(dumpfile, "w");
    if (header) {
      fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n");
      fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n");
    } else {
      fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");
    }

    for (int j = 0; j < task_type_count; j++) {
      const char *taskID = taskID_names[j];
      for (int k = 0; k < task_subtype_count; k++) {
        if (sum[j][k] > 0.0) {
          double mean = sum[j][k] / (double)count[j][k];
          double perc = 100.0 * sum[j][k] / total[0];

          /* Fixed cost is in .1ns as we want to compare between runs in
           * some absolute units. */
          int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f);
          if (header) {
            fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
                    k, fixed_cost, taskID, subtaskID_names[k]);
          } else {
            fprintf(dfile,
                    "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n",
                    taskID, subtaskID_names[k], count[j][k],
                    clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
                    clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), perc,
                    fixed_cost);
          }
        }
      }
    }
    fclose(dfile);
#ifdef WITH_MPI
  }
#endif
}