/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
* Matthieu Schaller (schaller@strw.leidenuniv.nl)
* 2015 Peter W. Draper (p.w.draper@durham.ac.uk)
* 2016 John A. Regan (john.a.regan@durham.ac.uk)
* Tom Theuns (tom.theuns@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include
/* Some standard headers. */
#include
#include
#include
#include
#include
#include
/* MPI headers. */
#ifdef WITH_MPI
#include
#endif
/* This object's header. */
#include "task.h"
/* Local headers. */
#include "atomic.h"
#include "engine.h"
#include "error.h"
#include "inline.h"
#include "lock.h"
#include "mpiuse.h"
/* Task type names.
 *
 * Human-readable name of every task type. The table holds task_type_count
 * entries and is indexed directly with a task's type value (see
 * task_print(), task_get_full_name() and task_create_name_files() below),
 * so entry i must be the name of the task type whose numeric value is i.
 * NOTE(review): the order presumably mirrors the task type enum declared in
 * task.h -- keep both in sync when adding a new type. */
const char *taskID_names[task_type_count] = {
"none",
"sort",
"self",
"pair",
"sub_self",
"sub_pair",
"init_grav",
"init_grav_out",
"ghost_in",
"ghost",
"ghost_out",
"extra_ghost",
"drift_part",
"drift_spart",
"drift_sink",
"drift_bpart",
"drift_gpart",
"drift_gpart_out",
"hydro_end_force",
"kick1",
"kick2",
"timestep",
"timestep_limiter",
"timestep_sync",
"collect",
"send",
"recv",
"pack",
"unpack",
"grav_long_range",
"grav_mm",
"grav_down_in",
"grav_down",
"grav_end_force",
"cooling",
"cooling_in",
"cooling_out",
"star_formation",
"star_formation_in",
"star_formation_out",
"star_formation_sink",
"csds",
"stars_in",
"stars_out",
"stars_ghost_in",
"stars_density_ghost",
"stars_ghost_out",
"stars_prep_ghost1",
"hydro_prep_ghost1",
"stars_prep_ghost2",
"stars_sort",
"stars_resort",
"bh_in",
"bh_out",
"bh_density_ghost",
"bh_swallow_ghost1",
"bh_swallow_ghost2",
"bh_swallow_ghost3",
"fof_self",
"fof_pair",
"fof_attach_self",
"fof_attach_pair",
"neutrino_weight",
"sink_in",
"sink_density_ghost",
"sink_ghost1",
"sink_ghost2",
"sink_out",
"rt_in",
"rt_out",
"sink_formation",
"rt_ghost1",
"rt_ghost2",
"rt_transport_out",
"rt_tchem",
"rt_advance_cell_time",
"rt_sorts",
"rt_collect_times",
};
/* Sub-task type names.
 *
 * Human-readable name of every task subtype. The table holds
 * task_subtype_count entries and is indexed directly with a task's subtype
 * value (see task_print(), task_get_full_name() and
 * task_create_name_files() below), so entry i must be the name of the
 * subtype whose numeric value is i.
 * NOTE(review): the order presumably mirrors the task subtype enum declared
 * in task.h -- keep both in sync when adding a new subtype. */
const char *subtaskID_names[task_subtype_count] = {
"none",
"density",
"gradient",
"force",
"limiter",
"grav",
"external_grav",
"tend",
"xv",
"rho",
"part_swallow",
"bpart_merger",
"gpart",
"spart_density",
"part_prep1",
"spart_prep2",
"stars_density",
"stars_prep1",
"stars_prep2",
"stars_feedback",
"sf_counts",
"grav_counts",
"bpart_rho",
"bpart_feedback",
"bh_density",
"bh_swallow",
"do_gas_swallow",
"do_bh_swallow",
"bh_feedback",
"sink_density",
"sink_do_sink_swallow",
"sink_swallow",
"sink_do_gas_swallow",
"rt_gradient",
"rt_transport",
};
/* Task category names.
 *
 * One human-readable label per task category; the table holds
 * task_category_count entries.
 * NOTE(review): presumably indexed by the task category enum declared
 * elsewhere -- the order here must then match that enum. */
const char *task_category_names[task_category_count] = {
"drift", "sorts", "resort",
"hydro", "gravity", "feedback",
"black holes", "cooling", "star formation",
"limiter", "sync", "time integration",
"mpi", "pack", "fof",
"others", "neutrino", "sink",
"RT", "CSDS"};
#ifdef WITH_MPI
/* MPI communicators for the subtypes: one communicator per task subtype,
 * created by task_create_mpi_comms() and released by
 * task_free_mpi_comms(). Only available in MPI builds. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
#endif
/**
 * @brief Generates the function task_cell_overlap_<TYPE>() that measures how
 * many elements the particle arrays of two cells have in common.
 *
 * Only full containment is recognised: when the array range of one cell lies
 * entirely inside the range of the other, the element count of the inner
 * cell is returned. In every other situation (including a NULL cell) the
 * generated function returns 0.
 *
 * @param TYPE is the type of parts (e.g. #part, #gpart, #spart); it is only
 * used as the suffix of the generated function name.
 * @param ARRAY is the cell member holding the array of this specific type.
 * @param COUNT is the cell member holding the number of elements in the
 * array.
 */
#define TASK_CELL_OVERLAP(TYPE, ARRAY, COUNT)                          \
  __attribute__((always_inline)) INLINE static size_t                  \
      task_cell_overlap_##TYPE(const struct cell *restrict ci,         \
                               const struct cell *restrict cj) {       \
                                                                       \
    /* A missing cell cannot share anything. */                        \
    if (ci == NULL || cj == NULL) return 0;                            \
                                                                       \
    /* Is the range of cj fully enclosed by the range of ci? */        \
    if (cj->ARRAY >= ci->ARRAY &&                                      \
        cj->ARRAY + cj->COUNT <= ci->ARRAY + ci->COUNT)                \
      return cj->COUNT;                                                \
                                                                       \
    /* Or is the range of ci fully enclosed by the range of cj? */     \
    if (ci->ARRAY >= cj->ARRAY &&                                      \
        ci->ARRAY + ci->COUNT <= cj->ARRAY + cj->COUNT)                \
      return ci->COUNT;                                                \
                                                                       \
    /* Neither range contains the other. */                            \
    return 0;                                                          \
  }

/* Instantiate the overlap function for each particle type. */
TASK_CELL_OVERLAP(part, hydro.parts, hydro.count);
TASK_CELL_OVERLAP(gpart, grav.parts, grav.count);
TASK_CELL_OVERLAP(spart, stars.parts, stars.count);
TASK_CELL_OVERLAP(sink, sinks.parts, sinks.count);
TASK_CELL_OVERLAP(bpart, black_holes.parts, black_holes.count);
/**
 * @brief Returns the #task_actions for a given task.
 *
 * The action tells which kind of cell data (gas parts, gparts, sparts,
 * sinks, bparts, multipoles, everything or nothing) the task works on. It is
 * derived from the task type and, for self/pair tasks, from the subtype.
 * Tasks that are not listed yield task_action_none (and raise an error when
 * SWIFT_DEBUG_CHECKS is defined).
 *
 * @param t The #task.
 *
 * @return The #task_actions of the task.
 */
__attribute__((always_inline)) INLINE static enum task_actions task_acts_on(
    const struct task *t) {

  switch (t->type) {

    case task_type_none:
      return task_action_none;

    /* Tasks touching only the gas particles. */
    case task_type_drift_part:
    case task_type_sort:
    case task_type_ghost:
    case task_type_extra_ghost:
    case task_type_cooling:
    case task_type_end_hydro_force:
    case task_type_rt_ghost1:
    case task_type_rt_ghost2:
    case task_type_rt_tchem:
    case task_type_rt_sort:
      return task_action_part;

    /* Tasks touching every particle type. */
    case task_type_star_formation:
    case task_type_star_formation_sink:
    case task_type_sink_formation:
      return task_action_all;

    /* Tasks touching only the star particles. */
    case task_type_drift_spart:
    case task_type_stars_ghost:
    case task_type_stars_sort:
    case task_type_stars_resort:
      return task_action_spart;

    /* Tasks touching only the sink particles. */
    case task_type_drift_sink:
    case task_type_sink_density_ghost:
      return task_action_sink;

    /* Tasks touching only the black hole particles. */
    case task_type_drift_bpart:
    case task_type_bh_density_ghost:
    case task_type_bh_swallow_ghost3:
      return task_action_bpart;

    /* Interaction tasks: the subtype decides. */
    case task_type_self:
    case task_type_pair:
    case task_type_sub_self:
    case task_type_sub_pair:
      switch (t->subtype) {

        case task_subtype_density:
        case task_subtype_gradient:
        case task_subtype_force:
        case task_subtype_limiter:
        case task_subtype_rt_transport:
        case task_subtype_rt_gradient:
          return task_action_part;

        case task_subtype_stars_density:
        case task_subtype_stars_feedback:
        case task_subtype_bh_density:
        case task_subtype_bh_feedback:
        case task_subtype_bh_swallow:
        case task_subtype_do_gas_swallow:
        case task_subtype_sink_density:
        case task_subtype_sink_do_gas_swallow:
        case task_subtype_sink_do_sink_swallow:
        case task_subtype_sink_swallow:
          return task_action_all;

        case task_subtype_do_bh_swallow:
          return task_action_bpart;

        case task_subtype_grav:
        case task_subtype_external_grav:
          return task_action_gpart;

        default:
#ifdef SWIFT_DEBUG_CHECKS
          error("Unknown task_action for task %s/%s", taskID_names[t->type],
                subtaskID_names[t->subtype]);
#endif
          return task_action_none;
      }

    /* Tasks whose action depends on what the cell actually contains. */
    case task_type_kick1:
    case task_type_kick2:
    case task_type_csds:
    case task_type_fof_self:
    case task_type_fof_pair:
    case task_type_fof_attach_self:
    case task_type_fof_attach_pair:
    case task_type_timestep:
    case task_type_timestep_limiter:
    case task_type_timestep_sync:
    case task_type_send:
    case task_type_recv: {
      const int has_gas = (t->ci->hydro.count > 0);
      const int has_grav = (t->ci->grav.count > 0);

      if (has_gas && has_grav) return task_action_all;
      if (has_gas) return task_action_part;
      if (has_grav) return task_action_gpart;

#ifdef SWIFT_DEBUG_CHECKS
      error("Task without particles");
#endif
      /* Fall out of the switch and report below. */
      break;
    }

    /* Tasks touching only the multipoles. */
    case task_type_init_grav:
    case task_type_grav_mm:
    case task_type_grav_long_range:
      return task_action_multipole;

    /* Tasks touching only the gravity particles. */
    case task_type_drift_gpart:
    case task_type_grav_down:
    case task_type_end_grav_force:
      return task_action_gpart;

    default:
#ifdef SWIFT_DEBUG_CHECKS
      error("Unknown task_action for task %s/%s", taskID_names[t->type],
            subtaskID_names[t->subtype]);
#endif
      return task_action_none;
  }

#ifdef SWIFT_DEBUG_CHECKS
  error("Unknown task_action for task %s/%s", taskID_names[t->type],
        subtaskID_names[t->subtype]);
#endif

  /* Silence compiler warnings. We should never get here. */
  return task_action_none;
}
/**
 * @brief Compute the Jaccard similarity of the data used by two
 * different tasks.
 *
 * The particle type to compare is the first one, in the order gas parts,
 * gparts, sparts, sinks, bparts, on which both tasks act. For that type the
 * summed element counts of the (up to four) cells involved and the summed
 * pairwise cell overlaps are combined as
 *
 *   overlap / (sum_of_counts - overlap).
 *
 * Note that the variable called size_union below holds the plain sum of the
 * cell counts (shared elements are counted twice), which is why the
 * intersection is subtracted in the denominator.
 *
 * @param ta The first #task.
 * @param tb The second #task.
 *
 * @return The similarity of the two tasks, or 0 if either task is NULL,
 * either task does not act on cell data, the tasks share no particle type,
 * or the cells involved hold no particles of the common type.
 */
float task_overlap(const struct task *restrict ta,
                   const struct task *restrict tb) {

  if (ta == NULL || tb == NULL) return 0.f;

  const enum task_actions ta_act = task_acts_on(ta);
  const enum task_actions tb_act = task_acts_on(tb);

  /* First check if any of the two tasks are of a type that don't
     use cells. */
  if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;

  /* Which particle types does each task touch? */
  const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
  const int ta_gpart =
      (ta_act == task_action_gpart || ta_act == task_action_all);
  const int ta_spart =
      (ta_act == task_action_spart || ta_act == task_action_all);
  const int ta_sink = (ta_act == task_action_sink || ta_act == task_action_all);
  const int ta_bpart =
      (ta_act == task_action_bpart || ta_act == task_action_all);

  const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
  const int tb_gpart =
      (tb_act == task_action_gpart || tb_act == task_action_all);
  const int tb_spart =
      (tb_act == task_action_spart || tb_act == task_action_all);
  const int tb_sink = (tb_act == task_action_sink || tb_act == task_action_all);
  const int tb_bpart =
      (tb_act == task_action_bpart || tb_act == task_action_all);

  /* In the case where both tasks act on parts */
  if (ta_part && tb_part) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->hydro.count;
    if (ta->cj != NULL) size_union += ta->cj->hydro.count;
    if (tb->ci != NULL) size_union += tb->ci->hydro.count;
    if (tb->cj != NULL) size_union += tb->cj->hydro.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
                                  task_cell_overlap_part(ta->ci, tb->cj) +
                                  task_cell_overlap_part(ta->cj, tb->ci) +
                                  task_cell_overlap_part(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on gparts */
  else if (ta_gpart && tb_gpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->grav.count;
    if (ta->cj != NULL) size_union += ta->cj->grav.count;
    if (tb->ci != NULL) size_union += tb->ci->grav.count;
    if (tb->cj != NULL) size_union += tb->cj->grav.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
                                  task_cell_overlap_gpart(ta->ci, tb->cj) +
                                  task_cell_overlap_gpart(ta->cj, tb->ci) +
                                  task_cell_overlap_gpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on sparts */
  else if (ta_spart && tb_spart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->stars.count;
    if (ta->cj != NULL) size_union += ta->cj->stars.count;
    if (tb->ci != NULL) size_union += tb->ci->stars.count;
    if (tb->cj != NULL) size_union += tb->cj->stars.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_spart(ta->ci, tb->ci) +
                                  task_cell_overlap_spart(ta->ci, tb->cj) +
                                  task_cell_overlap_spart(ta->cj, tb->ci) +
                                  task_cell_overlap_spart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on sink */
  else if (ta_sink && tb_sink) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->sinks.count;
    if (ta->cj != NULL) size_union += ta->cj->sinks.count;
    if (tb->ci != NULL) size_union += tb->ci->sinks.count;
    if (tb->cj != NULL) size_union += tb->cj->sinks.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. All four terms must use
       the sink arrays so that they are consistent with the sink counts
       summed above. */
    const size_t size_intersect = task_cell_overlap_sink(ta->ci, tb->ci) +
                                  task_cell_overlap_sink(ta->ci, tb->cj) +
                                  task_cell_overlap_sink(ta->cj, tb->ci) +
                                  task_cell_overlap_sink(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* In the case where both tasks act on bparts */
  else if (ta_bpart && tb_bpart) {

    /* Compute the union of the cell data. */
    size_t size_union = 0;
    if (ta->ci != NULL) size_union += ta->ci->black_holes.count;
    if (ta->cj != NULL) size_union += ta->cj->black_holes.count;
    if (tb->ci != NULL) size_union += tb->ci->black_holes.count;
    if (tb->cj != NULL) size_union += tb->cj->black_holes.count;

    if (size_union == 0) return 0.f;

    /* Compute the intersection of the cell data. */
    const size_t size_intersect = task_cell_overlap_bpart(ta->ci, tb->ci) +
                                  task_cell_overlap_bpart(ta->ci, tb->cj) +
                                  task_cell_overlap_bpart(ta->cj, tb->ci) +
                                  task_cell_overlap_bpart(ta->cj, tb->cj);

    return ((float)size_intersect) / (size_union - size_intersect);
  }

  /* Else, no overlap */
  return 0.f;
}
/**
* @brief Unlock the cell held by this task.
*
* @param t The #task.
*/
void task_unlock(struct task *t) {
const enum task_types type = t->type;
const enum task_subtypes subtype = t->subtype;
struct cell *ci = t->ci, *cj = t->cj;
/* Act based on task type. */
switch (type) {
case task_type_kick1:
case task_type_kick2:
case task_type_csds:
case task_type_timestep:
cell_unlocktree(ci);
cell_gunlocktree(ci);
break;
case task_type_drift_part:
case task_type_sort:
case task_type_ghost:
case task_type_extra_ghost:
case task_type_end_hydro_force:
case task_type_timestep_limiter:
case task_type_timestep_sync:
case task_type_rt_ghost1:
case task_type_rt_ghost2:
case task_type_rt_tchem:
case task_type_rt_sort:
case task_type_rt_advance_cell_time:
cell_unlocktree(ci);
break;
case task_type_drift_gpart:
case task_type_end_grav_force:
cell_gunlocktree(ci);
break;
case task_type_drift_sink:
cell_sink_unlocktree(ci);
break;
case task_type_stars_sort:
case task_type_stars_resort:
cell_sunlocktree(ci);
break;
case task_type_self:
case task_type_sub_self:
if (subtype == task_subtype_grav) {
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_gunlocktree(ci);
cell_munlocktree(ci);
#endif
} else if ((subtype == task_subtype_sink_density) ||
(subtype == task_subtype_sink_swallow) ||
(subtype == task_subtype_sink_do_gas_swallow)) {
cell_sink_unlocktree(ci);
cell_unlocktree(ci);
} else if (subtype == task_subtype_sink_do_sink_swallow) {
cell_sink_unlocktree(ci);
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_prep1) ||
(subtype == task_subtype_stars_prep2) ||
(subtype == task_subtype_stars_feedback)) {
cell_sunlocktree(ci);
cell_unlocktree(ci);
} else if ((subtype == task_subtype_bh_density) ||
(subtype == task_subtype_bh_feedback) ||
(subtype == task_subtype_bh_swallow) ||
(subtype == task_subtype_do_gas_swallow)) {
cell_bunlocktree(ci);
cell_unlocktree(ci);
} else if (subtype == task_subtype_do_bh_swallow) {
cell_bunlocktree(ci);
} else if (subtype == task_subtype_limiter) {
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_unlocktree(ci);
#endif
} else { /* hydro */
cell_unlocktree(ci);
}
break;
case task_type_pair:
case task_type_sub_pair:
if (subtype == task_subtype_grav) {
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_gunlocktree(ci);
cell_gunlocktree(cj);
cell_munlocktree(ci);
cell_munlocktree(cj);
#endif
} else if ((subtype == task_subtype_sink_density) ||
(subtype == task_subtype_sink_swallow) ||
(subtype == task_subtype_sink_do_gas_swallow)) {
cell_sink_unlocktree(ci);
cell_sink_unlocktree(cj);
cell_unlocktree(ci);
cell_unlocktree(cj);
} else if (subtype == task_subtype_sink_do_sink_swallow) {
cell_sink_unlocktree(ci);
cell_sink_unlocktree(cj);
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_prep1) ||
(subtype == task_subtype_stars_prep2) ||
(subtype == task_subtype_stars_feedback)) {
cell_sunlocktree(ci);
cell_sunlocktree(cj);
cell_unlocktree(ci);
cell_unlocktree(cj);
} else if ((subtype == task_subtype_bh_density) ||
(subtype == task_subtype_bh_feedback) ||
(subtype == task_subtype_bh_swallow) ||
(subtype == task_subtype_do_gas_swallow)) {
cell_bunlocktree(ci);
cell_bunlocktree(cj);
cell_unlocktree(ci);
cell_unlocktree(cj);
} else if (subtype == task_subtype_do_bh_swallow) {
cell_bunlocktree(ci);
cell_bunlocktree(cj);
} else if (subtype == task_subtype_limiter) {
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_unlocktree(ci);
cell_unlocktree(cj);
#endif
} else { /* hydro */
cell_unlocktree(ci);
cell_unlocktree(cj);
}
break;
case task_type_grav_down:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_gunlocktree(ci);
cell_munlocktree(ci);
#endif
break;
case task_type_grav_long_range:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_munlocktree(ci);
#endif
break;
case task_type_grav_mm:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
cell_munlocktree(ci);
cell_munlocktree(cj);
#endif
break;
case task_type_fof_self:
case task_type_fof_attach_self:
cell_gunlocktree(ci);
break;
case task_type_fof_pair:
case task_type_fof_attach_pair:
cell_gunlocktree(ci);
cell_gunlocktree(cj);
break;
case task_type_star_formation:
cell_unlocktree(ci);
cell_sunlocktree(ci);
cell_gunlocktree(ci);
break;
case task_type_star_formation_sink:
cell_sink_unlocktree(ci);
cell_sunlocktree(ci);
cell_gunlocktree(ci);
break;
case task_type_sink_formation:
cell_unlocktree(ci);
cell_sink_unlocktree(ci);
cell_gunlocktree(ci);
break;
default:
break;
}
}
/**
 * @brief Try to lock the cells associated with this task.
 *
 * The locks taken depend on the task type and, for self/pair tasks, on the
 * subtype. Each branch first inspects the relevant "hold" counters of the
 * cell(s) and gives up immediately if any is set, then tries the tree locks
 * one after the other. If a later lock cannot be obtained, every lock
 * already taken in this call is released again before returning, so a failed
 * attempt leaves no lock behind.
 *
 * For send/recv tasks no cell is locked; in MPI builds the MPI request is
 * tested instead and its completion flag is returned.
 *
 * Task types that are not listed need no lock and always succeed.
 *
 * @param t the #task.
 *
 * @return 1 if the task obtained all the locks it needs (or needs none),
 * 0 otherwise.
 */
int task_lock(struct task *t) {
const enum task_types type = t->type;
const enum task_subtypes subtype = t->subtype;
struct cell *ci = t->ci, *cj = t->cj;
#ifdef WITH_MPI
/* res: completion flag set by MPI_Test; err: MPI return code. */
int res = 0, err = 0;
MPI_Status stat;
#endif
switch (type) {
/* Communication task? */
case task_type_recv:
case task_type_send:
#ifdef WITH_MPI
/* Check the status of the MPI request. */
if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) {
char buff[MPI_MAX_ERROR_STRING];
int len;
MPI_Error_string(err, buff, &len);
error(
"Failed to test request on send/recv task (type=%s/%s tag=%lld, "
"%s).",
taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff);
}
/* And log deactivation, if logging enabled. */
if (res) {
mpiuse_log_allocation(t->type, t->subtype, &t->req, 0, 0, 0, 0);
}
/* The "lock" succeeds once the request has completed. */
return res;
#else
error("SWIFT was not compiled with MPI support.");
#endif
break;
/* Lock the gas and the gravity particles of the cell. */
case task_type_kick1:
case task_type_kick2:
case task_type_csds:
case task_type_timestep:
if (ci->hydro.hold || ci->grav.phold) return 0;
if (cell_locktree(ci) != 0) return 0;
if (cell_glocktree(ci) != 0) {
/* Roll back the gas lock taken just above. */
cell_unlocktree(ci);
return 0;
}
break;
/* Lock the gas particles of the cell. */
case task_type_drift_part:
case task_type_sort:
case task_type_ghost:
case task_type_extra_ghost:
case task_type_end_hydro_force:
case task_type_timestep_limiter:
case task_type_timestep_sync:
case task_type_rt_ghost1:
case task_type_rt_ghost2:
case task_type_rt_tchem:
case task_type_rt_sort:
case task_type_rt_advance_cell_time:
if (ci->hydro.hold) return 0;
if (cell_locktree(ci) != 0) return 0;
break;
/* Lock the star particles of the cell. */
case task_type_stars_sort:
case task_type_stars_resort:
if (ci->stars.hold) return 0;
if (cell_slocktree(ci) != 0) return 0;
break;
/* Lock the gravity particles of the cell. */
case task_type_drift_gpart:
case task_type_end_grav_force:
if (ci->grav.phold) return 0;
if (cell_glocktree(ci) != 0) return 0;
break;
/* Lock the sink particles of the cell. */
case task_type_drift_sink:
if (ci->sinks.hold) return 0;
if (cell_sink_locktree(ci) != 0) return 0;
break;
/* Single-cell interactions: the subtype decides what gets locked. */
case task_type_self:
case task_type_sub_self:
if (subtype == task_subtype_grav) {
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the gparts and the m-pole */
if (ci->grav.phold || ci->grav.mhold) return 0;
if (cell_glocktree(ci) != 0)
return 0;
else if (cell_mlocktree(ci) != 0) {
cell_gunlocktree(ci);
return 0;
}
#endif
} else if ((subtype == task_subtype_sink_density) ||
(subtype == task_subtype_sink_swallow) ||
(subtype == task_subtype_sink_do_gas_swallow)) {
/* Lock the sinks and the gas particles */
if (ci->sinks.hold) return 0;
if (ci->hydro.hold) return 0;
if (cell_sink_locktree(ci) != 0) return 0;
if (cell_locktree(ci) != 0) {
cell_sink_unlocktree(ci);
return 0;
}
} else if (subtype == task_subtype_sink_do_sink_swallow) {
/* Lock the sinks only */
if (ci->sinks.hold) return 0;
if (cell_sink_locktree(ci) != 0) return 0;
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_prep1) ||
(subtype == task_subtype_stars_prep2) ||
(subtype == task_subtype_stars_feedback)) {
/* Lock the stars and the gas particles */
if (ci->stars.hold) return 0;
if (ci->hydro.hold) return 0;
if (cell_slocktree(ci) != 0) return 0;
if (cell_locktree(ci) != 0) {
cell_sunlocktree(ci);
return 0;
}
} else if ((subtype == task_subtype_bh_density) ||
(subtype == task_subtype_bh_feedback) ||
(subtype == task_subtype_bh_swallow) ||
(subtype == task_subtype_do_gas_swallow)) {
/* Lock the BHs and the gas particles */
if (ci->black_holes.hold) return 0;
if (ci->hydro.hold) return 0;
if (cell_blocktree(ci) != 0) return 0;
if (cell_locktree(ci) != 0) {
cell_bunlocktree(ci);
return 0;
}
} else if (subtype == task_subtype_do_bh_swallow) {
/* Lock the BHs only */
if (ci->black_holes.hold) return 0;
if (cell_blocktree(ci) != 0) return 0;
} else if (subtype == task_subtype_limiter) {
/* The limiter only takes the gas lock when built without atomics. */
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
if (ci->hydro.hold) return 0;
if (cell_locktree(ci) != 0) return 0;
#endif
} else { /* subtype == hydro */
if (ci->hydro.hold) return 0;
if (cell_locktree(ci) != 0) return 0;
}
break;
/* Two-cell interactions: same scheme as above, applied to ci and cj. */
case task_type_pair:
case task_type_sub_pair:
if (subtype == task_subtype_grav) {
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the gparts and the m-pole in both cells */
if (ci->grav.phold || cj->grav.phold) return 0;
if (cell_glocktree(ci) != 0) return 0;
if (cell_glocktree(cj) != 0) {
cell_gunlocktree(ci);
return 0;
} else if (cell_mlocktree(ci) != 0) {
cell_gunlocktree(ci);
cell_gunlocktree(cj);
return 0;
} else if (cell_mlocktree(cj) != 0) {
cell_gunlocktree(ci);
cell_gunlocktree(cj);
cell_munlocktree(ci);
return 0;
}
#endif
} else if ((subtype == task_subtype_sink_density) ||
(subtype == task_subtype_sink_swallow) ||
(subtype == task_subtype_sink_do_gas_swallow)) {
/* Lock the sinks and the gas particles in both cells */
if (ci->sinks.hold || cj->sinks.hold) return 0;
if (ci->hydro.hold || cj->hydro.hold) return 0;
if (cell_sink_locktree(ci) != 0) return 0;
if (cell_sink_locktree(cj) != 0) {
cell_sink_unlocktree(ci);
return 0;
}
if (cell_locktree(ci) != 0) {
cell_sink_unlocktree(ci);
cell_sink_unlocktree(cj);
return 0;
}
if (cell_locktree(cj) != 0) {
cell_sink_unlocktree(ci);
cell_sink_unlocktree(cj);
cell_unlocktree(ci);
return 0;
}
} else if (subtype == task_subtype_sink_do_sink_swallow) {
/* Lock the sinks in both cells */
if (ci->sinks.hold || cj->sinks.hold) return 0;
if (cell_sink_locktree(ci) != 0) return 0;
if (cell_sink_locktree(cj) != 0) {
cell_sink_unlocktree(ci);
return 0;
}
} else if ((subtype == task_subtype_stars_density) ||
(subtype == task_subtype_stars_prep1) ||
(subtype == task_subtype_stars_prep2) ||
(subtype == task_subtype_stars_feedback)) {
/* Lock the stars and the gas particles in both cells */
if (ci->stars.hold || cj->stars.hold) return 0;
if (ci->hydro.hold || cj->hydro.hold) return 0;
if (cell_slocktree(ci) != 0) return 0;
if (cell_slocktree(cj) != 0) {
cell_sunlocktree(ci);
return 0;
}
if (cell_locktree(ci) != 0) {
cell_sunlocktree(ci);
cell_sunlocktree(cj);
return 0;
}
if (cell_locktree(cj) != 0) {
cell_sunlocktree(ci);
cell_sunlocktree(cj);
cell_unlocktree(ci);
return 0;
}
} else if ((subtype == task_subtype_bh_density) ||
(subtype == task_subtype_bh_feedback) ||
(subtype == task_subtype_bh_swallow) ||
(subtype == task_subtype_do_gas_swallow)) {
/* Lock the BHs and the gas particles in both cells */
if (ci->black_holes.hold || cj->black_holes.hold) return 0;
if (ci->hydro.hold || cj->hydro.hold) return 0;
if (cell_blocktree(ci) != 0) return 0;
if (cell_blocktree(cj) != 0) {
cell_bunlocktree(ci);
return 0;
}
if (cell_locktree(ci) != 0) {
cell_bunlocktree(ci);
cell_bunlocktree(cj);
return 0;
}
if (cell_locktree(cj) != 0) {
cell_bunlocktree(ci);
cell_bunlocktree(cj);
cell_unlocktree(ci);
return 0;
}
} else if (subtype == task_subtype_do_bh_swallow) {
/* Lock the BHs in both cells */
if (ci->black_holes.hold || cj->black_holes.hold) return 0;
if (cell_blocktree(ci) != 0) return 0;
if (cell_blocktree(cj) != 0) {
cell_bunlocktree(ci);
return 0;
}
} else if (subtype == task_subtype_limiter) {
/* The limiter only takes the gas locks when built without atomics. */
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
if (ci->hydro.hold || cj->hydro.hold) return 0;
if (cell_locktree(ci) != 0) return 0;
if (cell_locktree(cj) != 0) {
cell_unlocktree(ci);
return 0;
}
#endif
} else { /* subtype == hydro */
/* Lock the parts in both cells */
if (ci->hydro.hold || cj->hydro.hold) return 0;
if (cell_locktree(ci) != 0) return 0;
if (cell_locktree(cj) != 0) {
cell_unlocktree(ci);
return 0;
}
}
break;
case task_type_grav_down:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the gparts and the m-poles */
if (ci->grav.phold || ci->grav.mhold) return 0;
if (cell_glocktree(ci) != 0)
return 0;
else if (cell_mlocktree(ci) != 0) {
cell_gunlocktree(ci);
return 0;
}
#endif
break;
case task_type_grav_long_range:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock the m-poles */
if (ci->grav.mhold) return 0;
if (cell_mlocktree(ci) != 0) return 0;
#endif
break;
case task_type_grav_mm:
#ifdef SWIFT_TASKS_WITHOUT_ATOMICS
/* Lock both m-poles */
if (ci->grav.mhold || cj->grav.mhold) return 0;
if (cell_mlocktree(ci) != 0) return 0;
if (cell_mlocktree(cj) != 0) {
cell_munlocktree(ci);
return 0;
}
#endif
break;
case task_type_fof_self:
case task_type_fof_attach_self:
/* Lock the gparts as this is what we act on */
if (ci->grav.phold) return 0;
if (cell_glocktree(ci) != 0) return 0;
break;
case task_type_fof_pair:
case task_type_fof_attach_pair:
/* Lock the gparts of both cells as this is what we act on */
if (ci->grav.phold || cj->grav.phold) return 0;
if (cell_glocktree(ci) != 0) return 0;
if (cell_glocktree(cj) != 0) {
cell_gunlocktree(ci);
return 0;
}
break;
case task_type_star_formation:
/* Lock the gas, gravity and star particles */
if (ci->hydro.hold || ci->stars.hold || ci->grav.phold) return 0;
if (cell_locktree(ci) != 0) return 0;
if (cell_slocktree(ci) != 0) {
cell_unlocktree(ci);
return 0;
}
if (cell_glocktree(ci) != 0) {
cell_unlocktree(ci);
cell_sunlocktree(ci);
return 0;
}
break;
case task_type_star_formation_sink:
/* Lock the sinks, gravity and star particles */
if (ci->sinks.hold || ci->stars.hold || ci->grav.phold) return 0;
if (cell_sink_locktree(ci) != 0) return 0;
if (cell_slocktree(ci) != 0) {
cell_sink_unlocktree(ci);
return 0;
}
if (cell_glocktree(ci) != 0) {
cell_sink_unlocktree(ci);
cell_sunlocktree(ci);
return 0;
}
break;
case task_type_sink_formation:
/* Lock the gas, sink and gravity particles */
if (ci->hydro.hold || ci->sinks.hold || ci->grav.phold) return 0;
if (cell_locktree(ci) != 0) return 0;
if (cell_sink_locktree(ci) != 0) {
cell_unlocktree(ci);
return 0;
}
if (cell_glocktree(ci) != 0) {
cell_unlocktree(ci);
cell_sink_unlocktree(ci);
return 0;
}
break;
/* Any other task type needs no lock. */
default:
break;
}
/* If we made it this far, we've got a lock. */
return 1;
}
/**
 * @brief Returns a pointer to the unique task unlocked by this task.
 *
 * The task MUST have exactly one dependence! This is only verified when
 * SWIFT_DEBUG_CHECKS is defined; otherwise the first entry of the list of
 * unlocked tasks is returned unconditionally.
 *
 * @param t The #task.
 *
 * @return The first (and supposedly only) task unlocked by @c t.
 */
struct task *task_get_unique_dependent(const struct task *t) {

#ifdef SWIFT_DEBUG_CHECKS
  if (!(t->nr_unlock_tasks == 1)) {
    error("Task is unlocking more than one dependence!");
  }
#endif

  struct task *only_dependent = t->unlock_tasks[0];
  return only_dependent;
}
/**
 * @brief Print basic information about a task.
 *
 * Writes the type name, subtype name, wait counter, number of unlocked
 * tasks and skip flag of the task through message().
 *
 * @param t The #task.
 */
void task_print(const struct task *t) {

  const char *type_name = taskID_names[t->type];
  const char *subtype_name = subtaskID_names[t->subtype];

  message("Type:'%s' sub_type:'%s' wait=%d nr_unlocks=%d skip=%d", type_name,
          subtype_name, t->wait, t->nr_unlock_tasks, t->skip);
}
/**
 * @brief Get the group name of a task.
 *
 * This is used to group tasks with similar actions in the task dependency
 * graph. Long-range and multipole-multipole gravity tasks always belong to
 * "Gravity"; for every other task the subtype picks the group. Send/recv
 * tasks with a gradient, limiter or RT subtype, as well as all subtypes
 * without a dedicated group, end up in "None".
 *
 * @param type The #task type.
 * @param subtype The #task subtype.
 * @param cluster (return) The group name (should be allocated by the caller
 * and large enough for the name including its terminating NUL).
 */
void task_get_group_name(int type, int subtype, char *cluster) {

  /* Communication tasks are kept out of some of the groups. */
  const int is_comm = (type == task_type_send || type == task_type_recv);

  /* Group used when nothing more specific applies. */
  const char *group = "None";

  if (type == task_type_grav_long_range || type == task_type_grav_mm) {
    group = "Gravity";
  } else {
    switch (subtype) {
      case task_subtype_density:
        group = "Density";
        break;
      case task_subtype_gradient:
        if (!is_comm) group = "Gradient";
        break;
      case task_subtype_force:
        group = "Force";
        break;
      case task_subtype_grav:
        group = "Gravity";
        break;
      case task_subtype_limiter:
        if (!is_comm) group = "Timestep_limiter";
        break;
      case task_subtype_stars_density:
        group = "StarsDensity";
        break;
      case task_subtype_stars_prep1:
        group = "StarsKickPrep1";
        break;
      case task_subtype_stars_prep2:
        group = "StarsKickPrep2";
        break;
      case task_subtype_stars_feedback:
        group = "StarsFeedback";
        break;
      case task_subtype_bh_density:
        group = "BHDensity";
        break;
      case task_subtype_bh_swallow:
        group = "BHSwallow";
        break;
      case task_subtype_do_gas_swallow:
        group = "DoGasSwallow";
        break;
      case task_subtype_do_bh_swallow:
        group = "DoBHSwallow";
        break;
      case task_subtype_bh_feedback:
        group = "BHFeedback";
        break;
      case task_subtype_rt_gradient:
        if (!is_comm) group = "RTgradient";
        break;
      case task_subtype_rt_transport:
        if (!is_comm) group = "RTtransport";
        break;
      case task_subtype_sink_density:
        group = "SinkDensity";
        break;
      case task_subtype_sink_swallow:
        group = "SinkSwallow";
        break;
      case task_subtype_sink_do_sink_swallow:
        group = "DoSinkSwallow";
        break;
      case task_subtype_sink_do_gas_swallow:
        group = "DoGasSwallow";
        break;
      default:
        break;
    }
  }

  strcpy(cluster, group);
}
/**
 * @brief Generate the full name of a #task.
 *
 * The full name is the type name alone when the subtype is
 * task_subtype_none, and "<type name>_<subtype name>" otherwise.
 *
 * @param type The #task type.
 * @param subtype The #task subtype.
 * @param name (return) The formatted string (must be allocated by the caller
 * and large enough for the result).
 */
void task_get_full_name(int type, int subtype, char *name) {

#ifdef SWIFT_DEBUG_CHECKS
  /* Check input */
  if (type >= task_type_count) {
    error("Unknown task type %i", type);
  }
  if (subtype >= task_subtype_count) {
    error("Unknown task subtype %i with type %s", subtype, taskID_names[type]);
  }
#endif

  /* Full task name */
  const char *type_name = taskID_names[type];
  if (subtype != task_subtype_none) {
    sprintf(name, "%s_%s", type_name, subtaskID_names[subtype]);
  } else {
    sprintf(name, "%s", type_name);
  }
}
void task_create_name_files(const char *file_prefix) {
char file_name[200];
sprintf(file_name, "%s_task_types.txt", file_prefix);
FILE *file = fopen(file_name, "w");
if (file == NULL) error("Could not create file '%s'.", file_name);
fprintf(file, "# type\tname\n");
for (int type = 0; type < task_type_count; type++) {
fprintf(file, "%i\t%s\n", type, taskID_names[type]);
}
fclose(file);
sprintf(file_name, "%s_task_subtypes.txt", file_prefix);
file = fopen(file_name, "w");
if (file == NULL) error("Could not create file '%s'.", file_name);
fprintf(file, "# subtype\tname\n");
for (int subtype = 0; subtype < task_subtype_count; subtype++) {
fprintf(file, "%i\t%s\n", subtype, subtaskID_names[subtype]);
}
fclose(file);
}
#ifdef WITH_MPI
/**
 * @brief Create global communicators for each of the subtasks.
 *
 * Every entry of subtaskMPI_comms becomes a duplicate of MPI_COMM_WORLD.
 */
void task_create_mpi_comms(void) {

  int subtype = 0;
  while (subtype < task_subtype_count) {
    MPI_Comm_dup(MPI_COMM_WORLD, &subtaskMPI_comms[subtype]);
    subtype++;
  }
}
/**
 * @brief Free the global communicators of each of the subtasks.
 *
 * Releases every entry of subtaskMPI_comms.
 */
void task_free_mpi_comms(void) {

  int subtype = 0;
  while (subtype < task_subtype_count) {
    MPI_Comm_free(&subtaskMPI_comms[subtype]);
    subtype++;
  }
}
#endif
/**
 * @brief Write the timings of all the tasks that ran during the current step
 *        to a file for postprocessing.
 *
 * The output file is "thread_info-step<step>.dat", or
 * "thread_info_MPI-step<step>.dat" when compiled with MPI. In the MPI case
 * all the ranks append, one after the other, to that single file and every
 * line carries an additional leading field holding the rank.
 *
 * Only tasks that are not implicit and whose tic is later than the start of
 * the step are written. The function body is empty unless SWIFT_DEBUG_TASKS
 * is defined.
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_all(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  const ticks start_tic = getticks();

  /* Needed by the readers of the file to convert ticks to seconds. */
  const unsigned long long cpufreq = clocks_get_cpufreq();

#ifdef WITH_MPI

  char fname[35];
  snprintf(fname, sizeof(fname), "thread_info_MPI-step%d.dat", step);

  /* A single rank truncates the file so that everybody can then append. */
  FILE *out;
  if (engine_rank == 0) {
    out = fopen(fname, "w");
    if (out == NULL) error("Could not create/erase file '%s'.", fname);
    fclose(out);
  }
  MPI_Barrier(MPI_COMM_WORLD);

  for (int rank = 0; rank < e->nr_nodes; rank++) {

    /* Rank 0 announces whose turn it is; the ranks write one-by-one. */
    int turn = rank;
    MPI_Bcast(&turn, 1, MPI_INT, 0, MPI_COMM_WORLD);

    if (rank == engine_rank) {

      /* Our turn: append to the shared file. */
      out = fopen(fname, "a");
      if (out == NULL) error("Could not open file '%s' for writing.", fname);

      /* Per-rank summary line: step boundaries, update counts and the CPU
       * frequency for the tick conversion. */
      fprintf(out, " %03d 0 0 0 0 %lld %lld %lld %lld %lld 0 0 %lld\n",
              engine_rank, (long long int)e->tic_step,
              (long long int)e->toc_step, e->updates, e->g_updates,
              e->s_updates, cpufreq);

      for (int l = 0; l < e->sched.nr_tasks; l++) {
        const struct task *t = &e->sched.tasks[l];

        if (!t->implicit && t->tic > e->tic_step) {
          fprintf(out, " %03i %i %i %i %i %lli %lli %i %i %i %i %lli %i\n",
                  engine_rank, t->rid, t->type, t->subtype, (t->cj == NULL),
                  (long long int)t->tic, (long long int)t->toc,
                  (t->ci != NULL) ? t->ci->hydro.count : 0,
                  (t->cj != NULL) ? t->cj->hydro.count : 0,
                  (t->ci != NULL) ? t->ci->grav.count : 0,
                  (t->cj != NULL) ? t->cj->grav.count : 0, t->flags, t->sid);
        }
      }
      fclose(out);
    }

    /* Nobody moves on before the current writer is done. */
    MPI_Barrier(MPI_COMM_WORLD);
  }

#else

  /* No MPI: there is only this engine's set of tasks to write. */
  char fname[32];
  snprintf(fname, sizeof(fname), "thread_info-step%d.dat", step);

  FILE *out;
  out = fopen(fname, "w");
  if (out == NULL) error("Could not create file '%s'.", fname);

  /* Summary line: step boundaries, update counts and the CPU frequency for
   * the tick conversion. */
  fprintf(out, " %d %d %d %d %lld %lld %lld %lld %lld %d %lld\n", -2, -1, -1,
          1, (unsigned long long)e->tic_step, (unsigned long long)e->toc_step,
          e->updates, e->g_updates, e->s_updates, 0, cpufreq);

  for (int l = 0; l < e->sched.nr_tasks; l++) {
    const struct task *t = &e->sched.tasks[l];

    if (!t->implicit && t->tic > e->tic_step) {
      fprintf(out, " %i %i %i %i %lli %lli %i %i %i %i %i\n", t->rid, t->type,
              t->subtype, (t->cj == NULL), (unsigned long long)t->tic,
              (unsigned long long)t->toc,
              (t->ci == NULL) ? 0 : t->ci->hydro.count,
              (t->cj == NULL) ? 0 : t->cj->hydro.count,
              (t->ci == NULL) ? 0 : t->ci->grav.count,
              (t->cj == NULL) ? 0 : t->cj->grav.count, t->sid);
    }
  }
  fclose(out);

#endif  // WITH_MPI

  if (e->verbose)
    message("took %.3f %s.", clocks_from_ticks(getticks() - start_tic),
            clocks_getunit());

#endif  // SWIFT_DEBUG_TASKS
}
/**
 * @brief Generate simple statistics about the times used by the tasks of
 * all the engines and write these in one of two formats: a human readable
 * version for debugging and one intended for inclusion as the fixed
 * costs for repartitioning.
 *
 * Note that when running under MPI all the tasks can be summed into this single
 * file. In the fuller, human readable file, the statistics included are the
 * number of tasks of each type/subtype followed by the minimum, maximum, mean
 * and total time taken and the same numbers for the start of the task,
 * in millisec and then the fixed costs value.
 *
 * If header is set, only the fixed costs value is written into the output
 * file in a format that is suitable for inclusion in SWIFT (as
 * partition_fixed_costs.h).
 *
 * Only tasks that are not implicit and have a non-zero tic contribute to the
 * statistics, and only type/subtype pairs with a positive total time are
 * written out.
 *
 * @param dumpfile name of the file for the output.
 * @param e the #engine
 * @param dump_tasks_threshold Fraction of the step time above which any task
 * triggers a call to task_dump_all() (made at most once per
 * call of this function, and only when compiled with
 * SWIFT_DEBUG_TASKS). Values <= 0 disable the check.
 * @param header whether to write a header include file.
 * @param allranks do the statistics over all ranks, if not just the current
 * one, only used if header is false.
 */
void task_dump_stats(const char *dumpfile, struct engine *e,
float dump_tasks_threshold, int header, int allranks) {
const ticks function_tic = getticks();
/* Need arrays for sum, min and max across all types and subtypes.
 * The un-prefixed arrays hold task durations (toc - tic), the t-prefixed
 * ones hold the task start ticks. */
double sum[task_type_count][task_subtype_count];
double tsum[task_type_count][task_subtype_count];
double min[task_type_count][task_subtype_count];
double tmin[task_type_count][task_subtype_count];
double max[task_type_count][task_subtype_count];
double tmax[task_type_count][task_subtype_count];
int count[task_type_count][task_subtype_count];
/* Start from neutral values so that the first task seen sets min and max. */
for (int j = 0; j < task_type_count; j++) {
for (int k = 0; k < task_subtype_count; k++) {
sum[j][k] = 0.0;
tsum[j][k] = 0.0;
count[j][k] = 0;
min[j][k] = DBL_MAX;
tmin[j][k] = DBL_MAX;
max[j][k] = 0.0;
tmax[j][k] = 0.0;
}
}
/* Length of the step in ticks, the reference for the threshold test. */
double stepdt = (double)e->toc_step - (double)e->tic_step;
/* Total time over all tasks; kept as an array so it can be reduced
 * in place with MPI below. */
double total[1] = {0.0};
/* Make sure the full task dump is triggered only once. */
int dumped_plot_data = 0;
for (int l = 0; l < e->sched.nr_tasks; l++) {
int type = e->sched.tasks[l].type;
/* Skip implicit tasks and tasks that have not run. */
if (!e->sched.tasks[l].implicit && e->sched.tasks[l].tic > 0) {
int subtype = e->sched.tasks[l].subtype;
/* Duration of this task in ticks. */
double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
sum[type][subtype] += dt;
/* Absolute start tick; made relative to the step start at output time. */
double tic = (double)e->sched.tasks[l].tic;
tsum[type][subtype] += tic;
count[type][subtype] += 1;
if (dt < min[type][subtype]) {
min[type][subtype] = dt;
}
if (tic < tmin[type][subtype]) {
tmin[type][subtype] = tic;
}
if (dt > max[type][subtype]) {
max[type][subtype] = dt;
}
if (tic > tmax[type][subtype]) {
tmax[type][subtype] = tic;
}
total[0] += dt;
/* Check if this is a problematic task and make a report. */
if (dump_tasks_threshold > 0. && dt / stepdt > dump_tasks_threshold) {
if (e->verbose)
message(
"Long running task detected: %s/%s using %.1f%% of step runtime",
taskID_names[type], subtaskID_names[subtype],
dt / stepdt * 100.0);
/* Only the first offending task triggers the full dump. The flag is set
 * whether or not the dump is compiled in. */
if (!dumped_plot_data) {
#ifdef SWIFT_DEBUG_TASKS
task_dump_all(e, e->step + 1);
#endif
dumped_plot_data = 1;
}
}
}
}
#ifdef WITH_MPI
if (allranks || header) {
/* Get these from all ranks for output from rank 0. Could wrap these into a
 * single operation. Rank 0 reduces in place, the other ranks only send. */
size_t size = task_type_count * task_subtype_count;
int res =
MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : &sum[0][0]), &sum[0][0],
size, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");
res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : &tsum[0][0]),
&tsum[0][0], size, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task tsums");
res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : &count[0][0]),
&count[0][0], size, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");
res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : &min[0][0]), &min[0][0],
size, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");
res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : &tmin[0][0]),
&tmin[0][0], size, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");
res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : &max[0][0]), &max[0][0],
size, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");
res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : &tmax[0][0]),
&tmax[0][0], size, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");
res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");
}
/* Write from every rank when not combining ranks, otherwise from rank 0
 * only. The closing brace of this block sits in the matching WITH_MPI
 * section after the fclose() below.
 * NOTE(review): with header set and allranks unset the first term is true
 * on every rank, so all ranks open the same dumpfile for writing - confirm
 * that callers never use that combination. */
if (!allranks || (engine_rank == 0 && (allranks || header))) {
#endif
FILE *dfile = fopen(dumpfile, "w");
if (dfile == NULL) error("Could not create file '%s'.", dumpfile);
if (header) {
fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n");
fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n");
} else {
fprintf(dfile,
"# task ntasks min max sum mean percent mintic maxtic"
" meantic fixed_cost\n");
}
for (int j = 0; j < task_type_count; j++) {
const char *taskID = taskID_names[j];
for (int k = 0; k < task_subtype_count; k++) {
/* Only report type/subtype pairs that actually used some time. */
if (sum[j][k] > 0.0) {
/* Fixed cost is in .1ns as we want to compare between runs in
 * some absolute units.
 * NOTE(review): if clocks_from_ticks() returns milliseconds, as the
 * function header states, the factor 10000 gives units of 0.1us rather
 * than .1ns - confirm the intended unit. */
double mean = sum[j][k] / (double)count[j][k];
int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f);
if (header) {
fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j,
k, fixed_cost, taskID, subtaskID_names[k]);
} else {
/* Share of the total task time and start ticks relative to the
 * start of the step. */
double perc = 100.0 * sum[j][k] / total[0];
double mintic = tmin[j][k] - e->tic_step;
double maxtic = tmax[j][k] - e->tic_step;
double meantic = tsum[j][k] / (double)count[j][k] - e->tic_step;
fprintf(dfile,
"%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f"
" %14.4f %14.4f %14.4f %10d\n",
taskID, subtaskID_names[k], count[j][k],
clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]),
clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), perc,
clocks_from_ticks(mintic), clocks_from_ticks(maxtic),
clocks_from_ticks(meantic), fixed_cost);
}
}
}
}
fclose(dfile);
#ifdef WITH_MPI
}
#endif
if (e->verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - function_tic),
clocks_getunit());
}
/**
* @brief dump all the active tasks of all the known engines into files.
*
* Dumps the information into file "task_dump-stepn.dat" where n is the given
* step value, or files "task_dump_MPI-stepn.dat_rank", if we are running
* under MPI. Note if running under MPI all the ranks are dumped into separate
* files to avoid interaction with other MPI calls that may be blocking at the
* time. Very similar to task_dump_all() except for the additional fields used
* in task debugging and we record tasks that have not ran (i.e !skip, but toc
* == 0) and how many waits are still active.
*
* @param e the #engine
*/
void task_dump_active(struct engine *e) {
const ticks tic = getticks();
/* Need this to convert ticks to seconds. */
unsigned long long cpufreq = clocks_get_cpufreq();
char dumpfile[35];
#ifdef WITH_MPI
snprintf(dumpfile, sizeof(dumpfile), "task_dump_MPI-step%d.dat_%d", e->step,
e->nodeID);
#else
snprintf(dumpfile, sizeof(dumpfile), "task_dump-step%d.dat", e->step);
#endif
FILE *file_thread = fopen(dumpfile, "w");
if (file_thread == NULL) error("Could not create file '%s'.", dumpfile);
fprintf(file_thread,
"# rank otherrank type subtype waits pair tic toc"
" ci.hydro.count cj.hydro.count ci.grav.count cj.grav.count"
" flags\n");
/* Add some information to help with the plots and conversion of ticks to
* seconds. */
fprintf(file_thread, "%i 0 none none -1 0 %lld %lld %lld %lld %lld 0 %lld\n",
engine_rank, (long long int)e->tic_step, (long long int)e->toc_step,
e->updates, e->g_updates, e->s_updates, cpufreq);
for (int l = 0; l < e->sched.nr_tasks; l++) {
struct task *t = &e->sched.tasks[l];
/* Not implicit and not skipped. */
if (!t->implicit && !t->skip) {
/* Get destination rank of MPI requests. */
int paired = (t->cj != NULL);
int otherrank = t->ci->nodeID;
if (paired) otherrank = t->cj->nodeID;
fprintf(file_thread, "%i %i %s %s %i %i %lli %lli %i %i %i %i %lli\n",
engine_rank, otherrank, taskID_names[t->type],
subtaskID_names[t->subtype], t->wait, paired,
(long long int)t->tic, (long long int)t->toc,
(t->ci != NULL) ? t->ci->hydro.count : 0,
(t->cj != NULL) ? t->cj->hydro.count : 0,
(t->ci != NULL) ? t->ci->grav.count : 0,
(t->cj != NULL) ? t->cj->grav.count : 0, t->flags);
}
}
fclose(file_thread);
if (e->verbose)
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
}
/**
* @brief Return the #task_categories of a given #task.
*
* @param t The #task.
*/
enum task_categories task_get_category(const struct task *t) {
switch (t->type) {
case task_type_cooling:
return task_category_cooling;
case task_type_csds:
return task_category_csds;
case task_type_star_formation:
case task_type_star_formation_sink:
return task_category_star_formation;
case task_type_sink_density_ghost:
case task_type_sink_formation:
return task_category_sink;
case task_type_drift_part:
case task_type_drift_spart:
case task_type_drift_sink:
case task_type_drift_bpart:
case task_type_drift_gpart:
return task_category_drift;
case task_type_sort:
case task_type_stars_sort:
return task_category_sort;
case task_type_stars_resort:
return task_category_resort;
case task_type_send:
case task_type_recv:
return task_category_mpi;
case task_type_pack:
case task_type_unpack:
return task_category_pack;
case task_type_kick1:
case task_type_kick2:
case task_type_timestep:
case task_type_collect:
return task_category_time_integration;
case task_type_timestep_limiter:
return task_category_limiter;
case task_type_timestep_sync:
return task_category_sync;
case task_type_ghost:
case task_type_extra_ghost:
case task_type_end_hydro_force:
return task_category_hydro;
case task_type_stars_ghost:
case task_type_stars_prep_ghost1:
case task_type_hydro_prep_ghost1:
case task_type_stars_prep_ghost2:
return task_category_feedback;
case task_type_bh_density_ghost:
case task_type_bh_swallow_ghost2:
return task_category_black_holes;
case task_type_init_grav:
case task_type_grav_long_range:
case task_type_grav_mm:
case task_type_grav_down:
case task_type_end_grav_force:
return task_category_gravity;
case task_type_fof_self:
case task_type_fof_pair:
case task_type_fof_attach_self:
case task_type_fof_attach_pair:
return task_category_fof;
case task_type_rt_in:
case task_type_rt_ghost1:
case task_type_rt_ghost2:
case task_type_rt_transport_out:
case task_type_rt_tchem:
case task_type_rt_out:
case task_type_rt_sort:
case task_type_rt_advance_cell_time:
return task_category_rt;
case task_type_neutrino_weight:
return task_category_neutrino;
case task_type_self:
case task_type_pair:
case task_type_sub_self:
case task_type_sub_pair: {
switch (t->subtype) {
case task_subtype_density:
case task_subtype_gradient:
case task_subtype_force:
return task_category_hydro;
case task_subtype_limiter:
return task_category_limiter;
case task_subtype_grav:
case task_subtype_external_grav:
return task_category_gravity;
case task_subtype_stars_density:
case task_subtype_stars_prep1:
case task_subtype_stars_prep2:
case task_subtype_stars_feedback:
return task_category_feedback;
case task_subtype_bh_density:
case task_subtype_bh_swallow:
case task_subtype_do_gas_swallow:
case task_subtype_do_bh_swallow:
case task_subtype_bh_feedback:
return task_category_black_holes;
case task_subtype_sink_density:
case task_subtype_sink_swallow:
case task_subtype_sink_do_sink_swallow:
case task_subtype_sink_do_gas_swallow:
return task_category_sink;
case task_subtype_rt_gradient:
case task_subtype_rt_transport:
return task_category_rt;
default:
return task_category_others;
}
}
default:
return task_category_others;
}
}