Commit 8a6c3b41 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'task_category_timing' into 'master'

Task category timing

See merge request !1043
parents 2e420457 9d05f210
......@@ -1670,6 +1670,12 @@ void engine_rebuild(struct engine *e, int repartitioned,
e->forcerebuild = 0;
e->restarting = 0;
/* Report the time spent in the different task categories */
if (e->verbose) scheduler_report_task_times(&e->sched, e->nr_threads);
/* Give some breathing space */
scheduler_free_tasks(&e->sched);
/* Re-build the space. */
space_rebuild(e->s, repartitioned, e->verbose);
......@@ -2114,6 +2120,9 @@ void engine_launch(struct engine *e, const char *call) {
/* Sit back and wait for the runners to come home. */
swift_barrier_wait(&e->wait_barrier);
/* Store the wallclock time */
e->sched.total_ticks += getticks() - tic;
if (e->verbose)
message("(%s) took %.3f %s.", call, clocks_from_ticks(getticks() - tic),
clocks_getunit());
......
......@@ -1116,6 +1116,7 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
#endif
t->tic = 0;
t->toc = 0;
t->total_ticks = 0;
if (ci != NULL) cell_set_flag(ci, cell_flag_has_tasks);
if (cj != NULL) cell_set_flag(cj, cell_flag_has_tasks);
......@@ -1317,6 +1318,7 @@ void scheduler_reset(struct scheduler *s, int size) {
s->nr_unlocks = 0;
s->completed_unlock_writes = 0;
s->active_count = 0;
s->total_ticks = 0;
/* Set the task pointers in the queues. */
for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks;
......@@ -2043,6 +2045,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) {
/* Task definitely done, signal any sleeping runners. */
if (!t->implicit) {
t->toc = getticks();
t->total_ticks += t->toc - t->tic;
pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond);
......@@ -2083,6 +2086,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) {
/* Task definitely done. */
if (!t->implicit) {
t->toc = getticks();
t->total_ticks += t->toc - t->tic;
pthread_mutex_lock(&s->sleep_mutex);
atomic_dec(&s->waiting);
pthread_cond_broadcast(&s->sleep_cond);
......@@ -2366,3 +2370,67 @@ void scheduler_dump_queues(struct engine *e) {
}
fclose(file_thread);
}
void scheduler_report_task_times_mapper(void *map_data, int num_elements,
void *extra_data) {
struct task *tasks = (struct task *)map_data;
float time_local[task_category_count] = {0};
float *time_global = (float *)extra_data;
/* Gather the times spent in the different task categories */
for (int i = 0; i < num_elements; ++i) {
const struct task *t = &tasks[i];
const float total_time = clocks_from_ticks(t->total_ticks);
const enum task_categories cat = task_get_category(t);
time_local[cat] += total_time;
}
/* Update the global counters */
for (int i = 0; i < task_category_count; ++i) {
atomic_add_f(&time_global[i], time_local[i]);
}
}
/**
* @brief Display the time spent in the different task categories.
*
* @param s The #scheduler.
* @param nr_threads The number of threads used in the engine.
*/
void scheduler_report_task_times(const struct scheduler *s,
const int nr_threads) {
const ticks tic = getticks();
/* Initialise counters */
float time[task_category_count] = {0};
threadpool_map(s->threadpool, scheduler_report_task_times_mapper, s->tasks,
s->nr_tasks, sizeof(struct task), threadpool_auto_chunk_size,
time);
/* Total CPU time spent in engine_launch() */
const float total_tasks_time = clocks_from_ticks(s->total_ticks) * nr_threads;
/* Compute the dead time */
float total_time = 0.;
for (int i = 0; i < task_category_count; ++i) {
total_time += time[i];
}
const float dead_time = total_tasks_time - total_time;
message("*** CPU time spent in different task categories:");
for (int i = 0; i < task_category_count; ++i) {
message("*** %20s: %8.2f %s (%.2f %%)", task_category_names[i], time[i],
clocks_getunit(), time[i] / total_tasks_time * 100.);
}
message("*** %20s: %8.2f %s (%.2f %%)", "dead time", dead_time,
clocks_getunit(), dead_time / total_tasks_time * 100.);
message("*** %20s: %8.2f %s (%.2f %%)", "total", total_tasks_time,
clocks_getunit(), total_tasks_time / total_tasks_time * 100.);
/* Done. Report the time spent doing this analysis */
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
}
......@@ -108,6 +108,9 @@ struct scheduler {
/* 'Pointer' to the seed for the random number generator */
pthread_key_t local_seed_pointer;
/* Total ticks spent running the tasks */
ticks total_ticks;
};
/* Inlined functions (for speed). */
......@@ -203,5 +206,7 @@ void scheduler_free_tasks(struct scheduler *s);
void scheduler_write_dependencies(struct scheduler *s, int verbose);
void scheduler_write_task_level(const struct scheduler *s);
void scheduler_dump_queues(struct engine *e);
void scheduler_report_task_times(const struct scheduler *s,
const int nr_threads);
#endif /* SWIFT_SCHEDULER_H */
......@@ -112,6 +112,11 @@ const char *subtaskID_names[task_subtype_count] = {
"bh_density", "bh_swallow", "do_gas_swallow", "do_bh_swallow",
"bh_feedback"};
const char *task_category_names[task_category_count] = {
"drift", "sort", "hydro", "gravity", "feedback",
"black holes", "cooling", "star formation", "limiter", "time integration",
"mpi", "fof", "others"};
#ifdef WITH_MPI
/* MPI communicators for the subtypes. */
MPI_Comm subtaskMPI_comms[task_subtype_count];
......@@ -1253,3 +1258,101 @@ void task_dump_active(struct engine *e) {
message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
clocks_getunit());
}
/**
* @brief Return the #task_categories of a given #task.
*
* @param t The #task.
*/
enum task_categories task_get_category(const struct task *t) {
switch (t->type) {
case task_type_cooling:
return task_category_cooling;
case task_type_star_formation:
return task_category_star_formation;
case task_type_drift_part:
case task_type_drift_spart:
case task_type_drift_bpart:
case task_type_drift_gpart:
return task_category_drift;
case task_type_sort:
case task_type_stars_sort:
case task_type_stars_resort:
return task_category_sort;
case task_type_send:
case task_type_recv:
return task_category_mpi;
case task_type_kick1:
case task_type_kick2:
case task_type_timestep:
return task_category_time_integration;
case task_type_timestep_limiter:
case task_type_timestep_sync:
return task_category_limiter;
case task_type_ghost:
case task_type_extra_ghost:
case task_type_end_hydro_force:
return task_category_hydro;
case task_type_stars_ghost:
return task_category_feedback;
case task_type_bh_density_ghost:
case task_type_bh_swallow_ghost2:
return task_category_black_holes;
case task_type_init_grav:
case task_type_grav_long_range:
case task_type_grav_mm:
case task_type_grav_down:
case task_type_grav_mesh:
case task_type_end_grav_force:
return task_category_gravity;
case task_type_self:
case task_type_pair:
case task_type_sub_self:
case task_type_sub_pair: {
switch (t->subtype) {
case task_subtype_density:
case task_subtype_gradient:
case task_subtype_force:
return task_category_hydro;
case task_subtype_limiter:
return task_category_limiter;
case task_subtype_grav:
case task_subtype_external_grav:
return task_category_gravity;
case task_subtype_stars_density:
case task_subtype_stars_feedback:
return task_category_feedback;
case task_subtype_bh_density:
case task_subtype_bh_swallow:
case task_subtype_do_gas_swallow:
case task_subtype_do_bh_swallow:
case task_subtype_bh_feedback:
return task_category_black_holes;
default:
return task_category_others;
}
}
default:
return task_category_others;
}
}
......@@ -147,6 +147,26 @@ enum task_actions {
task_action_count
};
/**
* @brief The broad categories of the tasks.
*/
enum task_categories {
task_category_drift,
task_category_sort,
task_category_hydro,
task_category_gravity,
task_category_feedback,
task_category_black_holes,
task_category_cooling,
task_category_star_formation,
task_category_limiter,
task_category_time_integration,
task_category_mpi,
task_category_fof,
task_category_others,
task_category_count
};
/**
* @brief Names of the task types.
*/
......@@ -157,6 +177,11 @@ extern const char *taskID_names[];
*/
extern const char *subtaskID_names[];
/**
* @brief Names of the task categories.
*/
extern const char *task_category_names[];
/**
* @brief The MPI communicators for the different subtypes.
*/
......@@ -223,6 +248,9 @@ struct task {
/*! Start and end time of this task */
ticks tic, toc;
/* Total time spent running this task */
ticks total_ticks;
#ifdef SWIFT_DEBUG_CHECKS
/* When was this task last run? */
integertime_t ti_run;
......@@ -242,6 +270,7 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header,
void task_dump_active(struct engine *e);
void task_get_full_name(int type, int subtype, char *name);
void task_get_group_name(int type, int subtype, char *cluster);
enum task_categories task_get_category(const struct task *t);
#ifdef WITH_MPI
void task_create_mpi_comms(void);
......
......@@ -35,17 +35,17 @@ params = {
"xtick.labelsize": 10,
"ytick.labelsize": 10,
"text.usetex": True,
"figure.figsize": (6.45, 6.45),
"figure.figsize": (12.45, 6.45),
"figure.subplot.left": 0.06,
"figure.subplot.right": 0.99,
"figure.subplot.bottom": 0.06,
"figure.subplot.top": 0.99,
"figure.subplot.wspace": 0.21,
"figure.subplot.hspace": 0.13,
"figure.subplot.wspace": 0.0,
"figure.subplot.hspace": 0.0,
"lines.markersize": 6,
"lines.linewidth": 3.0,
"text.latex.unicode": True,
'hatch.linewidth': 4
"hatch.linewidth": 4,
}
rcParams.update(params)
......@@ -81,6 +81,7 @@ labels = [
["scheduler_reweight:", 1],
["space_list_useful_top_level_cells:", 1],
["space_rebuild:", 1],
["scheduler_report_task_times:", 1],
["engine_drift_all:", 0],
["engine_unskip:", 0],
["engine_unskip_timestep_communications:", 0],
......@@ -138,11 +139,34 @@ cols = [
"#4477AA",
]
tasks = [
"drift",
"sort",
"hydro",
"gravity",
"feedback",
"black holes",
"cooling",
"star formation",
"limiter",
"time integration",
"mpi",
"fof",
"others",
"dead time",
"total",
]
times_tasks = np.zeros(len(tasks))
counts_tasks = np.zeros(len(tasks))
total_time = 0
lastline = ""
for i in range(num_files):
# First analyse the code sections
filename = sys.argv[i + 1]
print("Analysing %s" % filename)
......@@ -170,10 +194,35 @@ for i in range(num_files):
lastline = line
# Total run time
total_time += float(re.findall(r"[+-]?(\[[0-9]\])?(\[[0-9]*[.][0-9]*\])+", lastline)[0][1][1:-1])
total_time += float(
re.findall(r"[+-]?(\[[0-9]\])?(\[[0-9]*[.][0-9]*\])+", lastline)[0][1][1:-1]
)
file.close()
# Now analyse the task times
file = open(filename, "r")
# Search the different phrases
for line in file:
# Loop over the possbile labels
for i in range(len(tasks)):
# Extract the different blocks
if re.search("scheduler_report_task_times: \*\*\* ", line):
if re.search("%s" % tasks[i], line):
counts_tasks[i] += 1.0
times_tasks[i] += float(
re.findall(r":[ ]*[-+]?\d*\.\d+|\d+ ms", line)[0][1:]
)
file.close()
# Conver to seconds
times /= 1000.0
times_tasks /= 1000.0
# Total time
total_measured_time = np.sum(times)
......@@ -192,6 +241,10 @@ for i in range(len(labels)):
times = np.array(times)
time_ratios = np.array(time_ratios)
times_tasks = np.array(times_tasks)
times_tasks_ratios = times_tasks / times_tasks[-1]
times_tasks_ratios = np.array(times_tasks_ratios)
# Sort in order of importance
order = np.argsort(-times)
......@@ -202,7 +255,7 @@ labels = [labels[i] for i in order]
# Remove the regexp escapes to make the labels prettier
for i in range(len(labels)):
labels[i][0] = labels[i][0].replace("\\","")
labels[i][0] = labels[i][0].replace("\\", "")
# Keep only the important components
important_times = [0.0]
......@@ -228,7 +281,10 @@ for i in range(len(labels)):
important_times[0] += times[i]
important_ratios[0] += time_ratios[i]
print(" - '%-40s' (%5d calls, time: %.4fs): %.4f%%" % (labels[i][0], counts[i], times[i], time_ratios[i] * 100))
print(
" - '%-40s' (%5d calls, time: %.4fs): %.4f%%"
% (labels[i][0], counts[i], times[i], time_ratios[i] * 100)
)
# Anything unaccounted for?
print(
......@@ -239,14 +295,26 @@ print(
important_ratios = np.array(important_ratios)
important_is_rebuild = np.array(important_is_rebuild)
print("Time spent in the different task categories (i.e. inside engine_launch()):")
for i in range(len(tasks) - 1):
print(
" - '%-40s' (%5d calls): %.4f%%"
% (tasks[i], counts_tasks[i], 100.0 * times_tasks_ratios[i])
)
print("")
figure()
# Main code sections
subplot(121)
def func(pct):
return "$%4.2f\\%%$" % pct
pie, _, _ = pie(
code_pie, _, _ = pie(
important_ratios,
explode=important_is_rebuild * 0.2,
autopct=lambda pct: func(pct),
......@@ -258,17 +326,32 @@ pie, _, _ = pie(
)
# Use hashing for the FOF and VR wedges
for i in range(len(pie)):
if (important_is_fof[i]):
pie[i].set_hatch('o')
pie[i].set_edgecolor(pie[i].get_facecolor())
pie[i].set_fill(False)
for i in range(len(pie)):
if (important_is_VR[i]):
pie[i].set_hatch('+')
pie[i].set_edgecolor(pie[i].get_facecolor())
pie[i].set_fill(False)
legend(pie, important_labels, title="SWIFT operations", loc="upper left")
for i in range(len(code_pie)):
if important_is_fof[i]:
code_pie[i].set_hatch("o")
code_pie[i].set_edgecolor(code_pie[i].get_facecolor())
code_pie[i].set_fill(False)
for i in range(len(code_pie)):
if important_is_VR[i]:
code_pie[i].set_hatch("+")
code_pie[i].set_edgecolor(code_pie[i].get_facecolor())
code_pie[i].set_fill(False)
legend(code_pie, important_labels, title="SWIFT operations", loc="upper left")
# Tasks
subplot(122)
tasks_pie, _, _ = pie(
times_tasks_ratios[:-1],
autopct=lambda pct: func(pct),
textprops=dict(color="0.1", fontsize=14),
labeldistance=0.7,
pctdistance=0.85,
startangle=-15,
colors=cols[1:],
)
legend(tasks_pie, tasks, title="SWIFT task categories", loc="upper left")
savefig("time_pie.pdf", dpi=150)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment