diff --git a/src/engine.c b/src/engine.c index 10d207b0c6be66e7b274eac21001a7a57050bcdb..95507065daf595d44f7909617885cf82a2309e22 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1670,6 +1670,12 @@ void engine_rebuild(struct engine *e, int repartitioned, e->forcerebuild = 0; e->restarting = 0; + /* Report the time spent in the different task categories */ + if (e->verbose) scheduler_report_task_times(&e->sched, e->nr_threads); + + /* Give some breathing space */ + scheduler_free_tasks(&e->sched); + /* Re-build the space. */ space_rebuild(e->s, repartitioned, e->verbose); @@ -2114,6 +2120,9 @@ void engine_launch(struct engine *e, const char *call) { /* Sit back and wait for the runners to come home. */ swift_barrier_wait(&e->wait_barrier); + /* Store the wallclock time */ + e->sched.total_ticks += getticks() - tic; + if (e->verbose) message("(%s) took %.3f %s.", call, clocks_from_ticks(getticks() - tic), clocks_getunit()); diff --git a/src/scheduler.c b/src/scheduler.c index 24efe915a8a5e1f7a9cf066fd356d54e1dfc7b0d..d7fe3fb46583b92b9b505cf4f9313aabebf01032 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -1116,6 +1116,7 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, #endif t->tic = 0; t->toc = 0; + t->total_ticks = 0; if (ci != NULL) cell_set_flag(ci, cell_flag_has_tasks); if (cj != NULL) cell_set_flag(cj, cell_flag_has_tasks); @@ -1317,6 +1318,7 @@ void scheduler_reset(struct scheduler *s, int size) { s->nr_unlocks = 0; s->completed_unlock_writes = 0; s->active_count = 0; + s->total_ticks = 0; /* Set the task pointers in the queues. */ for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; @@ -2043,6 +2045,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { /* Task definitely done, signal any sleeping runners. */ if (!t->implicit) { t->toc = getticks(); + t->total_ticks += t->toc - t->tic; pthread_mutex_lock(&s->sleep_mutex); atomic_dec(&s->waiting); pthread_cond_broadcast(&s->sleep_cond); @@ -2083,6 +2086,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { /* Task definitely done. */ if (!t->implicit) { t->toc = getticks(); + t->total_ticks += t->toc - t->tic; pthread_mutex_lock(&s->sleep_mutex); atomic_dec(&s->waiting); pthread_cond_broadcast(&s->sleep_cond); @@ -2366,3 +2370,67 @@ void scheduler_dump_queues(struct engine *e) { } fclose(file_thread); } + +void scheduler_report_task_times_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct task *tasks = (struct task *)map_data; + float time_local[task_category_count] = {0}; + float *time_global = (float *)extra_data; + + /* Gather the times spent in the different task categories */ + for (int i = 0; i < num_elements; ++i) { + + const struct task *t = &tasks[i]; + const float total_time = clocks_from_ticks(t->total_ticks); + const enum task_categories cat = task_get_category(t); + time_local[cat] += total_time; + } + + /* Update the global counters */ + for (int i = 0; i < task_category_count; ++i) { + atomic_add_f(&time_global[i], time_local[i]); + } +} + +/** + * @brief Display the time spent in the different task categories. + * + * @param s The #scheduler. + * @param nr_threads The number of threads used in the engine. + */ +void scheduler_report_task_times(const struct scheduler *s, + const int nr_threads) { + + const ticks tic = getticks(); + + /* Initialise counters */ + float time[task_category_count] = {0}; + threadpool_map(s->threadpool, scheduler_report_task_times_mapper, s->tasks, + s->nr_tasks, sizeof(struct task), threadpool_auto_chunk_size, + time); + + /* Total CPU time spent in engine_launch() */ + const float total_tasks_time = clocks_from_ticks(s->total_ticks) * nr_threads; + + /* Compute the dead time */ + float total_time = 0.; + for (int i = 0; i < task_category_count; ++i) { + total_time += time[i]; + } + const float dead_time = total_tasks_time - total_time; + + message("*** CPU time spent in different task categories:"); + for (int i = 0; i < task_category_count; ++i) { + message("*** %20s: %8.2f %s (%.2f %%)", task_category_names[i], time[i], + clocks_getunit(), time[i] / total_tasks_time * 100.); + } + message("*** %20s: %8.2f %s (%.2f %%)", "dead time", dead_time, + clocks_getunit(), dead_time / total_tasks_time * 100.); + message("*** %20s: %8.2f %s (%.2f %%)", "total", total_tasks_time, + clocks_getunit(), total_tasks_time / total_tasks_time * 100.); + + /* Done. Report the time spent doing this analysis */ + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} diff --git a/src/scheduler.h b/src/scheduler.h index 23cb39afbd7ac890d2d3c7ecacef3bffe01b0128..19baedca557762fb567aec81df33c17ed317dbc0 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -108,6 +108,9 @@ struct scheduler { /* 'Pointer' to the seed for the random number generator */ pthread_key_t local_seed_pointer; + + /* Total ticks spent running the tasks */ + ticks total_ticks; }; /* Inlined functions (for speed). */ @@ -203,5 +206,7 @@ void scheduler_free_tasks(struct scheduler *s); void scheduler_write_dependencies(struct scheduler *s, int verbose); void scheduler_write_task_level(const struct scheduler *s); void scheduler_dump_queues(struct engine *e); +void scheduler_report_task_times(const struct scheduler *s, + const int nr_threads); #endif /* SWIFT_SCHEDULER_H */ diff --git a/src/task.c b/src/task.c index 4930d28a9e9477f012f1167cb46c2f0ed7a1fa4a..19ba01580e6ca519a48fd934d0bb8c21b0670343 100644 --- a/src/task.c +++ b/src/task.c @@ -112,6 +112,11 @@ const char *subtaskID_names[task_subtype_count] = { "bh_density", "bh_swallow", "do_gas_swallow", "do_bh_swallow", "bh_feedback"}; +const char *task_category_names[task_category_count] = { + "drift", "sort", "hydro", "gravity", "feedback", + "black holes", "cooling", "star formation", "limiter", "time integration", + "mpi", "fof", "others"}; + #ifdef WITH_MPI /* MPI communicators for the subtypes. */ MPI_Comm subtaskMPI_comms[task_subtype_count]; @@ -1253,3 +1258,101 @@ void task_dump_active(struct engine *e) { message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); } + +/** + * @brief Return the #task_categories of a given #task. + * + * @param t The #task. + */ +enum task_categories task_get_category(const struct task *t) { + + switch (t->type) { + + case task_type_cooling: + return task_category_cooling; + + case task_type_star_formation: + return task_category_star_formation; + + case task_type_drift_part: + case task_type_drift_spart: + case task_type_drift_bpart: + case task_type_drift_gpart: + return task_category_drift; + + case task_type_sort: + case task_type_stars_sort: + case task_type_stars_resort: + return task_category_sort; + + case task_type_send: + case task_type_recv: + return task_category_mpi; + + case task_type_kick1: + case task_type_kick2: + case task_type_timestep: + return task_category_time_integration; + + case task_type_timestep_limiter: + case task_type_timestep_sync: + return task_category_limiter; + + case task_type_ghost: + case task_type_extra_ghost: + case task_type_end_hydro_force: + return task_category_hydro; + + case task_type_stars_ghost: + return task_category_feedback; + + case task_type_bh_density_ghost: + case task_type_bh_swallow_ghost2: + return task_category_black_holes; + + case task_type_init_grav: + case task_type_grav_long_range: + case task_type_grav_mm: + case task_type_grav_down: + case task_type_grav_mesh: + case task_type_end_grav_force: + return task_category_gravity; + + case task_type_self: + case task_type_pair: + case task_type_sub_self: + case task_type_sub_pair: { + switch (t->subtype) { + + case task_subtype_density: + case task_subtype_gradient: + case task_subtype_force: + return task_category_hydro; + + case task_subtype_limiter: + return task_category_limiter; + + case task_subtype_grav: + case task_subtype_external_grav: + return task_category_gravity; + + case task_subtype_stars_density: + case task_subtype_stars_feedback: + return task_category_feedback; + + case task_subtype_bh_density: + case task_subtype_bh_swallow: + case task_subtype_do_gas_swallow: + case task_subtype_do_bh_swallow: + case task_subtype_bh_feedback: + return task_category_black_holes; + + default: + return task_category_others; + } + } + + default: + return task_category_others; + } +} diff --git a/src/task.h b/src/task.h index 02e1c5ca385749c3dff347b44d8e1f0e984fadd0..a093031c9ffeb8ec8480bc22f0b7bd9b429c0386 100644 --- a/src/task.h +++ b/src/task.h @@ -147,6 +147,26 @@ enum task_actions { task_action_count }; +/** + * @brief The broad categories of the tasks. + */ +enum task_categories { + task_category_drift, + task_category_sort, + task_category_hydro, + task_category_gravity, + task_category_feedback, + task_category_black_holes, + task_category_cooling, + task_category_star_formation, + task_category_limiter, + task_category_time_integration, + task_category_mpi, + task_category_fof, + task_category_others, + task_category_count +}; + /** * @brief Names of the task types. */ @@ -157,6 +177,11 @@ extern const char *taskID_names[]; */ extern const char *subtaskID_names[]; +/** + * @brief Names of the task categories. + */ +extern const char *task_category_names[]; + /** * @brief The MPI communicators for the different subtypes. */ @@ -223,6 +248,9 @@ struct task { /*! Start and end time of this task */ ticks tic, toc; + /* Total time spent running this task */ + ticks total_ticks; + #ifdef SWIFT_DEBUG_CHECKS /* When was this task last run? */ integertime_t ti_run; @@ -242,6 +270,7 @@ void task_dump_stats(const char *dumpfile, struct engine *e, int header, void task_dump_active(struct engine *e); void task_get_full_name(int type, int subtype, char *name); void task_get_group_name(int type, int subtype, char *cluster); +enum task_categories task_get_category(const struct task *t); #ifdef WITH_MPI void task_create_mpi_comms(void); diff --git a/tools/analyse_runtime.py b/tools/analyse_runtime.py index 382d4ab5cab100c04e82ae3b9813a37bec43bb0e..7f3b5ee593333c57aba1e150bccb8e1da33de259 100755 --- a/tools/analyse_runtime.py +++ b/tools/analyse_runtime.py @@ -35,17 +35,17 @@ params = { "xtick.labelsize": 10, "ytick.labelsize": 10, "text.usetex": True, - "figure.figsize": (6.45, 6.45), + "figure.figsize": (12.45, 6.45), "figure.subplot.left": 0.06, "figure.subplot.right": 0.99, "figure.subplot.bottom": 0.06, "figure.subplot.top": 0.99, - "figure.subplot.wspace": 0.21, - "figure.subplot.hspace": 0.13, + "figure.subplot.wspace": 0.0, + "figure.subplot.hspace": 0.0, "lines.markersize": 6, "lines.linewidth": 3.0, "text.latex.unicode": True, - 'hatch.linewidth': 4 + "hatch.linewidth": 4, } rcParams.update(params) @@ -81,6 +81,7 @@ labels = [ ["scheduler_reweight:", 1], ["space_list_useful_top_level_cells:", 1], ["space_rebuild:", 1], + ["scheduler_report_task_times:", 1], ["engine_drift_all:", 0], ["engine_unskip:", 0], ["engine_unskip_timestep_communications:", 0], @@ -138,11 +139,34 @@ cols = [ "#4477AA", ] +tasks = [ + "drift", + "sort", + "hydro", + "gravity", + "feedback", + "black holes", + "cooling", + "star formation", + "limiter", + "time integration", + "mpi", + "fof", + "others", + "dead time", + "total", +] + +times_tasks = np.zeros(len(tasks)) +counts_tasks = np.zeros(len(tasks)) + total_time = 0 lastline = "" for i in range(num_files): + # First analyse the code sections + filename = sys.argv[i + 1] print("Analysing %s" % filename) @@ -170,10 +194,35 @@ for i in range(num_files): lastline = line # Total run time - total_time += float(re.findall(r"[+-]?(\[[0-9]\])?(\[[0-9]*[.][0-9]*\])+", lastline)[0][1][1:-1]) + total_time += float( + re.findall(r"[+-]?(\[[0-9]\])?(\[[0-9]*[.][0-9]*\])+", lastline)[0][1][1:-1] + ) + + file.close() + + # Now analyse the task times + + file = open(filename, "r") + + # Search the different phrases + for line in file: + + # Loop over the possbile labels + for i in range(len(tasks)): + + # Extract the different blocks + if re.search("scheduler_report_task_times: \*\*\* ", line): + if re.search("%s" % tasks[i], line): + counts_tasks[i] += 1.0 + times_tasks[i] += float( + re.findall(r":[ ]*[-+]?\d*\.\d+|\d+ ms", line)[0][1:] + ) + + file.close() # Conver to seconds times /= 1000.0 +times_tasks /= 1000.0 # Total time total_measured_time = np.sum(times) @@ -192,6 +241,10 @@ for i in range(len(labels)): times = np.array(times) time_ratios = np.array(time_ratios) +times_tasks = np.array(times_tasks) + +times_tasks_ratios = times_tasks / times_tasks[-1] +times_tasks_ratios = np.array(times_tasks_ratios) # Sort in order of importance order = np.argsort(-times) @@ -202,7 +255,7 @@ labels = [labels[i] for i in order] # Remove the regexp escapes to make the labels prettier for i in range(len(labels)): - labels[i][0] = labels[i][0].replace("\\","") + labels[i][0] = labels[i][0].replace("\\", "") # Keep only the important components important_times = [0.0] @@ -228,7 +281,10 @@ for i in range(len(labels)): important_times[0] += times[i] important_ratios[0] += time_ratios[i] - print(" - '%-40s' (%5d calls, time: %.4fs): %.4f%%" % (labels[i][0], counts[i], times[i], time_ratios[i] * 100)) + print( + " - '%-40s' (%5d calls, time: %.4fs): %.4f%%" + % (labels[i][0], counts[i], times[i], time_ratios[i] * 100) + ) # Anything unaccounted for? print( @@ -239,14 +295,26 @@ print( important_ratios = np.array(important_ratios) important_is_rebuild = np.array(important_is_rebuild) +print("Time spent in the different task categories (i.e. inside engine_launch()):") + +for i in range(len(tasks) - 1): + print( + " - '%-40s' (%5d calls): %.4f%%" + % (tasks[i], counts_tasks[i], 100.0 * times_tasks_ratios[i]) + ) +print("") figure() +# Main code sections +subplot(121) + + def func(pct): return "$%4.2f\\%%$" % pct -pie, _, _ = pie( +code_pie, _, _ = pie( important_ratios, explode=important_is_rebuild * 0.2, autopct=lambda pct: func(pct), @@ -258,17 +326,32 @@ pie, _, _ = pie( ) # Use hashing for the FOF and VR wedges -for i in range(len(pie)): - if (important_is_fof[i]): - pie[i].set_hatch('o') - pie[i].set_edgecolor(pie[i].get_facecolor()) - pie[i].set_fill(False) -for i in range(len(pie)): - if (important_is_VR[i]): - pie[i].set_hatch('+') - pie[i].set_edgecolor(pie[i].get_facecolor()) - pie[i].set_fill(False) - -legend(pie, important_labels, title="SWIFT operations", loc="upper left") +for i in range(len(code_pie)): + if important_is_fof[i]: + code_pie[i].set_hatch("o") + code_pie[i].set_edgecolor(code_pie[i].get_facecolor()) + code_pie[i].set_fill(False) +for i in range(len(code_pie)): + if important_is_VR[i]: + code_pie[i].set_hatch("+") + code_pie[i].set_edgecolor(code_pie[i].get_facecolor()) + code_pie[i].set_fill(False) + +legend(code_pie, important_labels, title="SWIFT operations", loc="upper left") + +# Tasks +subplot(122) + +tasks_pie, _, _ = pie( + times_tasks_ratios[:-1], + autopct=lambda pct: func(pct), + textprops=dict(color="0.1", fontsize=14), + labeldistance=0.7, + pctdistance=0.85, + startangle=-15, + colors=cols[1:], +) + +legend(tasks_pie, tasks, title="SWIFT task categories", loc="upper left") savefig("time_pie.pdf", dpi=150)