diff --git a/examples/main.c b/examples/main.c index 0e3cdc9b33b99aea2c8178c8469aa215c76c8fb6..6882b4753049bcab08effb981b8076b9bf1156e5 100644 --- a/examples/main.c +++ b/examples/main.c @@ -283,11 +283,11 @@ int main(int argc, char *argv[]) { #ifndef SWIFT_DEBUG_TASKS if (dump_tasks) { - if (myrank == 0) { - message("WARNING: complete task dumps are only created when " - "configured with --enable-task-debugging."); - message(" Basic task statistics will be output."); - } + if (myrank == 0) { + message("WARNING: complete task dumps are only created when " + "configured with --enable-task-debugging."); + message(" Basic task statistics will be output."); + } } #endif @@ -1037,7 +1037,9 @@ int main(int argc, char *argv[]) { #endif /* Generate the task statistics. */ - task_dump_stats(&e, j + 1); + char dumpfile[40]; + snprintf(dumpfile, 40, "thread_stats-step%d.dat", j + 1); + task_dump_stats(dumpfile, &e, /* header = */ 0, /* allranks = */ 1); } #ifdef SWIFT_DEBUG_THREADPOOL diff --git a/src/Makefile.am b/src/Makefile.am index 9b0610667bdcf1f760f6e94d4481848a8fc4d0f0..aa8972f9ee5d2f366b268d9107b4cb0c4f6c49bf 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,8 +41,8 @@ endif # List required headers include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \ - common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h \ - physical_constants.h physical_constants_cgs.h potential.h version.h \ + common_io.h single_io.h multipole.h map.h tools.h partition.h partition_fixed_costs.h \ + clocks.h parser.h physical_constants.h physical_constants_cgs.h potential.h version.h \ hydro_properties.h riemann.h threadpool.h cooling_io.h cooling.h cooling_struct.h \ sourceterms.h sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \ dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \ diff 
--git a/src/engine.c b/src/engine.c index ac9456f1c8a2ba909877d9f520a887e2883be08e..92e892a7c8c0fa0963198aa2b5eb6e12aab76e40 100644 --- a/src/engine.c +++ b/src/engine.c @@ -991,6 +991,11 @@ void engine_repartition(struct engine *e) { * bug that doesn't handle this case well. */ if (e->nr_nodes == 1) return; + /* Generate the fixed costs include file. */ + if (e->step > 3 && e->reparttype->trigger <= 1.f) { + task_dump_stats("partition_fixed_costs.h", e, /* header = */ 1, /* allranks = */ 1); + } + /* Do the repartitioning. */ partition_repartition(e->reparttype, e->nodeID, e->nr_nodes, e->s, e->sched.tasks, e->sched.nr_tasks); diff --git a/src/task.c b/src/task.c index 00c796423346dc1a7d8cba64bd943a33ef57908f..0f5b0e8d54540b776bf3f4377fb061c48e9316ab 100644 --- a/src/task.c +++ b/src/task.c @@ -687,26 +687,26 @@ void task_dump_all(struct engine *e, int step) { /** * @brief Generate simple statistics about the times used by the tasks of - * all the engines and write these into two files, a human readable - * version and one intented for inclusion as the fixed costs for - * repartitioning. + * all the engines and write these in one of two formats, a human readable + * version for debugging and one intended for inclusion as the fixed + * costs for repartitioning. * - * Dumps the human readable information to a file "thread_stats-stepn.dat" - * where n is the given step value. When running under MPI all the tasks are - * summed into this single file. + * Note that when running under MPI all the tasks can be summed into this single + * file. In the fuller, human readable file, the statistics included are the + * number of tasks of each type/subtype followed by the minimum, maximum, mean + * and total time, in millisec and then the fixed costs value. * - * The fixed costs file will be called "thread_stats-stepn.h". 
+ * If header is set, only the fixed costs value is written into the output + * file in a format that is suitable for inclusion in SWIFT (as + * partition_fixed_costs.h). * + * @param dumpfile name of the file for the output. * @param e the #engine - * @param step the current step. + * @param header whether to write a header include file. + * @param allranks do the statistics over all ranks, if not just the current + * one, only used if header is false. */ -void task_dump_stats(struct engine *e, int step) { - - char dumpfile[40]; - snprintf(dumpfile, 40, "thread_stats-step%d.dat", step); - - char costsfile[40]; - snprintf(costsfile, 40, "thread_stats-step%d.h", step); +void task_dump_stats(const char *dumpfile, struct engine *e, int header, int allranks) { /* Need arrays for sum, min and max across all types and subtypes. */ double sum[task_type_count][task_subtype_count]; @@ -746,39 +746,43 @@ void task_dump_stats(struct engine *e, int step) { } } -#ifdef WITH_MPI - /* Get these from all ranks for output from rank 0. Could wrap these into a - * single operation. */ - size_t size = task_type_count * task_subtype_count; - int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size, - MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums"); - - res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size, - MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); - if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts"); - - res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size, - MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); - if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima"); - res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size, - MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); - if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima"); - - res = MPI_Reduce((engine_rank == 0 ? 
MPI_IN_PLACE : total), total, 1, - MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); - if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time"); +#ifdef WITH_MPI + if (allranks || header) { + /* Get these from all ranks for output from rank 0. Could wrap these into a + * single operation. */ + size_t size = task_type_count * task_subtype_count; + int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size, + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums"); + + res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size, + MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts"); + + res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size, + MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima"); + + res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size, + MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima"); + + res = MPI_Reduce((engine_rank == 0 ? 
MPI_IN_PLACE : total), total, 1, + MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time"); + } - if (engine_rank == 0) { + if (!allranks || (engine_rank == 0 && (allranks || header))) { #endif FILE *dfile = fopen(dumpfile, "w"); - fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n"); - - FILE *cfile = fopen(costsfile, "w"); - fprintf(cfile, "/* use as src/partition_fixed_costs.h */\n"); - fprintf(cfile, "#define HAVE_FIXED_COSTS 1\n"); + if (header) { + fprintf(dfile, "/* use as src/partition_fixed_costs.h */\n"); + fprintf(dfile, "#define HAVE_FIXED_COSTS 1\n"); + } else { + fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n"); + } for (int j = 0; j < task_type_count; j++) { const char *taskID = taskID_names[j]; @@ -790,19 +794,21 @@ void task_dump_stats(struct engine *e, int step) { /* Fixed cost is in .1ns as we want to compare between runs in * some absolute units. */ int fixed_cost = (int)(clocks_from_ticks(mean) * 10000.f); - fprintf(dfile, - "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n", - taskID, subtaskID_names[k], count[j][k], - clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]), - clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), - perc, fixed_cost); - fprintf(cfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j, - k, fixed_cost, taskID, subtaskID_names[k]); + if (header) { + fprintf(dfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n", j, + k, fixed_cost, taskID, subtaskID_names[k]); + } else { + fprintf(dfile, + "%15s/%-10s %10d %14.4f %14.4f %14.4f %14.4f %14.4f %10d\n", + taskID, subtaskID_names[k], count[j][k], + clocks_from_ticks(min[j][k]), clocks_from_ticks(max[j][k]), + clocks_from_ticks(sum[j][k]), clocks_from_ticks(mean), + perc, fixed_cost); + } } } } fclose(dfile); - fclose(cfile); #ifdef WITH_MPI } #endif diff --git a/src/task.h b/src/task.h index 
2122f757220f2fea6691c45370b513a913aeeac8..61af35c69449972ee235d578eec320b74a1bff3e 100644 --- a/src/task.h +++ b/src/task.h @@ -199,7 +199,7 @@ int task_lock(struct task *t); void task_do_rewait(struct task *t); void task_print(const struct task *t); void task_dump_all(struct engine *e, int step); -void task_dump_stats(struct engine *e, int step); +void task_dump_stats(const char *dumpfile, struct engine *e, int header, int allranks); #ifdef WITH_MPI void task_create_mpi_comms(void); #endif