Commit 832f6c22 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Add fixed costs for repartitioning

Also adds new task dumps with basic stats of the tasks and a .h form suitable for inclusion in partition.c
parent 6c1f74db
......@@ -1089,6 +1089,9 @@ int main(int argc, char *argv[]) {
/* Dump the task data using the given frequency. */
if (dump_tasks && (dump_tasks == 1 || j % dump_tasks == 1)) {
task_dump_all(&e, j + 1);
/* Generate the task statistics. */
task_dump_stats(&e, j + 1);
}
#endif
......
......@@ -77,6 +77,15 @@ const char *repartition_name[] = {
/* Local functions, if needed. */
static int check_complete(struct space *s, int verbose, int nregions);
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
/*
* Repartition fixed costs per type/subtype. These are determined from the
* statistics output produced when running with task debugging enabled.
*/
static double repartition_costs[task_type_count][task_subtype_count];
static void repart_init_fixed_costs(int policy);
#endif
/* Vectorisation support */
/* ===================== */
......@@ -1195,10 +1204,11 @@ void partition_gather_weights(void *map_data, int num_elements,
struct task *t = &tasks[i];
/* Skip un-interesting tasks. */
if (t->cost == 0.f) continue;
if (t->type == task_type_send || t->type == task_type_recv ||
t->type == task_type_logger || t->implicit) continue;
/* Get the task weight based on costs. */
double w = (double)t->cost;
/* Get the task weight based on fixed cost for this task type. */
double w = repartition_costs[t->type][t->subtype];
/* Get the top-level cells involved. */
struct cell *ci, *cj;
......@@ -1527,6 +1537,7 @@ void partition_repartition(struct repartition *reparttype, int nodeID,
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
ticks tic = getticks();
repart_init_fixed_costs(s->e->policy);
if (reparttype->type == REPART_METIS_VERTEX_EDGE_COSTS) {
repart_edge_metis(1, 1, 0, reparttype, nodeID, nr_nodes, s, tasks,
......@@ -1700,7 +1711,7 @@ void partition_initial_partition(struct partition *initial_partition,
/**
* @brief Initialises the partition and re-partition scheme from the parameter
* file
* file.
*
* @param partition The #partition scheme to initialise.
* @param repartition The #repartition scheme to initialise.
......@@ -1843,6 +1854,95 @@ void partition_init(struct partition *partition,
#endif
}
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
/**
 * @brief Set the fixed costs for repartition using METIS.
 *
 * These are determined using a run with task debugging enabled which gives a
 * statistical analysis condensed into a .h file. Note that some tasks have
 * different costs depending on the engine policies, for instance the kicks
 * do work with self gravity and hydro, we attempt to allow for that. Finally
 * note this is a statistical solution, so requires that there are sufficient
 * tasks on each rank so that the fixed costs do the right thing on average,
 * you may like to use task ticks as weights if this isn't working.
 *
 * @param policy the #engine policy.
 */
static void repart_init_fixed_costs(int policy) {

  /* Set the default fixed cost so that every task type contributes a
   * non-zero weight even when not listed below. */
  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      repartition_costs[j][k] = 1.0;
    }
  }

  /* TODO: these may be separable, so we could have costs for each policy
   * addition, if separated need to take care with the relative scaling. */

  /* NOTE: the numeric indices below must match the task_type and
   * task_subtype enum orderings; they are produced by the
   * thread_stats-step<n>.h output of task_dump_stats(). */

  /* The policy flags are distinct bits, so each must be tested separately:
   * ANDing the two flag values together would give zero and the combined
   * branch could never be taken. */
  if ((policy & engine_policy_hydro) && (policy & engine_policy_self_gravity)) {

    /* EAGLE_50 -s -G -S 8 nodes 16 cores */
    repartition_costs[1][0] = 45842;   /* sort/none */
    repartition_costs[2][1] = 15609;   /* self/density */
    repartition_costs[2][3] = 18830;   /* self/force */
    repartition_costs[2][4] = 202588;  /* self/grav */
    repartition_costs[3][1] = 67;      /* pair/density */
    repartition_costs[3][3] = 207;     /* pair/force */
    repartition_costs[3][4] = 1507;    /* pair/grav */
    repartition_costs[4][1] = 36268;   /* sub_self/density */
    repartition_costs[4][3] = 52252;   /* sub_self/force */
    repartition_costs[5][1] = 709;     /* sub_pair/density */
    repartition_costs[5][3] = 1258;    /* sub_pair/force */
    repartition_costs[6][0] = 1135;    /* init_grav/none */
    repartition_costs[9][0] = 160;     /* ghost/none */
    repartition_costs[12][0] = 7176;   /* drift_part/none */
    repartition_costs[13][0] = 4502;   /* drift_gpart/none */
    repartition_costs[15][0] = 8234;   /* end_force/none */
    repartition_costs[16][0] = 15508;  /* kick1/none */
    repartition_costs[17][0] = 15780;  /* kick2/none */
    repartition_costs[18][0] = 19848;  /* timestep/none */
    repartition_costs[21][0] = 4105;   /* grav_long_range/none */
    repartition_costs[22][0] = 68;     /* grav_mm/none */
    repartition_costs[24][0] = 16785;  /* grav_down/none */
    repartition_costs[25][0] = 70632;  /* grav_mesh/none */

  } else if (policy & engine_policy_self_gravity) {

    /* EAGLE_50 -G 8 nodes 16 cores, scaled to match self/grav. */
    repartition_costs[2][4] = 202588;  /* self/grav */
    repartition_costs[3][4] = 1760;    /* pair/grav */
    repartition_costs[6][0] = 1610;    /* init_grav/none */
    repartition_costs[13][0] = 999;    /* drift_gpart/none */
    repartition_costs[15][0] = 3481;   /* end_force/none */
    repartition_costs[16][0] = 6336;   /* kick1/none */
    repartition_costs[17][0] = 6343;   /* kick2/none */
    repartition_costs[18][0] = 13864;  /* timestep/none */
    repartition_costs[21][0] = 1422;   /* grav_long_range/none */
    repartition_costs[22][0] = 71;     /* grav_mm/none */
    repartition_costs[24][0] = 16011;  /* grav_down/none */
    repartition_costs[25][0] = 60414;  /* grav_mesh/none */

  } else if (policy & engine_policy_hydro) {

    /* EAGLE_50 -s 8 nodes 16 cores, not scaled, but similar. */
    repartition_costs[1][0] = 52733;   /* sort/none */
    repartition_costs[2][1] = 15458;   /* self/density */
    repartition_costs[2][3] = 19212;   /* self/force */
    repartition_costs[3][1] = 74;      /* pair/density */
    repartition_costs[3][3] = 242;     /* pair/force */
    repartition_costs[4][1] = 42895;   /* sub_self/density */
    repartition_costs[4][3] = 64254;   /* sub_self/force */
    repartition_costs[5][1] = 818;     /* sub_pair/density */
    repartition_costs[5][3] = 1443;    /* sub_pair/force */
    repartition_costs[9][0] = 159;     /* ghost/none */
    repartition_costs[12][0] = 6708;   /* drift_part/none */
    repartition_costs[15][0] = 6479;   /* end_force/none */
    repartition_costs[16][0] = 6609;   /* kick1/none */
    repartition_costs[17][0] = 6975;   /* kick2/none */
    repartition_costs[18][0] = 5229;   /* timestep/none */
  }
}
#endif
/* General support */
/* =============== */
......
......@@ -1817,16 +1817,11 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
for (int k = nr_tasks - 1; k >= 0; k--) {
struct task *t = &tasks[tid[k]];
t->weight = 0.f;
#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
t->cost = 0.f;
#endif
for (int j = 0; j < t->nr_unlock_tasks; j++)
if (t->unlock_tasks[j]->weight > t->weight)
t->weight = t->unlock_tasks[j]->weight;
float cost = 0.f;
#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
int partcost = 1;
#endif
const float count_i = (t->ci != NULL) ? t->ci->hydro.count : 0.f;
const float count_j = (t->cj != NULL) ? t->cj->hydro.count : 0.f;
......@@ -1847,9 +1842,9 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
break;
case task_type_self:
if (t->subtype == task_subtype_grav)
if (t->subtype == task_subtype_grav) {
cost = 1.f * (wscale * gcount_i) * gcount_i;
else if (t->subtype == task_subtype_external_grav)
} else if (t->subtype == task_subtype_external_grav)
cost = 1.f * wscale * gcount_i;
else if (t->subtype == task_subtype_stars_density)
cost = 1.f * wscale * scount_i * count_i;
......@@ -1949,18 +1944,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
cost = wscale * count_i + wscale * gcount_i;
break;
case task_type_send:
#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
partcost = 0;
#endif
if (count_i < 1e5)
cost = 10.f * (wscale * count_i) * count_i;
else
cost = 2e9;
break;
case task_type_recv:
#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
partcost = 0;
#endif
if (count_i < 1e5)
cost = 5.f * (wscale * count_i) * count_i;
else
......@@ -1970,10 +1959,6 @@ void scheduler_reweight(struct scheduler *s, int verbose) {
cost = 0;
break;
}
#if defined(WITH_MPI) && (defined(HAVE_PARMETIS) || defined(HAVE_METIS))
if (partcost) t->cost = cost;
#endif
t->weight += cost;
}
......
......@@ -579,7 +579,8 @@ void task_create_mpi_comms(void) {
#endif
/**
* @brief dump all the tasks of all the known engines into a file for postprocessing.
* @brief dump all the tasks of all the known engines into a file for
* postprocessing.
*
* Dumps the information to a file "thread_info-stepn.dat" where n is the
* given step value, or "thread_info_MPI-stepn.dat", if we are running
......@@ -681,5 +682,129 @@ void task_dump_all(struct engine *e, int step) {
}
fclose(file_thread);
#endif // WITH_MPI
#endif // SWIFT_DEBUG_TASKS
#endif // SWIFT_DEBUG_TASKS
}
/**
 * @brief Generate simple statistics about the times used by the tasks of
 *        all the engines and write these into two files, a human readable
 *        version and one intended for inclusion as the fixed costs for
 *        repartitioning.
 *
 * Dumps the human readable information to a file "thread_stats-stepn.dat"
 * where n is the given step value. When running under MPI all the tasks are
 * summed into this single file.
 *
 * The fixed costs file will be called "thread_stats-stepn.h".
 *
 * @param e the #engine
 * @param step the current step.
 */
void task_dump_stats(struct engine *e, int step) {

#ifdef SWIFT_DEBUG_TASKS

  char dumpfile[40];
  snprintf(dumpfile, sizeof(dumpfile), "thread_stats-step%d.dat", step);

  char costsfile[40];
  snprintf(costsfile, sizeof(costsfile), "thread_stats-step%d.h", step);

  /* Need arrays for sum, min and max across all types and subtypes. */
  double sum[task_type_count][task_subtype_count];
  double min[task_type_count][task_subtype_count];
  double max[task_type_count][task_subtype_count];
  int count[task_type_count][task_subtype_count];

  for (int j = 0; j < task_type_count; j++) {
    for (int k = 0; k < task_subtype_count; k++) {
      sum[j][k] = 0.0;
      count[j][k] = 0;
      min[j][k] = DBL_MAX;
      max[j][k] = 0.0;
    }
  }

  /* Single-element arrays so they can be passed directly to MPI_Reduce. */
  double total[1] = {0.0};
  double minmin[1] = {DBL_MAX};

  for (int l = 0; l < e->sched.nr_tasks; l++) {
    int type = e->sched.tasks[l].type;

    /* Skip implicit tasks, tasks that didn't run and MPI send/recv as these
     * are not interesting (or meaningfully measured). */
    if (!e->sched.tasks[l].implicit && e->sched.tasks[l].toc != 0 &&
        type != task_type_send && type != task_type_recv) {
      int subtype = e->sched.tasks[l].subtype;

      double dt = e->sched.tasks[l].toc - e->sched.tasks[l].tic;
      sum[type][subtype] += dt;
      count[type][subtype] += 1;
      if (dt < min[type][subtype]) min[type][subtype] = dt;
      if (dt > max[type][subtype]) max[type][subtype] = dt;
      total[0] += dt;
      if (dt < minmin[0]) minmin[0] = dt;
    }
  }

#ifdef WITH_MPI
  /* Get these from all ranks for output from rank 0. Could wrap these into a
   * single operation. Note MPI_Reduce takes an int count, so don't use
   * size_t here. */
  int size = task_type_count * task_subtype_count;
  int res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : sum), sum, size,
                       MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task sums");

  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : count), count, size,
                   MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);
  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task counts");

  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : min), min, size,
                   MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task minima");

  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : max), max, size,
                   MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task maxima");

  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : total), total, 1,
                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task total time");

  res = MPI_Reduce((engine_rank == 0 ? MPI_IN_PLACE : minmin), minmin, 1,
                   MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  if (res != MPI_SUCCESS) mpi_error(res, "Failed to reduce task global min");

  if (engine_rank == 0) {
#endif

    FILE *dfile = fopen(dumpfile, "w");
    FILE *cfile = fopen(costsfile, "w");
    if (dfile == NULL || cfile == NULL) {
      /* Don't dereference a NULL stream; report and skip the dump. */
      fprintf(stderr, "Failed to open task statistics files '%s'/'%s'\n",
              dumpfile, costsfile);
      if (dfile != NULL) fclose(dfile);
      if (cfile != NULL) fclose(cfile);
    } else {
      fprintf(dfile, "# task ntasks min max sum mean percent fixed_cost\n");

      for (int j = 0; j < task_type_count; j++) {
        const char *taskID = taskID_names[j];
        for (int k = 0; k < task_subtype_count; k++) {
          /* Only types/subtypes that actually ran; this also guarantees
           * total[0] > 0 and minmin[0] < DBL_MAX below. */
          if (sum[j][k] > 0.0) {
            double mean = sum[j][k] / (double)count[j][k];
            double perc = 100.0 * sum[j][k] / total[0];

            /* Mean time in units of the smallest task time seen anywhere,
             * truncated to int. Note the cast must apply to the whole
             * quotient, not just the mean. */
            int fixed_cost = (int)(mean / minmin[0]);

            fprintf(dfile,
                    "%15s/%-10s %10d %14.2f %14.2f %14.2f %14.2f %14.2f %10d\n",
                    taskID, subtaskID_names[k], count[j][k], min[j][k],
                    max[j][k], sum[j][k], mean, perc, fixed_cost);
            fprintf(cfile, "repartition_costs[%d][%d] = %10d; /* %s/%s */\n",
                    j, k, fixed_cost, taskID, subtaskID_names[k]);
          }
        }
      }
      fclose(dfile);
      fclose(cfile);
    }

#ifdef WITH_MPI
  }
#endif

#endif  // SWIFT_DEBUG_TASKS
}
......@@ -156,11 +156,6 @@ struct task {
/*! Weight of the task */
float weight;
#if defined(WITH_MPI) && (defined(HAVE_METIS) || defined(HAVE_PARMETIS))
/*! Individual cost estimate for this task. */
float cost;
#endif
/*! Number of tasks unlocked by this one */
short int nr_unlock_tasks;
......@@ -204,6 +199,7 @@ int task_lock(struct task *t);
void task_do_rewait(struct task *t);
void task_print(const struct task *t);
void task_dump_all(struct engine *e, int step);
void task_dump_stats(struct engine *e, int step);
#ifdef WITH_MPI
void task_create_mpi_comms(void);
#endif
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment