Commit 582d0fec authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'cells-per-task' into 'master'

Make the expected number of tasks per cell a parameter.

Previously we had a fixed maximum of 96 tasks per cell; much smaller numbers will work, depending on the job.

See merge request !395
parents cf58ff8b db43eae9
...@@ -28,7 +28,7 @@ SPH: ...@@ -28,7 +28,7 @@ SPH:
resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel). resolution_eta: 1.2348 # Target smoothing length in units of the mean inter-particle separation (1.2348 == 48Ngbs with the cubic spline kernel).
delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours. delta_neighbours: 0.1 # The tolerance for the targetted number of neighbours.
CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration. CFL_condition: 0.1 # Courant-Friedrich-Levy condition for time integration.
# Parameters related to the initial conditions # Parameters related to the initial conditions
InitialConditions: InitialConditions:
file_name: ./multiTypes.hdf5 # The file to read file_name: ./multiTypes.hdf5 # The file to read
......
...@@ -8,12 +8,13 @@ InternalUnitSystem: ...@@ -8,12 +8,13 @@ InternalUnitSystem:
# Parameters for the task scheduling # Parameters for the task scheduling
Scheduler: Scheduler:
nr_queues: 0 # (Optional) The number of task queues to use. Use 0 to let the system decide. nr_queues: 0 # (Optional) The number of task queues to use. Use 0 to let the system decide.
cell_max_size: 8000000 # (Optional) Maximal number of interactions per task if we force the split (this is the default value). cell_max_size: 8000000 # (Optional) Maximal number of interactions per task if we force the split (this is the default value).
cell_sub_size_pair: 256000000 # (Optional) Maximal number of interactions per sub-pair task (this is the default value). cell_sub_size_pair: 256000000 # (Optional) Maximal number of interactions per sub-pair task (this is the default value).
cell_sub_size_self: 32000 # (Optional) Maximal number of interactions per sub-self task (this is the default value). cell_sub_size_self: 32000 # (Optional) Maximal number of interactions per sub-self task (this is the default value).
cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value). cell_split_size: 400 # (Optional) Maximal number of particles per cell (this is the default value).
max_top_level_cells: 12 # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value). max_top_level_cells: 12 # (Optional) Maximal number of top-level cells in any dimension. The number of top-level cells will be the cube of this (this is the default value).
tasks_per_cell: 0 # (Optional) The average number of tasks per cell. If not large enough the simulation will fail (0 means guess the value).
# Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.) # Parameters governing the time integration (Set dt_min and dt_max to the same value for a fixed time-step run.)
TimeIntegration: TimeIntegration:
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "debug.h" #include "debug.h"
/* Local includes. */ /* Local includes. */
#include "active.h"
#include "cell.h" #include "cell.h"
#include "engine.h" #include "engine.h"
#include "hydro.h" #include "hydro.h"
...@@ -269,6 +270,79 @@ int checkCellhdxmax(const struct cell *c, int *depth) { ...@@ -269,6 +270,79 @@ int checkCellhdxmax(const struct cell *c, int *depth) {
return result; return result;
} }
/**
 * @brief map function for dumping cells. In MPI mode locally active cells
 * only.
 *
 * Writes one line per non-empty cell to the file passed via data[0], using
 * the engine passed via data[1] for the task list and time-step information.
 *
 * @param c the #cell to consider for dumping.
 * @param data two uintptr_t words: the output FILE and the #engine.
 */
static void dumpCells_map(struct cell *c, void *data) {
  uintptr_t *ldata = (uintptr_t *)data;
  FILE *file = (FILE *)ldata[0];
  struct engine *e = (struct engine *)ldata[1];
  float ntasks = c->nr_tasks;

/* Use #ifdef, not #if: the macro is defined without a usable value by the
 * build system, and #if would either error or silently evaluate to 0,
 * compiling the recount out. Matches the convention used elsewhere. */
#ifdef SWIFT_DEBUG_CHECKS
  /* The c->nr_tasks field does not include all the tasks. So let's check this
   * the hard way. Note pairs share the task 50/50 with the other cell. */
  ntasks = 0.0f;
  struct task *tasks = e->sched.tasks;
  int nr_tasks = e->sched.nr_tasks;
  for (int k = 0; k < nr_tasks; k++) {
    if (tasks[k].cj == NULL) {
      if (c == tasks[k].ci) {
        ntasks = ntasks + 1.0f;
      }
    } else {
      if (c == tasks[k].ci || c == tasks[k].cj) {
        ntasks = ntasks + 0.5f;
      }
    }
  }
#endif

  /* Only cells containing particles (hydro, gravity or stars) are dumped;
   * in MPI mode that restricts output to cells with local content. */
  if (c->count > 0 || c->gcount > 0 || c->scount > 0)
    fprintf(file,
            "  %6.3f %6.3f %6.3f %6.3f %6.3f %6.3f %6d %6d %6d %6d "
            "%6.1f %20lld %6d %6d %6d %6d\n",
            c->loc[0], c->loc[1], c->loc[2], c->width[0], c->width[1],
            c->width[2], c->count, c->gcount, c->scount, c->depth, ntasks,
            c->ti_end_min, get_time_bin(c->ti_end_min), (c->super == c),
            cell_is_active(c, e), c->nodeID);
}
/**
 * @brief Dump the location, depth, task counts and timebins and active state,
 * for all cells to a simple text file.
 *
 * The output file is named "<prefix>_<nnn>.dat"; the sequence number is
 * incremented atomically per call so repeated dumps do not clobber each
 * other.
 *
 * @param prefix base output filename
 * @param s the space holding the cells to dump.
 */
void dumpCells(const char *prefix, struct space *s) {

  /* Name of output file. */
  static int nseq = 0;
  char fname[200];
  int uniq = atomic_inc(&nseq);

  /* Bounded write: sprintf could overflow fname for long prefixes. */
  snprintf(fname, sizeof(fname), "%s_%03d.dat", prefix, uniq);

  FILE *file = fopen(fname, "w");
  if (file == NULL) {
    /* Dumping is diagnostic only, so report and carry on rather than
     * dereference a NULL stream. */
    fprintf(stderr, "dumpCells: failed to open '%s' for writing\n", fname);
    return;
  }

  /* Header. The ti_end_min label takes the wide (%20s) column so that the
   * labels line up with the %20lld field written for each cell. */
  fprintf(file,
          "# %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %6s %20s "
          "%6s %6s %6s %6s\n",
          "x", "y", "z", "xw", "yw", "zw", "count", "gcount", "scount", "depth",
          "tasks", "ti_end_min", "timebin", "issuper", "active", "rank");

  /* Pass the stream and the engine to the mapper as two opaque words. */
  uintptr_t data[2];
  data[0] = (uintptr_t)file;
  data[1] = (uintptr_t)s->e;
  space_map_cells_pre(s, 1, dumpCells_map, &data);
  fclose(file);
}
#ifdef HAVE_METIS #ifdef HAVE_METIS
/** /**
......
...@@ -33,6 +33,7 @@ void printgParticle_single(struct gpart *gp); ...@@ -33,6 +33,7 @@ void printgParticle_single(struct gpart *gp);
int checkSpacehmax(struct space *s); int checkSpacehmax(struct space *s);
int checkCellhdxmax(const struct cell *c, int *depth); int checkCellhdxmax(const struct cell *c, int *depth);
void dumpCells(const char *prefix, struct space *s);
#ifdef HAVE_METIS #ifdef HAVE_METIS
#include "metis.h" #include "metis.h"
......
...@@ -2507,7 +2507,7 @@ void engine_maketasks(struct engine *e) { ...@@ -2507,7 +2507,7 @@ void engine_maketasks(struct engine *e) {
const ticks tic = getticks(); const ticks tic = getticks();
/* Re-set the scheduler. */ /* Re-set the scheduler. */
scheduler_reset(sched, s->tot_cells * engine_maxtaskspercell); scheduler_reset(sched, engine_estimate_nr_tasks(e));
/* Construct the firt hydro loop over neighbours */ /* Construct the firt hydro loop over neighbours */
if (e->policy & engine_policy_hydro) { if (e->policy & engine_policy_hydro) {
...@@ -2902,7 +2902,8 @@ void engine_print_task_counts(struct engine *e) { ...@@ -2902,7 +2902,8 @@ void engine_print_task_counts(struct engine *e) {
else else
counts[(int)tasks[k].type] += 1; counts[(int)tasks[k].type] += 1;
} }
message("Total = %d", nr_tasks); message("Total = %d (per cell = %d)", nr_tasks,
(int)ceil((double)nr_tasks / e->s->tot_cells));
#ifdef WITH_MPI #ifdef WITH_MPI
printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i", printf("[%04i] %s engine_print_task_counts: task counts are [ %s=%i",
e->nodeID, clocks_get_timesincestart(), taskID_names[0], counts[0]); e->nodeID, clocks_get_timesincestart(), taskID_names[0], counts[0]);
...@@ -2923,6 +2924,117 @@ void engine_print_task_counts(struct engine *e) { ...@@ -2923,6 +2924,117 @@ void engine_print_task_counts(struct engine *e) {
clocks_getunit()); clocks_getunit());
} }
/**
 * @brief if necessary, estimate the number of tasks required given
 * the current tasks in use and the numbers of cells.
 *
 * If e->tasks_per_cell is set greater than 0 then that value is used
 * as the estimate of the average number of tasks per cell,
 * otherwise we attempt an estimate.
 *
 * @param e the #engine
 *
 * @return the estimated total number of tasks
 */
int engine_estimate_nr_tasks(struct engine *e) {

  /* Honour an explicit override from the Scheduler:tasks_per_cell
   * parameter when one was given. */
  int tasks_per_cell = e->tasks_per_cell;
  if (tasks_per_cell > 0) return e->s->tot_cells * tasks_per_cell;

  /* Our guess differs depending on the types of tasks we are using, but we
   * basically use a formula <n1>*ntopcells + <n2>*(totcells - ntopcells).
   * Where <n1> is the expected maximum tasks per top-level/super cell, and
   * <n2> the expected maximum tasks for all other cells. These should give
   * a safe upper limit.
   *
   * NOTE(review): the per-policy increments below look like empirically
   * chosen upper bounds rather than exact task counts -- confirm against
   * the tasks created in engine_maketasks() when task types change. */
  int n1 = 0; /* Expected tasks per top-level/super cell. */
  int n2 = 0; /* Expected tasks per lower-level cell. */
  if (e->policy & engine_policy_hydro) {
    n1 += 36;
    n2 += 2;
#ifdef WITH_MPI
    /* Extra send/recv tasks on top-level cells. */
    n1 += 6;
#endif
#ifdef EXTRA_HYDRO_LOOP
    /* A third hydro loop adds tasks on the super level. */
    n1 += 15;
#ifdef WITH_MPI
    n1 += 2;
#endif
#endif
  }
  if (e->policy & engine_policy_self_gravity) {
    n1 += 24;
    n2 += 1;
#ifdef WITH_MPI
    n2 += 2;
#endif
  }
  if (e->policy & engine_policy_external_gravity) {
    n1 += 2;
  }
  if (e->policy & engine_policy_cosmology) {
    n1 += 2;
  }
  if (e->policy & engine_policy_cooling) {
    n1 += 2;
  }
  if (e->policy & engine_policy_sourceterms) {
    n1 += 2;
  }
  if (e->policy & engine_policy_stars) {
    n1 += 2;
  }

#ifdef WITH_MPI

  /* We need fewer tasks per rank when using MPI, but we could have
   * imbalances, so we need to work using the locally active cells, not just
   * some equipartition amongst the nodes. Don't want to recurse the whole
   * cell tree, so just make a guess of the maximum possible total cells. */
  int ntop = 0;   /* Non-empty top-level cells on this rank. */
  int ncells = 0; /* Estimated total cells (all levels) on this rank. */
  for (int k = 0; k < e->s->nr_cells; k++) {
    struct cell *c = &e->s->cells_top[k];

    /* Any cells with particles will have tasks (local & foreign). */
    int nparts = c->count + c->gcount + c->scount;
    if (nparts > 0) {
      ntop++;
      ncells++;

      /* Count cell depth until we get below the parts per cell threshold.
       * Each split level divides particles by 8 and adds 8^depth cells.
       * NOTE(review): (1 << (depth * 3)) overflows int for depth >= 11,
       * i.e. implausibly deep trees -- presumably acceptable; confirm. */
      int depth = 0;
      while (nparts > space_splitsize) {
        depth++;
        nparts /= 8;
        ncells += (1 << (depth * 3));
      }
    }
  }

  /* If no local cells, we are probably still initialising, so just keep
   * room for the top-level. */
  if (ncells == 0) {
    ntop = e->s->nr_cells;
    ncells = ntop;
  }
#else
  /* Without MPI every cell is local: use the full counts from the space. */
  int ntop = e->s->nr_cells;
  int ncells = e->s->tot_cells;
#endif

  /* Average the upper-limit formula over all cells, rounding up so the
   * scheduler never comes up short. At least one task per cell. */
  double ntasks = n1 * ntop + n2 * (ncells - ntop);
  tasks_per_cell = ceil(ntasks / ncells);

  if (tasks_per_cell < 1.0) tasks_per_cell = 1.0;
  if (e->verbose)
    message("tasks per cell estimated as: %d, maximum tasks: %d",
            tasks_per_cell, ncells * tasks_per_cell);

  /* NOTE(review): ncells * tasks_per_cell could exceed INT_MAX for very
   * large runs -- verify against expected problem sizes. */
  return ncells * tasks_per_cell;
}
/** /**
* @brief Rebuild the space and tasks. * @brief Rebuild the space and tasks.
* *
...@@ -4503,9 +4615,14 @@ void engine_init(struct engine *e, struct space *s, ...@@ -4503,9 +4615,14 @@ void engine_init(struct engine *e, struct space *s,
pthread_barrier_init(&e->run_barrier, NULL, e->nr_threads + 1) != 0) pthread_barrier_init(&e->run_barrier, NULL, e->nr_threads + 1) != 0)
error("Failed to initialize barrier."); error("Failed to initialize barrier.");
/* Init the scheduler with enough tasks for the initial sorting tasks. */ /* Expected average for tasks per cell. If set to zero we use a heuristic
const int nr_tasks = 2 * s->tot_cells + 2 * e->nr_threads; * guess based on the numbers of cells and how many tasks per cell we expect.
scheduler_init(&e->sched, e->s, nr_tasks, nr_queues, */
e->tasks_per_cell =
parser_get_opt_param_int(params, "Scheduler:tasks_per_cell", 0);
/* Init the scheduler. */
scheduler_init(&e->sched, e->s, engine_estimate_nr_tasks(e), nr_queues,
(policy & scheduler_flag_steal), e->nodeID, &e->threadpool); (policy & scheduler_flag_steal), e->nodeID, &e->threadpool);
/* Allocate and init the threads. */ /* Allocate and init the threads. */
......
...@@ -75,7 +75,6 @@ enum engine_policy { ...@@ -75,7 +75,6 @@ enum engine_policy {
extern const char *engine_policy_names[]; extern const char *engine_policy_names[];
#define engine_queue_scale 1.2 #define engine_queue_scale 1.2
#define engine_maxtaskspercell 96
#define engine_maxproxies 64 #define engine_maxproxies 64
#define engine_tasksreweight 1 #define engine_tasksreweight 1
#define engine_parts_size_grow 1.05 #define engine_parts_size_grow 1.05
...@@ -222,6 +221,10 @@ struct engine { ...@@ -222,6 +221,10 @@ struct engine {
struct link *links; struct link *links;
int nr_links, size_links; int nr_links, size_links;
/* Average number of tasks per cell. Used to estimate the sizes
* of the various task arrays. */
int tasks_per_cell;
/* Are we talkative ? */ /* Are we talkative ? */
int verbose; int verbose;
...@@ -292,5 +295,6 @@ int engine_is_done(struct engine *e); ...@@ -292,5 +295,6 @@ int engine_is_done(struct engine *e);
void engine_pin(); void engine_pin();
void engine_unpin(); void engine_unpin();
void engine_clean(struct engine *e); void engine_clean(struct engine *e);
int engine_estimate_nr_tasks(struct engine *e);
#endif /* SWIFT_ENGINE_H */ #endif /* SWIFT_ENGINE_H */
...@@ -772,7 +772,11 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, ...@@ -772,7 +772,11 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type,
const int ind = atomic_inc(&s->tasks_next); const int ind = atomic_inc(&s->tasks_next);
/* Overflow? */ /* Overflow? */
if (ind >= s->size) error("Task list overflow."); if (ind >= s->size)
error(
"Task list overflow (%d). Need to increase "
"Scheduler:tasks_per_cell.",
ind);
/* Get a pointer to the new task. */ /* Get a pointer to the new task. */
struct task *t = &s->tasks[ind]; struct task *t = &s->tasks[ind];
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment