Commit 49e93734 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Merge branch 'kill_drift' into 'master'

Only drift the particles that need drifting

Here is an algorithmic speed-up. The plan is to drift only the cells that contain an active particles and the cells that have at least one neighbour with an active particle. 

 - If this condition is not met then `runner_do_drift()` just returns without performing any operation. 

 - If the condition is met, `runner_do_drift()` moves all particles (as before) from the last point in time where a drift was performed. 

We record for each cell the last time a drift was done such that we now by how much to drift things when needed.

Two exceptions to this rule: before reconstructing and before duping a snapshot,  we drift all particles as we need correct position information at these two stages.

Note that I have made this an engine policy to ease the testing. Drifting all particles can be restored by setting the right flag.

@nnrw56  could you check that the function `cell_is_drift_needed()` makes sense and fully does what it says it does ? Also could you let me know whether you also agree with the whole logic and implementation.


See merge request !225
parents 5641fa31 5c4963fc
......@@ -22,12 +22,12 @@ Valid options are:
-d Dry run. Read the parameter file, allocate memory but does not read
the particles from ICs and exit before the start of time integration.
Allows user to check validy of parameter and IC files as well as memory limits.
-D Always drift all particles even the ones far from active particles.
-e Enable floating-point exceptions (debugging mode)
-f {int} Overwrite the CPU frequency (Hz) to be used for time measurements
-g Run with an external gravitational potential
-G Run with self-gravity
-n {int} Execute a fixed number of time steps. When unset use the time_end
parameter to stop.
-n {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop.
-s Run with SPH
-t {int} The number of threads to use on each MPI rank. Defaults to 1 if not specified.
-v [12] Increase the level of verbosity 1: MPI-rank 0 writes
......
......@@ -67,6 +67,8 @@ void print_help_message() {
printf(" %2s %8s %s\n", "", "",
"Allows user to check validy of parameter and IC files as well as "
"memory limits.");
printf(" %2s %8s %s\n", "-D", "",
"Always drift all particles even the ones far from active particles.");
printf(" %2s %8s %s\n", "-e", "",
"Enable floating-point exceptions (debugging mode)");
printf(" %2s %8s %s\n", "-f", "{int}",
......@@ -75,7 +77,8 @@ void print_help_message() {
"Run with an external gravitational potential");
printf(" %2s %8s %s\n", "-G", "", "Run with self-gravity");
printf(" %2s %8s %s\n", "-n", "{int}",
"Execute a fixed number of time steps");
"Execute a fixed number of time steps. When unset use the time_end "
"parameter to stop.");
printf(" %2s %8s %s\n", "-s", "", "Run with SPH");
printf(" %2s %8s %s\n", "-t", "{int}",
"The number of threads to use on each MPI rank. Defaults to 1 if not "
......@@ -146,6 +149,7 @@ int main(int argc, char *argv[]) {
int with_self_gravity = 0;
int with_hydro = 0;
int with_fp_exceptions = 0;
int with_drift_all = 0;
int verbose = 0;
int nr_threads = 1;
char paramFileName[200] = "";
......@@ -153,7 +157,7 @@ int main(int argc, char *argv[]) {
/* Parse the parameters */
int c;
while ((c = getopt(argc, argv, "acdef:gGhn:st:v:y:")) != -1) switch (c) {
while ((c = getopt(argc, argv, "acdDef:gGhn:st:v:y:")) != -1) switch (c) {
case 'a':
with_aff = 1;
break;
......@@ -163,6 +167,9 @@ int main(int argc, char *argv[]) {
case 'd':
dry_run = 1;
break;
case 'D':
with_drift_all = 1;
break;
case 'e':
with_fp_exceptions = 1;
break;
......@@ -426,6 +433,7 @@ int main(int argc, char *argv[]) {
/* Construct the engine policy */
int engine_policies = ENGINE_POLICY | engine_policy_steal;
if (with_drift_all) engine_policies |= engine_policy_drift_all;
if (with_hydro) engine_policies |= engine_policy_hydro;
if (with_self_gravity) engine_policies |= engine_policy_self_gravity;
if (with_external_gravity) engine_policies |= engine_policy_external_gravity;
......
......@@ -63,7 +63,6 @@ int cell_next_tag = 0;
*
* @param c The #cell.
*/
int cell_getsize(struct cell *c) {
/* Number of cells in this subtree. */
......@@ -87,9 +86,10 @@ int cell_getsize(struct cell *c) {
*
* @return The number of cells created.
*/
int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
#ifdef WITH_MPI
/* Unpack the current pcell. */
c->h_max = pc->h_max;
c->ti_end_min = pc->ti_end_min;
......@@ -130,6 +130,11 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
/* Return the total number of unpacked cells. */
c->pcell_size = count;
return count;
#else
error("SWIFT was not compiled with MPI support.");
return 0;
#endif
}
/**
......@@ -140,7 +145,6 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
*
* @return The number of particles linked.
*/
int cell_link_parts(struct cell *c, struct part *parts) {
c->parts = parts;
......@@ -166,7 +170,6 @@ int cell_link_parts(struct cell *c, struct part *parts) {
*
* @return The number of particles linked.
*/
int cell_link_gparts(struct cell *c, struct gpart *gparts) {
c->gparts = gparts;
......@@ -193,9 +196,10 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts) {
*
* @return The number of packed cells.
*/
int cell_pack(struct cell *c, struct pcell *pc) {
#ifdef WITH_MPI
/* Start by packing the data of the current cell. */
pc->h_max = c->h_max;
pc->ti_end_min = c->ti_end_min;
......@@ -216,10 +220,25 @@ int cell_pack(struct cell *c, struct pcell *pc) {
/* Return the number of packed cells used. */
c->pcell_size = count;
return count;
#else
error("SWIFT was not compiled with MPI support.");
return 0;
#endif
}
/**
* @brief Pack the time information of the given cell and all it's sub-cells.
*
* @param c The #cell.
* @param ti_ends (output) The time information we pack into
*
* @return The number of packed cells.
*/
int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
#ifdef WITH_MPI
/* Pack this cell's data. */
ti_ends[0] = c->ti_end_min;
......@@ -232,10 +251,25 @@ int cell_pack_ti_ends(struct cell *c, int *ti_ends) {
/* Return the number of packed values. */
return count;
#else
error("SWIFT was not compiled with MPI support.");
return 0;
#endif
}
/**
* @brief Unpack the time information of a given cell and its sub-cells.
*
* @param c The #cell
* @param ti_ends The time information to unpack
*
* @return The number of cells created.
*/
int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
#ifdef WITH_MPI
/* Unpack this cell's data. */
c->ti_end_min = ti_ends[0];
......@@ -248,14 +282,19 @@ int cell_unpack_ti_ends(struct cell *c, int *ti_ends) {
/* Return the number of packed values. */
return count;
#else
error("SWIFT was not compiled with MPI support.");
return 0;
#endif
}
/**
* @brief Lock a cell and hold its parents.
* @brief Lock a cell for access to its array of #part and hold its parents.
*
* @param c The #cell.
* @return 0 on success, 1 on failure
*/
int cell_locktree(struct cell *c) {
TIMER_TIC
......@@ -314,6 +353,12 @@ int cell_locktree(struct cell *c) {
}
}
/**
* @brief Lock a cell for access to its array of #gpart and hold its parents.
*
* @param c The #cell.
* @return 0 on success, 1 on failure
*/
int cell_glocktree(struct cell *c) {
TIMER_TIC
......@@ -373,11 +418,10 @@ int cell_glocktree(struct cell *c) {
}
/**
* @brief Unlock a cell's parents.
* @brief Unlock a cell's parents for access to #part array.
*
* @param c The #cell.
*/
void cell_unlocktree(struct cell *c) {
TIMER_TIC
......@@ -392,6 +436,11 @@ void cell_unlocktree(struct cell *c) {
TIMER_TOC(timer_locktree);
}
/**
* @brief Unlock a cell's parents for access to #gpart array.
*
* @param c The #cell.
*/
void cell_gunlocktree(struct cell *c) {
TIMER_TIC
......@@ -413,7 +462,6 @@ void cell_gunlocktree(struct cell *c) {
* @param parts_offset Offset of the cell parts array relative to the
* space's parts array, i.e. c->parts - s->parts.
*/
void cell_split(struct cell *c, ptrdiff_t parts_offset) {
int i, j;
......@@ -630,57 +678,9 @@ void cell_split(struct cell *c, ptrdiff_t parts_offset) {
part_relink_parts(gparts, gcount, parts - parts_offset);
}
/**
* @brief Initialises all particles to a valid state even if the ICs were stupid
*
* @param c Cell to act upon
* @param data Unused parameter
*/
void cell_init_parts(struct cell *c, void *data) {
struct part *restrict p = c->parts;
struct xpart *restrict xp = c->xparts;
const size_t count = c->count;
for (size_t i = 0; i < count; ++i) {
p[i].ti_begin = 0;
p[i].ti_end = 0;
xp[i].v_full[0] = p[i].v[0];
xp[i].v_full[1] = p[i].v[1];
xp[i].v_full[2] = p[i].v[2];
hydro_first_init_part(&p[i], &xp[i]);
hydro_init_part(&p[i]);
hydro_reset_acceleration(&p[i]);
}
c->ti_end_min = 0;
c->ti_end_max = 0;
}
/**
* @brief Initialises all g-particles to a valid state even if the ICs were
*stupid
*
* @param c Cell to act upon
* @param data Unused parameter
*/
void cell_init_gparts(struct cell *c, void *data) {
struct gpart *restrict gp = c->gparts;
const size_t gcount = c->gcount;
for (size_t i = 0; i < gcount; ++i) {
gp[i].ti_begin = 0;
gp[i].ti_end = 0;
gravity_first_init_gpart(&gp[i]);
gravity_init_gpart(&gp[i]);
}
c->ti_end_min = 0;
c->ti_end_max = 0;
}
/**
* @brief Converts hydro quantities to a valid state after the initial density
*calculation
* calculation
*
* @param c Cell to act upon
* @param data Unused parameter
......@@ -712,7 +712,6 @@ void cell_clean_links(struct cell *c, void *data) {
}
/**
<<<<<<< HEAD
* @brief Checks whether the cells are direct neighbours ot not. Both cells have
* to be of the same size
*
......@@ -797,8 +796,10 @@ void cell_check_multipole(struct cell *c, void *data) {
}
}
/*
* @brief Frees up the memory allocated for this #cell
/**
* @brief Frees up the memory allocated for this #cell.
*
* @param c The #cell.
*/
void cell_clean(struct cell *c) {
......@@ -808,3 +809,32 @@ void cell_clean(struct cell *c) {
for (int k = 0; k < 8; k++)
if (c->progeny[k]) cell_clean(c->progeny[k]);
}
/**
* @brief Checks whether a given cell needs drifting or not.
*
* @param c the #cell.
* @param ti_current The current time on the integer time-line.
*
* @return 1 If the cell needs drifting, 0 otherwise.
*/
int cell_is_drift_needed(struct cell *c, int ti_current) {
/* Do we have at least one active particle in the cell ?*/
if (c->ti_end_min == ti_current) return 1;
/* Loop over the pair tasks that involve this cell */
for (struct link *l = c->density; l != NULL; l = l->next) {
if (l->t->type != task_type_pair && l->t->type != task_type_sub_pair)
continue;
/* Does the other cell in the pair have an active particle ? */
if ((l->t->ci == c && l->t->cj->ti_end_min == ti_current) ||
(l->t->cj == c && l->t->ci->ti_end_min == ti_current))
return 1;
}
/* No neighbouring cell has active particles. Drift not necessary */
return 0;
}
......@@ -23,6 +23,9 @@
#ifndef SWIFT_CELL_H
#define SWIFT_CELL_H
/* Config parameters. */
#include "../config.h"
/* Includes. */
#include <stddef.h>
......@@ -41,9 +44,21 @@ struct space;
* The maximum was lowered by a further factor of 2 to be on the safe side.*/
#define cell_max_tag (1 << 29)
#define cell_align 32
/* Global variables. */
extern int cell_next_tag;
/* Mini struct to link cells to tasks. Used as a linked list. */
struct link {
/* The task pointer. */
struct task *t;
/* The next pointer. */
struct link *next;
};
/* Packed cell. */
struct pcell {
......@@ -70,19 +85,22 @@ struct cell {
/* The cell dimensions. */
double width[3];
/* Max radii in this cell. */
/* Max smoothing length in this cell. */
double h_max;
/* Minimum and maximum end of time step in this cell. */
int ti_end_min, ti_end_max;
/* Last time the cell's content was drifted forward in time. */
int ti_old;
/* Minimum dimension, i.e. smallest edge of this cell. */
float dmin;
/* Maximum slack allowed for particle movement. */
float slack;
/* Maximum particle movement in this cell. */
/* Maximum particle movement in this cell since last construction. */
float dx_max;
/* The depth of this cell in the tree. */
......@@ -124,12 +142,16 @@ struct cell {
/* The hierarchical tasks. */
struct task *extra_ghost, *ghost, *init, *kick;
#ifdef WITH_MPI
/* Task receiving data. */
struct task *recv_xv, *recv_rho, *recv_gradient, *recv_ti;
/* Task send data. */
struct link *send_xv, *send_rho, *send_gradient, *send_ti;
#endif
/* Tasks for gravity tree. */
struct task *grav_up, *grav_down;
......@@ -160,9 +182,14 @@ struct cell {
/* Linking pointer for "memory management". */
struct cell *next;
/* This cell's multipole. */
struct multipole multipole;
/* ID of the node this cell lives on. */
int nodeID;
#ifdef WITH_MPI
/* Bit mask of the proxies this cell is registered with. */
unsigned long long int sendto;
......@@ -171,10 +198,9 @@ struct cell {
int pcell_size;
int tag;
/* This cell's multipole. */
struct multipole multipole;
#endif
} __attribute__((aligned(64)));
} __attribute__((aligned(cell_align)));
/* Convert cell location to ID. */
#define cell_getid(cdim, i, j, k) \
......@@ -193,13 +219,12 @@ int cell_unpack_ti_ends(struct cell *c, int *ti_ends);
int cell_getsize(struct cell *c);
int cell_link_parts(struct cell *c, struct part *parts);
int cell_link_gparts(struct cell *c, struct gpart *gparts);
void cell_init_parts(struct cell *c, void *data);
void cell_init_gparts(struct cell *c, void *data);
void cell_convert_hydro(struct cell *c, void *data);
void cell_clean_links(struct cell *c, void *data);
int cell_are_neighbours(const struct cell *restrict ci,
const struct cell *restrict cj);
void cell_check_multipole(struct cell *c, void *data);
void cell_clean(struct cell *c);
int cell_is_drift_needed(struct cell *c, int ti_current);
#endif /* SWIFT_CELL_H */
......@@ -68,7 +68,7 @@
#include "units.h"
#include "version.h"
const char *engine_policy_names[13] = {"none",
const char *engine_policy_names[14] = {"none",
"rand",
"steal",
"keep",
......@@ -80,7 +80,8 @@ const char *engine_policy_names[13] = {"none",
"hydro",
"self_gravity",
"external_gravity",
"cosmology_integration"};
"cosmology_integration",
"drift_all"};
/** The rank of the engine as a global variable (for messages). */
int engine_rank;
......@@ -2051,6 +2052,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
}
}
#ifdef WITH_MPI
/* Activate the send/recv flags. */
if (ci->nodeID != engine_rank) {
......@@ -2105,6 +2108,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements,
if (l == NULL) error("Missing link to send_ti task.");
l->t->skip = 0;
}
#endif
}
/* Kick? */
......@@ -2270,6 +2275,16 @@ void engine_prepare(struct engine *e) {
/* Did this not go through? */
if (rebuild) {
/* First drift all particles to the current time */
e->drift_all = 1;
threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
e->s->nr_cells, sizeof(struct cell), 1, e);
/* Restore the default drifting policy */
e->drift_all = (e->policy & engine_policy_drift_all);
/* And now rebuild */
engine_rebuild(e);
}
......@@ -2650,9 +2665,13 @@ void engine_step(struct engine *e) {
snapshot_drift_time = e->timeStep;
/* Drift everybody to the snapshot position */
e->drift_all = 1;
threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
e->s->nr_cells, sizeof(struct cell), 1, e);
/* Restore the default drifting policy */
e->drift_all = (e->policy & engine_policy_drift_all);
/* Dump... */
engine_dump_snapshot(e);
......@@ -2668,10 +2687,6 @@ void engine_step(struct engine *e) {
e->timeOld = e->ti_old * e->timeBase + e->timeBegin;
e->timeStep = (e->ti_current - e->ti_old) * e->timeBase + snapshot_drift_time;
/* Drift everybody */
threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
e->s->nr_cells, sizeof(struct cell), 1, e);
if (e->nodeID == 0) {
/* Print some information to the screen */
......@@ -2690,6 +2705,10 @@ void engine_step(struct engine *e) {
e->timeLastStatistics += e->deltaTimeStatistics;
}
/* Drift only the necessary particles */
threadpool_map(&e->threadpool, runner_do_drift_mapper, e->s->cells,
e->s->nr_cells, sizeof(struct cell), 1, e);
/* Re-distribute the particles amongst the nodes? */
if (e->forcerepart != REPART_NONE) engine_repartition(e);
......@@ -3104,6 +3123,7 @@ void engine_init(struct engine *e, struct space *s,
e->timeStep = 0.;
e->timeBase = 0.;
e->timeBase_inv = 0.;
e->drift_all = (policy & engine_policy_drift_all);
e->internalUnits = internal_units;
e->timeFirstSnapshot =
parser_get_param_double(params, "Snapshots:time_first");
......
......@@ -61,7 +61,8 @@ enum engine_policy {
engine_policy_hydro = (1 << 8),
engine_policy_self_gravity = (1 << 9),
engine_policy_external_gravity = (1 << 10),
engine_policy_cosmology = (1 << 11)
engine_policy_cosmology = (1 << 11),
engine_policy_drift_all = (1 << 12)
};
extern const char *engine_policy_names[];
......@@ -81,16 +82,6 @@ extern int engine_rank;
/* The maximal number of timesteps in a simulation */
#define max_nr_timesteps (1 << 28)
/* Mini struct to link cells to density/force tasks. */
struct link {
/* The task pointer. */
struct task *t;
/* The next pointer. */
struct link *next;
};
/* Data structure for the engine. */
struct engine {
......@@ -139,6 +130,9 @@ struct engine {
/* Minimal ti_end for the next time-step */
int ti_end_min;
/* Are we drifting all particles now ? */
int drift_all;
/* Number of particles updated */
size_t updates, g_updates;
......
......@@ -654,15 +654,22 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
static void runner_do_drift(struct cell *c, struct engine *e) {
const double timeBase = e->timeBase;
const double dt = (e->ti_current - e->ti_old) * timeBase;
const int ti_old = e->ti_old;
const int ti_old = c->ti_old;
const int ti_current = e->ti_current;
struct part *const parts = c->parts;
struct xpart *const xparts = c->xparts;
struct gpart *const gparts = c->gparts;
float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
/* Do we need to drift ? */
if (!e->drift_all && !cell_is_drift_needed(c, ti_current)) return;
/* Check that we are actually going to move forward. */
if (ti_current == ti_old) return;
/* Drift from the last time the cell was drifted to the current time */
const double dt = (ti_current - ti_old) * timeBase;
float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f;
double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, entropy = 0.0, mass = 0.0;
double mom[3] = {0.0, 0.0, 0.0};
double ang_mom[3] = {0.0, 0.0, 0.0};
......@@ -785,6 +792,9 @@ static void runner_do_drift(struct cell *c, struct engine *e) {
c->ang_mom[0] = ang_mom[0];
c->ang_mom[1] = ang_mom[1];
c->ang_mom[2] = ang_mom[2];
/* Update the time of the last drift */
c->ti_old = ti_current;
}
/**
......
......@@ -164,6 +164,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
const size_t nr_parts = s->nr_parts;
struct cell *restrict c;
ticks tic = getticks();
const int ti_current = (s->e != NULL) ? s->e->ti_current : 0;
/* Run through the cells and get the current h_max. */
// tic = getticks();
......@@ -271,7 +272,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
/* Allocate the highest level of cells. */
s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
if (posix_memalign((void *)&s->cells, 64,
if (posix_memalign((void *)</