From 35337e9f07888b46b41f5a6c7d3096e4fb8adac9 Mon Sep 17 00:00:00 2001 From: Matthieu Schaller <matthieu.schaller@durham.ac.uk> Date: Fri, 5 May 2017 18:16:55 +0100 Subject: [PATCH] Split the drift task between the part and gparts --- src/active.h | 33 +++- src/cell.c | 210 +++++++++++++++------- src/cell.h | 33 ++-- src/debug.c | 4 +- src/engine.c | 75 ++++---- src/runner.c | 42 +++-- src/runner_doiact.h | 20 +-- src/runner_doiact_grav.h | 6 +- src/runner_doiact_vec.c | 368 +-------------------------------------- src/scheduler.c | 51 ++++-- src/space.c | 23 ++- src/task.c | 48 ++--- src/task.h | 3 +- src/timers.c | 3 +- src/timers.h | 3 +- 15 files changed, 364 insertions(+), 558 deletions(-) diff --git a/src/active.h b/src/active.h index 02e504f762..58e88835b6 100644 --- a/src/active.h +++ b/src/active.h @@ -29,25 +29,48 @@ #include "timeline.h" /** - * @brief Check that a cell been drifted to the current time. + * @brief Check that the #part in a #cell have been drifted to the current time. * * @param c The #cell. * @param e The #engine containing information about the current time. * @return 1 if the #cell has been drifted to the current time, 0 otherwise. */ -__attribute__((always_inline)) INLINE static int cell_is_drifted( +__attribute__((always_inline)) INLINE static int cell_are_part_drifted( const struct cell *c, const struct engine *e) { #ifdef SWIFT_DEBUG_CHECKS - if (c->ti_old > e->ti_current) + if (c->ti_old_part > e->ti_current) error( "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " "and e->ti_current=%lld (t=%e)", - c->ti_old, c->ti_old * e->timeBase, e->ti_current, + c->ti_old_part, c->ti_old_part * e->timeBase, e->ti_current, e->ti_current * e->timeBase); #endif - return (c->ti_old == e->ti_current); + return (c->ti_old_part == e->ti_current); +} + +/** + * @brief Check that the #gpart in a #cell have been drifted to the current + * time. + * + * @param c The #cell. + * @param e The #engine containing information about the current time. + * @return 1 if the #cell has been drifted to the current time, 0 otherwise. + */ +__attribute__((always_inline)) INLINE static int cell_are_gpart_drifted( + const struct cell *c, const struct engine *e) { + +#ifdef SWIFT_DEBUG_CHECKS + if (c->ti_old_gpart > e->ti_current) + error( + "Cell has been drifted too far forward in time! c->ti_old=%lld (t=%e) " + "and e->ti_current=%lld (t=%e)", + c->ti_old_gpart, c->ti_old_gpart * e->timeBase, e->ti_current, + e->ti_current * e->timeBase); +#endif + + return (c->ti_old_gpart == e->ti_current); } /* Are cells / particles active for regular tasks ? */ diff --git a/src/cell.c b/src/cell.c index 1c20add31d..0cf6c1b761 100644 --- a/src/cell.c +++ b/src/cell.c @@ -99,7 +99,8 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { c->h_max = pc->h_max; c->ti_end_min = pc->ti_end_min; c->ti_end_max = pc->ti_end_max; - c->ti_old = pc->ti_old; + c->ti_old_part = pc->ti_old_part; + c->ti_old_gpart = pc->ti_old_gpart; c->count = pc->count; c->gcount = pc->gcount; c->scount = pc->scount; @@ -128,7 +129,8 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { if (k & 1) temp->loc[2] += temp->width[2]; temp->depth = c->depth + 1; temp->split = 0; - temp->dx_max = 0.f; + temp->dx_max_part = 0.f; + temp->dx_max_gpart = 0.f; temp->dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; @@ -239,7 +241,8 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->h_max = c->h_max; pc->ti_end_min = c->ti_end_min; pc->ti_end_max = c->ti_end_max; - pc->ti_old = c->ti_old; + pc->ti_old_part = c->ti_old_part; + pc->ti_old_gpart = c->ti_old_gpart; pc->count = c->count; pc->gcount = c->gcount; pc->scount = c->scount; @@ -1018,7 +1021,7 @@ void cell_clean_links(struct cell *c, void *data) { } /** - * @brief Checks that the particles in a cell are at the + * @brief Checks that the #part in a cell are at the * current point in time * * Calls error() if the cell is not at the current time. @@ -1026,7 +1029,7 @@ void cell_clean_links(struct cell *c, void *data) { * @param c Cell to act upon * @param data The current time on the integer time-line */ -void cell_check_particle_drift_point(struct cell *c, void *data) { +void cell_check_part_drift_point(struct cell *c, void *data) { #ifdef SWIFT_DEBUG_CHECKS @@ -1035,14 +1038,40 @@ void cell_check_particle_drift_point(struct cell *c, void *data) { /* Only check local cells */ if (c->nodeID != engine_rank) return; - if (c->ti_old != ti_drift) - error("Cell in an incorrect time-zone! c->ti_old=%lld ti_drift=%lld", - c->ti_old, ti_drift); + if (c->ti_old_part != ti_drift) + error("Cell in an incorrect time-zone! c->ti_old_part=%lld ti_drift=%lld", + c->ti_old_part, ti_drift); for (int i = 0; i < c->count; ++i) if (c->parts[i].ti_drift != ti_drift) error("part in an incorrect time-zone! p->ti_drift=%lld ti_drift=%lld", c->parts[i].ti_drift, ti_drift); +#else + error("Calling debugging code without debugging flag activated."); +#endif +} + +/** + * @brief Checks that the #gpart and #spart in a cell are at the + * current point in time + * + * Calls error() if the cell is not at the current time. + * + * @param c Cell to act upon + * @param data The current time on the integer time-line + */ +void cell_check_gpart_drift_point(struct cell *c, void *data) { + +#ifdef SWIFT_DEBUG_CHECKS + + const integertime_t ti_drift = *(integertime_t *)data; + + /* Only check local cells */ + if (c->nodeID != engine_rank) return; + + if (c->ti_old_gpart != ti_drift) + error("Cell in an incorrect time-zone! c->ti_old_gpart=%lld ti_drift=%lld", + c->ti_old_gpart, ti_drift); for (int i = 0; i < c->gcount; ++i) if (c->gparts[i].ti_drift != ti_drift) @@ -1327,7 +1356,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { error("bad flags in sort task."); #endif scheduler_activate(s, ci->sorts); - if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift); + if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift_part); } if (!(cj->sorted & (1 << t->flags))) { #ifdef SWIFT_DEBUG_CHECKS @@ -1335,7 +1364,7 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { error("bad flags in sort task."); #endif scheduler_activate(s, cj->sorts); - if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part); } } @@ -1344,7 +1373,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Check whether there was too much particle motion, i.e. the cell neighbour conditions were violated. */ - if (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin) + if (max(ci->h_max, cj->h_max) + ci->dx_max_part + cj->dx_max_part > + cj->dmin) rebuild = 1; #ifdef WITH_MPI @@ -1367,11 +1397,11 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, cj->drift); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part); if (cell_is_active(cj, e)) { for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; @@ -1405,11 +1435,11 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, ci->drift); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -1425,13 +1455,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); } } else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #endif } @@ -1447,7 +1477,8 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); if (c->ghost != NULL) scheduler_activate(s, c->ghost); if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); - if (c->drift != NULL) scheduler_activate(s, c->drift); + if (c->drift_part != NULL) scheduler_activate(s, c->drift_part); + if (c->drift_gpart != NULL) scheduler_activate(s, c->drift_gpart); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); if (c->kick2 != NULL) scheduler_activate(s, c->kick2); if (c->timestep != NULL) scheduler_activate(s, c->timestep); @@ -1480,30 +1511,28 @@ void cell_set_super(struct cell *c, struct cell *super) { } /** - * @brief Recursively drifts particles of all kinds in a cell hierarchy. + * @brief Recursively drifts the #part in a cell hierarchy. * * @param c The #cell. * @param e The #engine (to get ti_current). */ -void cell_drift_particles(struct cell *c, const struct engine *e) { +void cell_drift_part(struct cell *c, const struct engine *e) { const float hydro_h_max = e->hydro_properties->h_max; const double timeBase = e->timeBase; - const integertime_t ti_old = c->ti_old; + const integertime_t ti_old_part = c->ti_old_part; const integertime_t ti_current = e->ti_current; struct part *const parts = c->parts; struct xpart *const xparts = c->xparts; - struct gpart *const gparts = c->gparts; - struct spart *const sparts = c->sparts; /* Drift from the last time the cell was drifted to the current time */ - const double dt = (ti_current - ti_old) * timeBase; + const double dt = (ti_current - ti_old_part) * timeBase; float dx_max = 0.f, dx2_max = 0.f; float dx_max_sort = 0.0f, dx2_max_sort = 0.f; float cell_h_max = 0.f; /* Check that we are actually going to move forward. */ - if (ti_current < ti_old) error("Attempt to drift to the past"); + if (ti_current < ti_old_part) error("Attempt to drift to the past"); /* Are we not in a leaf ? */ if (c->split) { @@ -1514,37 +1543,15 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { struct cell *cp = c->progeny[k]; /* Collect */ - cell_drift_particles(cp, e); + cell_drift_part(cp, e); /* Update */ - dx_max = max(dx_max, cp->dx_max); + dx_max = max(dx_max, cp->dx_max_part); dx_max_sort = max(dx_max_sort, cp->dx_max_sort); cell_h_max = max(cell_h_max, cp->h_max); } - } else if (ti_current > ti_old) { - - /* Loop over all the g-particles in the cell */ - const size_t nr_gparts = c->gcount; - for (size_t k = 0; k < nr_gparts; k++) { - - /* Get a handle on the gpart. */ - struct gpart *const gp = &gparts[k]; - - /* Drift... */ - drift_gpart(gp, dt, timeBase, ti_old, ti_current); - - /* Compute (square of) motion since last cell construction */ - const float dx2 = gp->x_diff[0] * gp->x_diff[0] + - gp->x_diff[1] * gp->x_diff[1] + - gp->x_diff[2] * gp->x_diff[2]; - dx2_max = max(dx2_max, dx2); - - /* Init gravity force fields. */ - if (gpart_is_active(gp, e)) { - gravity_init_gpart(gp); - } - } + } else if (ti_current > ti_old_part) { /* Loop over all the gas particles in the cell */ const size_t nr_parts = c->count; @@ -1555,7 +1562,7 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { struct xpart *const xp = &xparts[k]; /* Drift... */ - drift_part(p, xp, dt, timeBase, ti_old, ti_current); + drift_part(p, xp, dt, timeBase, ti_old_part, ti_current); /* Limit h to within the allowed range */ p->h = min(p->h, hydro_h_max); @@ -1579,6 +1586,86 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { } } + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); + dx_max_sort = sqrtf(dx2_max_sort); + + } else { + + cell_h_max = c->h_max; + dx_max = c->dx_max_part; + dx_max_sort = c->dx_max_sort; + } + + /* Store the values */ + c->h_max = cell_h_max; + c->dx_max_part = dx_max; + c->dx_max_sort = dx_max_sort; + + /* Update the time of the last drift */ + c->ti_old_part = ti_current; +} + +/** + * @brief Recursively drifts the #gpart in a cell hierarchy. + * + * @param c The #cell. + * @param e The #engine (to get ti_current). + */ +void cell_drift_gpart(struct cell *c, const struct engine *e) { + + const double timeBase = e->timeBase; + const integertime_t ti_old_gpart = c->ti_old_gpart; + const integertime_t ti_current = e->ti_current; + struct gpart *const gparts = c->gparts; + struct spart *const sparts = c->sparts; + + /* Drift from the last time the cell was drifted to the current time */ + const double dt = (ti_current - ti_old_gpart) * timeBase; + float dx_max = 0.f, dx2_max = 0.f; + + /* Check that we are actually going to move forward. */ + if (ti_current < ti_old_gpart) error("Attempt to drift to the past"); + + /* Are we not in a leaf ? */ + if (c->split) { + + /* Loop over the progeny and collect their data. */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) { + struct cell *cp = c->progeny[k]; + + /* Recurse */ + cell_drift_gpart(cp, e); + + /* Update */ + dx_max = max(dx_max, cp->dx_max_gpart); + } + + } else if (ti_current > ti_old_gpart) { + + /* Loop over all the g-particles in the cell */ + const size_t nr_gparts = c->gcount; + for (size_t k = 0; k < nr_gparts; k++) { + + /* Get a handle on the gpart. */ + struct gpart *const gp = &gparts[k]; + + /* Drift... */ + drift_gpart(gp, dt, timeBase, ti_old_gpart, ti_current); + + /* Compute (square of) motion since last cell construction */ + const float dx2 = gp->x_diff[0] * gp->x_diff[0] + + gp->x_diff[1] * gp->x_diff[1] + + gp->x_diff[2] * gp->x_diff[2]; + dx2_max = max(dx2_max, dx2); + + /* Init gravity force fields. */ + if (gpart_is_active(gp, e)) { + gravity_init_gpart(gp); + } + } + /* Loop over all the star particles in the cell */ const size_t nr_sparts = c->scount; for (size_t k = 0; k < nr_sparts; k++) { @@ -1587,29 +1674,24 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { struct spart *const sp = &sparts[k]; /* Drift... */ - drift_spart(sp, dt, timeBase, ti_old, ti_current); + drift_spart(sp, dt, timeBase, ti_old_gpart, ti_current); /* Note: no need to compute dx_max as all spart have a gpart */ } /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); - dx_max_sort = sqrtf(dx2_max_sort); } else { - cell_h_max = c->h_max; - dx_max = c->dx_max; - dx_max_sort = c->dx_max_sort; + dx_max = c->dx_max_gpart; } /* Store the values */ - c->h_max = cell_h_max; - c->dx_max = dx_max; - c->dx_max_sort = dx_max_sort; + c->dx_max_gpart = dx_max; /* Update the time of the last drift */ - c->ti_old = ti_current; + c->ti_old_gpart = ti_current; } /** diff --git a/src/cell.h b/src/cell.h index d04e20ca33..791e5c9744 100644 --- a/src/cell.h +++ b/src/cell.h @@ -74,7 +74,7 @@ struct pcell { /* Stats on this cell's particles. */ double h_max; - integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old; + integertime_t ti_end_min, ti_end_max, ti_beg_max, ti_old_part, ti_old_gpart; /* Number of particles in this cell. */ int count, gcount, scount; @@ -157,8 +157,11 @@ struct cell { /*! The extra ghost task for complex hydro schemes */ struct task *extra_ghost; - /*! The drift task */ - struct task *drift; + /*! The drift task for parts */ + struct task *drift_part; + + /*! The drift task for gparts */ + struct task *drift_gpart; /*! The first kick task */ struct task *kick1; @@ -230,24 +233,30 @@ struct cell { /*! Maximum beginning of (integer) time step in this cell. */ integertime_t ti_beg_max; - /*! Last (integer) time the cell's particle was drifted forward in time. */ - integertime_t ti_old; - /*! Last (integer) time the cell's sort arrays were updated. */ integertime_t ti_sort; + /*! Last (integer) time the cell's part were drifted forward in time. */ + integertime_t ti_old_part; + + /*! Last (integer) time the cell's gpart were drifted forward in time. */ + integertime_t ti_old_gpart; + /*! Last (integer) time the cell's multipole was drifted forward in time. */ integertime_t ti_old_multipole; /*! Minimum dimension, i.e. smallest edge of this cell (min(width)). */ float dmin; - /*! Maximum particle movement in this cell since last construction. */ - float dx_max; - /*! Maximum particle movement in this cell since the last sort. */ float dx_max_sort; + /*! Maximum part movement in this cell since last construction. */ + float dx_max_part; + + /*! Maximum gpart movement in this cell since last construction. */ + float dx_max_gpart; + /*! Nr of #part in this cell. */ int count; @@ -354,13 +363,15 @@ void cell_clean_links(struct cell *c, void *data); void cell_make_multipoles(struct cell *c, integertime_t ti_current); void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); -void cell_check_particle_drift_point(struct cell *c, void *data); +void cell_check_part_drift_point(struct cell *c, void *data); +void cell_check_gpart_drift_point(struct cell *c, void *data); void cell_check_multipole_drift_point(struct cell *c, void *data); void cell_reset_task_counters(struct cell *c); int cell_is_drift_needed(struct cell *c, const struct engine *e); int cell_unskip_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); -void cell_drift_particles(struct cell *c, const struct engine *e); +void cell_drift_part(struct cell *c, const struct engine *e); +void cell_drift_gpart(struct cell *c, const struct engine *e); void cell_drift_multipole(struct cell *c, const struct engine *e); void cell_drift_all_multipoles(struct cell *c, const struct engine *e); void cell_check_timesteps(struct cell *c); diff --git a/src/debug.c b/src/debug.c index 3732ee5e76..601f63d6e1 100644 --- a/src/debug.c +++ b/src/debug.c @@ -259,8 +259,8 @@ int checkCellhdxmax(const struct cell *c, int *depth) { message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } - if (c->dx_max != dx_max) { - message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max, dx_max); + if (c->dx_max_part != dx_max) { + message("%d Inconsistent dx_max: %f != %f", *depth, c->dx_max_part, dx_max); message("location: %f %f %f", c->loc[0], c->loc[1], c->loc[2]); result = 0; } diff --git a/src/engine.c b/src/engine.c index 03d5b4be46..1b2df3a143 100644 --- a/src/engine.c +++ b/src/engine.c @@ -1043,10 +1043,10 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #endif /* Drift before you send */ - if (ci->drift == NULL) - ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, - 0, ci, NULL); - scheduler_addunlock(s, ci->drift, t_xv); + if (ci->drift_part == NULL) + ci->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, ci, NULL); + scheduler_addunlock(s, ci->drift_part, t_xv); /* The super-cell's timestep task should unlock the send_ti task. */ scheduler_addunlock(s, ci->super->timestep, t_ti); @@ -1816,10 +1816,11 @@ void engine_count_and_link_tasks(struct engine *e) { } /* Link drift tasks to the next-higher drift task. */ - else if (t->type == task_type_drift) { + else if (t->type == task_type_drift_part) { struct cell *finger = ci->parent; - while (finger != NULL && finger->drift == NULL) finger = finger->parent; - if (finger != NULL) scheduler_addunlock(sched, t, finger->drift); + while (finger != NULL && finger->drift_part == NULL) + finger = finger->parent; + if (finger != NULL) scheduler_addunlock(sched, t, finger->drift_part); } /* Link self tasks to cells. */ @@ -1910,7 +1911,7 @@ static inline void engine_make_external_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> external gravity --> kick */ - scheduler_addunlock(sched, c->drift, gravity); + scheduler_addunlock(sched, c->drift_gpart, gravity); scheduler_addunlock(sched, gravity, c->super->kick2); } @@ -2076,14 +2077,14 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Sort tasks depend on the drift of the cell. */ if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { - scheduler_addunlock(sched, t->ci->drift, t); + scheduler_addunlock(sched, t->ci->drift_part, t); } /* Self-interaction? */ else if (t->type == task_type_self && t->subtype == task_subtype_density) { /* Make all density tasks depend on the drift. */ - scheduler_addunlock(sched, t->ci->drift, t); + scheduler_addunlock(sched, t->ci->drift_part, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2119,9 +2120,9 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Make all density tasks depend on the drift. */ if (t->ci->nodeID == engine_rank) - scheduler_addunlock(sched, t->ci->drift, t); + scheduler_addunlock(sched, t->ci->drift_part, t); if (t->cj->nodeID == engine_rank) - scheduler_addunlock(sched, t->cj->drift, t); + scheduler_addunlock(sched, t->cj->drift_part, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2478,7 +2479,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (t->subtype != task_subtype_density) continue; /* Too much particle movement? */ - if (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin) + if (max(ci->h_max, cj->h_max) + ci->dx_max_part + cj->dx_max_part > + cj->dmin) *rebuild_space = 1; /* Set the correct sorting flags */ @@ -2499,7 +2501,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, error("bad flags in sort task."); #endif scheduler_activate(s, ci->sorts); - if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift); + if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift_part); } if (!(cj->sorted & (1 << t->flags))) { #ifdef SWIFT_DEBUG_CHECKS @@ -2507,7 +2509,7 @@ void engine_marktasks_mapper(void *map_data, int num_elements, error("bad flags in sort task."); #endif scheduler_activate(s, cj->sorts); - if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift_part); } } @@ -2531,11 +2533,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, cj->drift); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift_part); if (cell_is_active(cj, e)) { for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; @@ -2569,11 +2571,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, scheduler_activate(s, l->t); /* Drift both cells, the foreign one at the level which it is sent. */ - if (l->t->ci->drift) - scheduler_activate(s, l->t->ci->drift); + if (l->t->ci->drift_part) + scheduler_activate(s, l->t->ci->drift_part); else error("Drift task missing !"); - if (t->type == task_type_pair) scheduler_activate(s, ci->drift); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift_part); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -2590,20 +2592,22 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } } else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #else if (t->type == task_type_pair) { - scheduler_activate(s, ci->drift); - scheduler_activate(s, cj->drift); + scheduler_activate(s, ci->drift_part); + scheduler_activate(s, cj->drift_part); } #endif } - /* Kick/Drift? */ + /* Kick/Drift/init ? */ else if (t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_drift || t->type == task_type_init_grav) { + t->type == task_type_drift_part || + t->type == task_type_drift_gpart || + t->type == task_type_init_grav) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } @@ -3044,9 +3048,10 @@ void engine_skip_force_and_kick(struct engine *e) { struct task *t = &tasks[i]; /* Skip everything that updates the particles */ - if (t->type == task_type_drift || t->type == task_type_kick1 || - t->type == task_type_kick2 || t->type == task_type_timestep || - t->subtype == task_subtype_force || t->subtype == task_subtype_grav || + if (t->type == task_type_drift_part || t->type == task_type_drift_gpart || + t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_timestep || t->subtype == task_subtype_force || + t->subtype == task_subtype_grav || t->type == task_type_grav_long_range || t->type == task_type_grav_top_level || t->type == task_type_grav_down || t->type == task_type_cooling || t->type == task_type_sourceterms) @@ -3068,8 +3073,9 @@ void engine_skip_drift(struct engine *e) { struct task *t = &tasks[i]; - /* Skip everything that updates the particles */ - if (t->type == task_type_drift) t->skip = 1; + /* Skip everything that moves the particles */ + if (t->type == task_type_drift_part || t->type == task_type_drift_gpart) + t->skip = 1; } } @@ -3437,7 +3443,10 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements, struct cell *c = &cells[ind]; if (c != NULL && c->nodeID == e->nodeID) { /* Drift all the particles */ - cell_drift_particles(c, e); + cell_drift_part(c, e); + + /* Drift all the g-particles */ + cell_drift_gpart(c, e); /* Drift the multipoles */ if (e->policy & engine_policy_self_gravity) diff --git a/src/runner.c b/src/runner.c index ca94d410bd..dd8cd7f426 100644 --- a/src/runner.c +++ b/src/runner.c @@ -332,7 +332,7 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { TIMER_TIC; /* Check that the particles have been moved to the current time */ - if (!cell_is_drifted(c, r->e)) error("Sorting un-drifted cell"); + if (!cell_are_part_drifted(c, r->e)) error("Sorting un-drifted cell"); #ifdef SWIFT_DEBUG_CHECKS /* Make sure the sort flags are consistent (downward). */ @@ -840,19 +840,35 @@ void runner_do_unskip_mapper(void *map_data, int num_elements, } } /** - * @brief Drift particles in real space. + * @brief Drift all part in a cell. * * @param r The runner thread. * @param c The cell. * @param timer Are we timing this ? */ -void runner_do_drift_particles(struct runner *r, struct cell *c, int timer) { +void runner_do_drift_part(struct runner *r, struct cell *c, int timer) { TIMER_TIC; - cell_drift_particles(c, r->e); + cell_drift_part(c, r->e); - if (timer) TIMER_TOC(timer_drift); + if (timer) TIMER_TOC(timer_drift_part); +} + +/** + * @brief Drift all gpart in a cell. + * + * @param r The runner thread. + * @param c The cell. + * @param timer Are we timing this ? + */ +void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) { + + TIMER_TIC; + + cell_drift_gpart(c, r->e); + + if (timer) TIMER_TOC(timer_drift_gpart); } /** @@ -1523,7 +1539,7 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, /* ... and store. */ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - c->ti_old = ti_current; + c->ti_old_part = ti_current; c->h_max = h_max; if (timer) TIMER_TOC(timer_dorecv_part); @@ -1597,7 +1613,7 @@ void runner_do_recv_gpart(struct runner *r, struct cell *c, int timer) { /* ... and store. */ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - c->ti_old = ti_current; + c->ti_old_gpart = ti_current; if (timer) TIMER_TOC(timer_dorecv_gpart); @@ -1670,7 +1686,7 @@ void runner_do_recv_spart(struct runner *r, struct cell *c, int timer) { /* ... and store. */ c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; - c->ti_old = ti_current; + c->ti_old_gpart = ti_current; if (timer) TIMER_TOC(timer_dorecv_spart); @@ -1738,7 +1754,8 @@ void *runner_main(void *data) { if (!cell_is_active(ci, e) && t->type != task_type_sort && t->type != task_type_send && t->type != task_type_recv && - t->type != task_type_kick1 && t->type != task_type_drift) + t->type != task_type_kick1 && t->type != task_type_drift_part && + t->type != task_type_drift_gpart) error( "Task (type='%s/%s') should have been skipped ti_current=%lld " "c->ti_end_min=%lld", @@ -1866,8 +1883,11 @@ void *runner_main(void *data) { runner_do_extra_ghost(r, ci, 1); break; #endif - case task_type_drift: - runner_do_drift_particles(r, ci, 1); + case task_type_drift_part: + runner_do_drift_part(r, ci, 1); + break; + case task_type_drift_gpart: + runner_do_drift_gpart(r, ci, 1); break; case task_type_kick1: runner_do_kick1(r, ci, 1); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 839e492956..f4513ca75b 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -903,7 +903,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Get the sort ID. */ @@ -1147,7 +1147,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Get the shift ID. */ @@ -1597,7 +1597,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); + if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); struct part *restrict parts = c->parts; const int count = c->count; @@ -1846,7 +1846,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) error("Cell is not drifted"); + if (!cell_are_part_drifted(c, e)) error("Cell is not drifted"); struct part *restrict parts = c->parts; const int count = c->count; @@ -2279,8 +2279,8 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { /* Make sure both cells are drifted to the current timestep. */ - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_are_part_drifted(ci, e)) cell_drift_part(ci, e); + if (!cell_are_part_drifted(cj, e)) cell_drift_part(cj, e); /* Do any of the cells need to be sorted first? */ if (!(ci->sorted & (1 << sid)) || @@ -2333,7 +2333,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { else { /* Drift the cell to the current timestep if needed. */ - if (!cell_is_drifted(ci, r->e)) cell_drift_particles(ci, r->e); + if (!cell_are_part_drifted(ci, r->e)) cell_drift_part(ci, r->e); #if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \ defined(GADGET2_SPH) @@ -2586,8 +2586,8 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { /* Make sure both cells are drifted to the current timestep. */ - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_are_part_drifted(ci, e)) cell_drift_part(ci, e); + if (!cell_are_part_drifted(cj, e)) cell_drift_part(cj, e); /* Do any of the cells need to be sorted first? */ if (!(ci->sorted & (1 << sid)) || @@ -3218,7 +3218,7 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, new_sid = sortlistID[new_sid]; /* Do any of the cells need to be drifted first? */ - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_are_part_drifted(cj, e)) cell_drift_part(cj, e); DOPAIR_SUBSET(r, ci, parts, ind, count, cj); } diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 13a55344d7..4960cff420 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -182,8 +182,8 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; /* Let's start by drifting things */ - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_are_gpart_drifted(ci, e)) cell_drift_gpart(ci, e); + if (!cell_are_gpart_drifted(cj, e)) cell_drift_gpart(cj, e); #if ICHECK > 0 for (int pid = 0; pid < gcount_i; pid++) { @@ -318,7 +318,7 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { if (!cell_is_active(c, e)) return; /* Do we need to start by drifting things ? */ - if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); + if (!cell_are_gpart_drifted(c, e)) cell_drift_gpart(c, e); #if ICHECK > 0 for (int pid = 0; pid < gcount; pid++) { diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index 90b612684d..a302817f7f 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -381,7 +381,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); + if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); /* Get the particle cache from the runner and re-allocate * the cache if it is not big enough for the cell. */ @@ -604,370 +604,6 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( #endif /* WITH_VECTORIZATION */ } -/** - * @brief Compute the cell self-interaction (non-symmetric) using vector - * intrinsics with two particle pis at a time. - * - * CURRENTLY BROKEN DO NOT USE. - * - * @param r The #runner. - * @param c The #cell. - */ -__attribute__((always_inline)) INLINE void runner_doself1_density_vec_2( - struct runner *r, struct cell *restrict c) { - -#ifdef WITH_VECTORIZATION - const struct engine *e = r->e; - int doi_mask; - int doi2_mask; - struct part *restrict pi; - struct part *restrict pi2; - int count_align; - - vector v_hi, v_vix, v_viy, v_viz, v_hig2, v_r2; - vector v_hi2, v_vix2, v_viy2, v_viz2, v_hig2_2, v2_r2; - - TIMER_TIC - - if (!cell_is_active(c, e)) return; - - if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); - - /* TODO: Need to find two active particles, not just one. */ - - struct part *restrict parts = c->parts; - const int count = c->count; - - /* Get the particle cache from the runner and re-allocate - * the cache if it is not big enough for the cell. */ - struct cache *restrict cell_cache = &r->ci_cache; - - if (cell_cache->count < count) { - cache_init(cell_cache, count); - } - - /* Read the particles from the cell and store them locally in the cache. */ - cache_read_particles(c, &r->ci_cache); - - /* Create two secondary caches. */ - int icount = 0, icount_align = 0; - struct c2_cache int_cache; - - int icount2 = 0, icount_align2 = 0; - struct c2_cache int_cache2; - - /* Loop over the particles in the cell. */ - for (int pid = 0; pid < count; pid += 2) { - - /* Get a pointer to the ith particle and next i particle. */ - pi = &parts[pid]; - pi2 = &parts[pid + 1]; - - /* Is the ith particle active? */ - if (!part_is_active(pi, e)) continue; - - vector pix, piy, piz; - vector pix2, piy2, piz2; - - const float hi = cell_cache->h[pid]; - const float hi2 = cell_cache->h[pid + 1]; - - /* Fill pi position vector. */ - pix.v = vec_set1(cell_cache->x[pid]); - piy.v = vec_set1(cell_cache->y[pid]); - piz.v = vec_set1(cell_cache->z[pid]); - v_hi.v = vec_set1(hi); - v_vix.v = vec_set1(cell_cache->vx[pid]); - v_viy.v = vec_set1(cell_cache->vy[pid]); - v_viz.v = vec_set1(cell_cache->vz[pid]); - - pix2.v = vec_set1(cell_cache->x[pid + 1]); - piy2.v = vec_set1(cell_cache->y[pid + 1]); - piz2.v = vec_set1(cell_cache->z[pid + 1]); - v_hi2.v = vec_set1(hi2); - v_vix2.v = vec_set1(cell_cache->vx[pid + 1]); - v_viy2.v = vec_set1(cell_cache->vy[pid + 1]); - v_viz2.v = vec_set1(cell_cache->vz[pid + 1]); - - const float hig2 = hi * hi * kernel_gamma2; - const float hig2_2 = hi2 * hi2 * kernel_gamma2; - v_hig2.v = vec_set1(hig2); - v_hig2_2.v = vec_set1(hig2_2); - - vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, - curlvySum, curlvzSum; - vector rhoSum2, rho_dhSum2, wcountSum2, wcount_dhSum2, div_vSum2, - curlvxSum2, curlvySum2, curlvzSum2; - - vector v_hi_inv, v_hi_inv2; - - v_hi_inv = vec_reciprocal(v_hi); - v_hi_inv2 = vec_reciprocal(v_hi2); - - rhoSum.v = vec_setzero(); - rho_dhSum.v = vec_setzero(); - wcountSum.v = vec_setzero(); - wcount_dhSum.v = vec_setzero(); - div_vSum.v = vec_setzero(); - curlvxSum.v = vec_setzero(); - curlvySum.v = vec_setzero(); - curlvzSum.v = vec_setzero(); - - rhoSum2.v = vec_setzero(); - rho_dhSum2.v = vec_setzero(); - wcountSum2.v = vec_setzero(); - wcount_dhSum2.v = vec_setzero(); - div_vSum2.v = vec_setzero(); - curlvxSum2.v = vec_setzero(); - curlvySum2.v = vec_setzero(); - curlvzSum2.v = vec_setzero(); - - /* Pad cache if there is a serial remainder. */ - count_align = count; - int rem = count % (NUM_VEC_PROC * VEC_SIZE); - if (rem != 0) { - int pad = (NUM_VEC_PROC * VEC_SIZE) - rem; - - count_align += pad; - /* Set positions to the same as particle pi so when the r2 > 0 mask is - * applied these extra contributions are masked out.*/ - for (int i = count; i < count_align; i++) { - cell_cache->x[i] = pix.f[0]; - cell_cache->y[i] = piy.f[0]; - cell_cache->z[i] = piz.f[0]; - } - } - - vector pjx, pjy, pjz; - vector pjvx, pjvy, pjvz, mj; - vector pjx2, pjy2, pjz2; - vector pjvx2, pjvy2, pjvz2, mj2; - - /* Find all of particle pi's interacions and store needed values in - * secondary cache.*/ - for (int pjd = 0; pjd < count_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { - - /* Load 2 sets of vectors from the particle cache. */ - pjx.v = vec_load(&cell_cache->x[pjd]); - pjy.v = vec_load(&cell_cache->y[pjd]); - pjz.v = vec_load(&cell_cache->z[pjd]); - pjvx.v = vec_load(&cell_cache->vx[pjd]); - pjvy.v = vec_load(&cell_cache->vy[pjd]); - pjvz.v = vec_load(&cell_cache->vz[pjd]); - mj.v = vec_load(&cell_cache->m[pjd]); - - pjx2.v = vec_load(&cell_cache->x[pjd + VEC_SIZE]); - pjy2.v = vec_load(&cell_cache->y[pjd + VEC_SIZE]); - pjz2.v = vec_load(&cell_cache->z[pjd + VEC_SIZE]); - pjvx2.v = vec_load(&cell_cache->vx[pjd + VEC_SIZE]); - pjvy2.v = vec_load(&cell_cache->vy[pjd + VEC_SIZE]); - pjvz2.v = vec_load(&cell_cache->vz[pjd + VEC_SIZE]); - mj2.v = vec_load(&cell_cache->m[pjd + VEC_SIZE]); - - /* Compute the pairwise distance. */ - vector v_dx_tmp, v_dy_tmp, v_dz_tmp; - vector v_dx_tmp2, v_dy_tmp2, v_dz_tmp2, v_r2_2; - vector v_dx2_tmp, v_dy2_tmp, v_dz2_tmp; - vector v_dx2_tmp2, v_dy2_tmp2, v_dz2_tmp2, v2_r2_2; - - v_dx_tmp.v = vec_sub(pix.v, pjx.v); - v_dy_tmp.v = vec_sub(piy.v, pjy.v); - v_dz_tmp.v = vec_sub(piz.v, pjz.v); - v_dx_tmp2.v = vec_sub(pix.v, pjx2.v); - v_dy_tmp2.v = vec_sub(piy.v, pjy2.v); - v_dz_tmp2.v = vec_sub(piz.v, pjz2.v); - - v_dx2_tmp.v = vec_sub(pix2.v, pjx.v); - v_dy2_tmp.v = vec_sub(piy2.v, pjy.v); - v_dz2_tmp.v = vec_sub(piz2.v, pjz.v); - v_dx2_tmp2.v = vec_sub(pix2.v, pjx2.v); - v_dy2_tmp2.v = vec_sub(piy2.v, pjy2.v); - v_dz2_tmp2.v = vec_sub(piz2.v, pjz2.v); - - v_r2.v = vec_mul(v_dx_tmp.v, v_dx_tmp.v); - v_r2.v = vec_fma(v_dy_tmp.v, v_dy_tmp.v, v_r2.v); - v_r2.v = vec_fma(v_dz_tmp.v, v_dz_tmp.v, v_r2.v); - v_r2_2.v = vec_mul(v_dx_tmp2.v, v_dx_tmp2.v); - v_r2_2.v = vec_fma(v_dy_tmp2.v, v_dy_tmp2.v, v_r2_2.v); - v_r2_2.v = vec_fma(v_dz_tmp2.v, v_dz_tmp2.v, v_r2_2.v); - - v2_r2.v = vec_mul(v_dx2_tmp.v, v_dx2_tmp.v); - v2_r2.v = vec_fma(v_dy2_tmp.v, v_dy2_tmp.v, v2_r2.v); - v2_r2.v = vec_fma(v_dz2_tmp.v, v_dz2_tmp.v, v2_r2.v); - v2_r2_2.v = vec_mul(v_dx2_tmp2.v, v_dx2_tmp2.v); - v2_r2_2.v = vec_fma(v_dy2_tmp2.v, v_dy2_tmp2.v, v2_r2_2.v); - v2_r2_2.v = vec_fma(v_dz2_tmp2.v, v_dz2_tmp2.v, v2_r2_2.v); - -/* Form a mask from r2 < hig2 and r2 > 0.*/ -#ifdef HAVE_AVX512_F - // KNL_MASK_16 doi_mask, doi_mask_check, doi_mask2, doi_mask2_check; - KNL_MASK_16 doi_mask_check, doi_mask2, doi_mask2_check; - KNL_MASK_16 doi2_mask_check, doi2_mask2, doi2_mask2_check; - - doi_mask_check = vec_cmp_gt(v_r2.v, vec_setzero()); - doi_mask = vec_cmp_lt(v_r2.v, v_hig2.v); - - doi2_mask_check = vec_cmp_gt(v2_r2.v, vec_setzero()); - doi2_mask = vec_cmp_lt(v2_r2.v, v_hig2_2.v); - - doi_mask2_check = vec_cmp_gt(v_r2_2.v, vec_setzero()); - doi_mask2 = vec_cmp_lt(v_r2_2.v, v_hig2.v); - - doi2_mask2_check = vec_cmp_gt(v2_r2_2.v, vec_setzero()); - doi2_mask2 = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v); - - doi_mask = doi_mask & doi_mask_check; - doi_mask2 = doi_mask2 & doi_mask2_check; - - doi2_mask = doi2_mask & doi2_mask_check; - doi2_mask2 = doi2_mask2 & doi2_mask2_check; -#else - vector v_doi_mask, v_doi_mask_check, v_doi_mask2, v_doi_mask2_check; - int doi_mask2; - - vector v_doi2_mask, v_doi2_mask_check, v_doi2_mask2, v_doi2_mask2_check; - int doi2_mask2; - - v_doi_mask_check.v = vec_cmp_gt(v_r2.v, vec_setzero()); - v_doi_mask.v = vec_cmp_lt(v_r2.v, v_hig2.v); - - v_doi2_mask_check.v = vec_cmp_gt(v2_r2.v, vec_setzero()); - v_doi2_mask.v = vec_cmp_lt(v2_r2.v, v_hig2_2.v); - - v_doi_mask2_check.v = vec_cmp_gt(v_r2_2.v, vec_setzero()); - v_doi_mask2.v = vec_cmp_lt(v_r2_2.v, v_hig2.v); - - v_doi2_mask2_check.v = vec_cmp_gt(v2_r2_2.v, vec_setzero()); - v_doi2_mask2.v = vec_cmp_lt(v2_r2_2.v, v_hig2_2.v); - - doi_mask = vec_cmp_result(vec_and(v_doi_mask.v, v_doi_mask_check.v)); - doi_mask2 = vec_cmp_result(vec_and(v_doi_mask2.v, v_doi_mask2_check.v)); - doi2_mask = vec_cmp_result(vec_and(v_doi2_mask.v, v_doi2_mask_check.v)); - doi2_mask2 = - vec_cmp_result(vec_and(v_doi2_mask2.v, v_doi2_mask2_check.v)); -#endif /* HAVE_AVX512_F */ - - /* Hit or miss? */ - // if (doi_mask) { - storeInteractions(doi_mask, pjd, &v_r2, &v_dx_tmp, &v_dy_tmp, &v_dz_tmp, - &mj, &pjvx, &pjvy, &pjvz, cell_cache, &int_cache, - &icount, &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, - &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, v_hi_inv, - v_vix, v_viy, v_viz); - //} - // if (doi2_mask) { - storeInteractions( - doi2_mask, pjd, &v2_r2, &v_dx2_tmp, &v_dy2_tmp, &v_dz2_tmp, &mj, - &pjvx, &pjvy, &pjvz, cell_cache, &int_cache2, &icount2, &rhoSum2, - &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2, - &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, v_viz2); - //} - /* Hit or miss? */ - // if (doi_mask2) { - storeInteractions(doi_mask2, pjd + VEC_SIZE, &v_r2_2, &v_dx_tmp2, - &v_dy_tmp2, &v_dz_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, - cell_cache, &int_cache, &icount, &rhoSum, &rho_dhSum, - &wcountSum, &wcount_dhSum, &div_vSum, &curlvxSum, - &curlvySum, &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz); - //} - // if (doi2_mask2) { - storeInteractions(doi2_mask2, pjd + VEC_SIZE, &v2_r2_2, &v_dx2_tmp2, - &v_dy2_tmp2, &v_dz2_tmp2, &mj2, &pjvx2, &pjvy2, &pjvz2, - cell_cache, &int_cache2, &icount2, &rhoSum2, - &rho_dhSum2, &wcountSum2, &wcount_dhSum2, &div_vSum2, - &curlvxSum2, &curlvySum2, &curlvzSum2, v_hi_inv2, - v_vix2, v_viy2, v_viz2); - //} - } - - /* Perform padded vector remainder interactions if any are present. */ - calcRemInteractions(&int_cache, icount, &rhoSum, &rho_dhSum, &wcountSum, - &wcount_dhSum, &div_vSum, &curlvxSum, &curlvySum, - &curlvzSum, v_hi_inv, v_vix, v_viy, v_viz, - &icount_align); - - calcRemInteractions(&int_cache2, icount2, &rhoSum2, &rho_dhSum2, - &wcountSum2, &wcount_dhSum2, &div_vSum2, &curlvxSum2, - &curlvySum2, &curlvzSum2, v_hi_inv2, v_vix2, v_viy2, - v_viz2, &icount_align2); - - /* Initialise masks to true incase remainder interactions have been - * performed. */ - vector int_mask, int_mask2; - vector int2_mask, int2_mask2; -#ifdef HAVE_AVX512_F - KNL_MASK_16 knl_mask = 0xFFFF; - KNL_MASK_16 knl_mask2 = 0xFFFF; - int_mask.m = vec_setint1(0xFFFFFFFF); - int_mask2.m = vec_setint1(0xFFFFFFFF); - int2_mask.m = vec_setint1(0xFFFFFFFF); - int2_mask2.m = vec_setint1(0xFFFFFFFF); -#else - int_mask.m = vec_setint1(0xFFFFFFFF); - int_mask2.m = vec_setint1(0xFFFFFFFF); - - int2_mask.m = vec_setint1(0xFFFFFFFF); - int2_mask2.m = vec_setint1(0xFFFFFFFF); -#endif - - /* Perform interaction with 2 vectors. */ - for (int pjd = 0; pjd < icount_align; pjd += (NUM_VEC_PROC * VEC_SIZE)) { - runner_iact_nonsym_2_vec_density( - &int_cache.r2q[pjd], &int_cache.dxq[pjd], &int_cache.dyq[pjd], - &int_cache.dzq[pjd], v_hi_inv, v_vix, v_viy, v_viz, - &int_cache.vxq[pjd], &int_cache.vyq[pjd], &int_cache.vzq[pjd], - &int_cache.mq[pjd], &rhoSum, &rho_dhSum, &wcountSum, &wcount_dhSum, - &div_vSum, &curlvxSum, &curlvySum, &curlvzSum, int_mask, int_mask2, -#ifdef HAVE_AVX512_F - knl_mask, knl_mask2); -#else - 0, 0); -#endif - } - - for (int pjd = 0; pjd < icount_align2; pjd += (NUM_VEC_PROC * VEC_SIZE)) { - runner_iact_nonsym_2_vec_density( - &int_cache2.r2q[pjd], &int_cache2.dxq[pjd], &int_cache2.dyq[pjd], - &int_cache2.dzq[pjd], v_hi_inv2, v_vix2, v_viy2, v_viz2, - &int_cache2.vxq[pjd], &int_cache2.vyq[pjd], &int_cache2.vzq[pjd], - &int_cache2.mq[pjd], &rhoSum2, &rho_dhSum2, &wcountSum2, - &wcount_dhSum2, &div_vSum2, &curlvxSum2, &curlvySum2, &curlvzSum2, - int2_mask, int2_mask2, -#ifdef HAVE_AVX512_F - knl_mask, knl_mask2); -#else - 0, 0); -#endif - } - /* Perform horizontal adds on vector sums and store result in particle pi. - */ - VEC_HADD(rhoSum, pi->rho); - VEC_HADD(rho_dhSum, pi->density.rho_dh); - VEC_HADD(wcountSum, pi->density.wcount); - VEC_HADD(wcount_dhSum, pi->density.wcount_dh); - VEC_HADD(div_vSum, pi->density.div_v); - VEC_HADD(curlvxSum, pi->density.rot_v[0]); - VEC_HADD(curlvySum, pi->density.rot_v[1]); - VEC_HADD(curlvzSum, pi->density.rot_v[2]); - - VEC_HADD(rhoSum2, pi2->rho); - VEC_HADD(rho_dhSum2, pi2->density.rho_dh); - VEC_HADD(wcountSum2, pi2->density.wcount); - VEC_HADD(wcount_dhSum2, pi2->density.wcount_dh); - VEC_HADD(div_vSum2, pi2->density.div_v); - VEC_HADD(curlvxSum2, pi2->density.rot_v[0]); - VEC_HADD(curlvySum2, pi2->density.rot_v[1]); - VEC_HADD(curlvzSum2, pi2->density.rot_v[2]); - - /* Reset interaction count. */ - icount = 0; - icount2 = 0; - } /* loop over all particles. */ - - TIMER_TOC(timer_doself_density); -#endif /* WITH_VECTORIZATION */ -} - /** * @brief Compute the density interactions between a cell pair (non-symmetric) * using vector intrinsics. @@ -989,7 +625,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); /* Get the sort ID. */ diff --git a/src/scheduler.c b/src/scheduler.c index 2d1e0093cc..a78174258d 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -141,7 +141,8 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { ((t->type == task_type_pair && t->cj == NULL)) || ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_drift_part) && t->ci->nodeID != s->nodeID) || + ((t->type == task_type_drift_gpart) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID)) { t->type = task_type_none; t->subtype = task_subtype_none; @@ -178,9 +179,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { if (t->subtype == task_subtype_grav || t->subtype == task_subtype_external_grav) { lock_lock(&ci->lock); - if (ci->drift == NULL) - ci->drift = scheduler_addtask(s, task_type_drift, - task_subtype_none, 0, 0, ci, NULL); + if (ci->drift_gpart == NULL) + ci->drift_gpart = scheduler_addtask( + s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL); lock_unlock_blind(&ci->lock); } @@ -220,16 +221,25 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { } } - /* Otherwise, make sure the self task has a drift task. */ + /* Otherwise, make sure the self task has a drift task of the correct + kind. */ else { + lock_lock(&ci->lock); - if (ci->drift == NULL) - ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, - 0, 0, ci, NULL); + + /* Drift gparts case */ + if (t->subtype == task_subtype_grav && ci->drift_gpart == NULL) + ci->drift_gpart = scheduler_addtask( + s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL); + + /* Drift parts case */ + else if (t->subtype == task_subtype_density && ci->drift_part == NULL) + ci->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, ci, NULL); lock_unlock_blind(&ci->lock); } - /* Pair interaction? */ + /* Non-gravity pair interaction? */ } else if (t->type == task_type_pair && t->subtype != task_subtype_grav) { /* Get a handle on the cells involved. */ @@ -633,9 +643,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Create the drift and sort for ci. */ lock_lock(&ci->lock); - if (ci->drift == NULL && ci->nodeID == engine_rank) - ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, - 0, 0, ci, NULL); + if (ci->drift_part == NULL && ci->nodeID == engine_rank) + ci->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, ci, NULL); if (ci->sorts == NULL) ci->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 1 << sid, 0, ci, NULL); @@ -646,9 +656,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Create the sort for cj. */ lock_lock(&cj->lock); - if (cj->drift == NULL && cj->nodeID == engine_rank) - cj->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, - 0, 0, cj, NULL); + if (cj->drift_part == NULL && cj->nodeID == engine_rank) + cj->drift_part = scheduler_addtask(s, task_type_drift_part, + task_subtype_none, 0, 0, cj, NULL); if (cj->sorts == NULL) cj->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 1 << sid, 0, cj, NULL); @@ -1013,9 +1023,12 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_ghost: if (t->ci == t->ci->super) cost = wscale * t->ci->count; break; - case task_type_drift: + case task_type_drift_part: cost = wscale * t->ci->count; break; + case task_type_drift_gpart: + cost = wscale * t->ci->gcount; + break; case task_type_kick1: cost = wscale * t->ci->count; break; @@ -1144,7 +1157,8 @@ void scheduler_start(struct scheduler *s) { } else if (cj == NULL) { /* self */ if (ci->ti_end_min == ti_current && t->skip && - t->type != task_type_sort && t->type != task_type_drift && t->type) + t->type != task_type_sort && t->type != task_type_drift_part && + t->type != task_type_drift_gpart) error( "Task (type='%s/%s') should not have been skipped " "ti_current=%lld " @@ -1235,7 +1249,8 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_ghost: case task_type_kick1: case task_type_kick2: - case task_type_drift: + case task_type_drift_part: + case task_type_drift_gpart: case task_type_timestep: qid = t->ci->super->owner; break; diff --git a/src/space.c b/src/space.c index 1d957e7e72..2844daa06e 100644 --- a/src/space.c +++ b/src/space.c @@ -206,7 +206,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->gradient = NULL; c->force = NULL; c->grav = NULL; - c->dx_max = 0.0f; + c->dx_max_part = 0.0f; + c->dx_max_gpart = 0.0f; c->dx_max_sort = 0.0f; c->sorted = 0; c->count = 0; @@ -218,7 +219,8 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->kick1 = NULL; c->kick2 = NULL; c->timestep = NULL; - c->drift = NULL; + c->drift_part = NULL; + c->drift_gpart = NULL; c->cooling = NULL; c->sourceterms = NULL; c->grav_long_range = NULL; @@ -420,7 +422,8 @@ void space_regrid(struct space *s, int verbose) { c->gcount = 0; c->scount = 0; c->super = c; - c->ti_old = ti_old; + c->ti_old_part = ti_old; + c->ti_old_gpart = ti_old; c->ti_old_multipole = ti_old; if (s->gravity) c->multipole = &s->multipoles_top[cid]; } @@ -902,7 +905,8 @@ void space_rebuild(struct space *s, int verbose) { struct spart *sfinger = s->sparts; for (int k = 0; k < s->nr_cells; k++) { struct cell *restrict c = &cells_top[k]; - c->ti_old = ti_old; + c->ti_old_part = ti_old; + c->ti_old_gpart = ti_old; c->ti_old_multipole = ti_old; c->parts = finger; c->xparts = xfinger; @@ -2011,7 +2015,8 @@ void space_split_recursive(struct space *s, struct cell *c, cp->count = 0; cp->gcount = 0; cp->scount = 0; - cp->ti_old = c->ti_old; + cp->ti_old_part = c->ti_old_part; + cp->ti_old_gpart = c->ti_old_gpart; cp->ti_old_multipole = c->ti_old_multipole; cp->loc[0] = c->loc[0]; cp->loc[1] = c->loc[1]; @@ -2025,8 +2030,9 @@ void space_split_recursive(struct space *s, struct cell *c, if (k & 1) cp->loc[2] += cp->width[2]; cp->depth = c->depth + 1; cp->split = 0; - cp->h_max = 0.0; - cp->dx_max = 0.f; + cp->h_max = 0.f; + cp->dx_max_part = 0.f; + cp->dx_max_gpart = 0.f; cp->dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; @@ -2877,7 +2883,8 @@ void space_check_drift_point(struct space *s, integertime_t ti_drift, int multipole) { #ifdef SWIFT_DEBUG_CHECKS /* Recursively check all cells */ - space_map_cells_pre(s, 1, cell_check_particle_drift_point, &ti_drift); + space_map_cells_pre(s, 1, cell_check_part_drift_point, &ti_drift); + space_map_cells_pre(s, 1, cell_check_gpart_drift_point, &ti_drift); if (multipole) space_map_cells_pre(s, 1, cell_check_multipole_drift_point, &ti_drift); #else diff --git a/src/task.c b/src/task.c index 1bb3df7ea9..1a4645b4ab 100644 --- a/src/task.c +++ b/src/task.c @@ -47,27 +47,15 @@ #include "lock.h" /* Task type names. */ -const char *taskID_names[task_type_count] = {"none", - "sort", - "self", - "pair", - "sub_self", - "sub_pair", - "init_grav", - "ghost", - "extra_ghost", - "drift", - "kick1", - "kick2", - "timestep", - "send", - "recv", - "grav_top_level", - "grav_long_range", - "grav_mm", - "grav_down", - "cooling", - "sourceterms"}; +const char *taskID_names[task_type_count] = { + "none", "sort", "self", + "pair", "sub_self", "sub_pair", + "init_grav", "ghost", "extra_ghost", + "drift_part", "drift_gpart", "kick1", + "kick2", "timestep", "send", + "recv", "grav_top_level", "grav_long_range", + "grav_mm", "grav_down", "cooling", + "sourceterms"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { @@ -132,6 +120,7 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_none; break; + case task_type_drift_part: case task_type_sort: case task_type_ghost: case task_type_extra_ghost: @@ -169,7 +158,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( case task_type_timestep: case task_type_send: case task_type_recv: - case task_type_drift: if (t->ci->count > 0 && t->ci->gcount > 0) return task_action_all; else if (t->ci->count > 0) @@ -187,8 +175,10 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( return task_action_multipole; break; + case task_type_drift_gpart: case task_type_grav_down: return task_action_gpart; + break; default: error("Unknown task_action for task"); @@ -286,15 +276,19 @@ void task_unlock(struct task *t) { case task_type_kick1: case task_type_kick2: case task_type_timestep: - case task_type_drift: cell_unlocktree(ci); cell_gunlocktree(ci); break; + case task_type_drift_part: case task_type_sort: cell_unlocktree(ci); break; + case task_type_drift_gpart: + cell_gunlocktree(ci); + break; + case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { @@ -371,7 +365,6 @@ int task_lock(struct task *t) { case task_type_kick1: case task_type_kick2: case task_type_timestep: - case task_type_drift: if (ci->hold || ci->ghold) return 0; if (cell_locktree(ci) != 0) return 0; if (cell_glocktree(ci) != 0) { @@ -380,10 +373,17 @@ int task_lock(struct task *t) { } break; + case task_type_drift_part: case task_type_sort: + if (ci->hold) return 0; if (cell_locktree(ci) != 0) return 0; break; + case task_type_drift_gpart: + if (ci->ghold) return 0; + if (cell_glocktree(ci) != 0) return 0; + break; + case task_type_self: case task_type_sub_self: if (subtype == task_subtype_grav) { diff --git a/src/task.h b/src/task.h index 049f86bdd6..0df0d8c4c7 100644 --- a/src/task.h +++ b/src/task.h @@ -47,7 +47,8 @@ enum task_types { task_type_init_grav, task_type_ghost, task_type_extra_ghost, - task_type_drift, + task_type_drift_part, + task_type_drift_gpart, task_type_kick1, task_type_kick2, task_type_timestep, diff --git a/src/timers.c b/src/timers.c index 9f86ea438e..62eac20596 100644 --- a/src/timers.c +++ b/src/timers.c @@ -40,7 +40,8 @@ const char* timers_names[timer_count] = { "prepare", "init", "init_grav", - "drift", + "drift_part", + "drift_gpart", "kick1", "kick2", "timestep", diff --git a/src/timers.h b/src/timers.h index 4089afa5d7..9248be4f30 100644 --- a/src/timers.h +++ b/src/timers.h @@ -41,7 +41,8 @@ enum { timer_prepare, timer_init, timer_init_grav, - timer_drift, + timer_drift_part, + timer_drift_gpart, timer_kick1, timer_kick2, timer_timestep, -- GitLab