diff --git a/src/cell.h b/src/cell.h index d1f4fa427669eaf80fe5eb63894746da3d282787..8756d874adfe77c51355061d68be4f070bab6147 100644 --- a/src/cell.h +++ b/src/cell.h @@ -809,6 +809,36 @@ cell_can_recurse_in_pair_hydro_task(const struct cell *c) { c->hydro.dx_max_part_old) < 0.5f * c->dmin); } +/** + * @brief Can a sub-pair hydro task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_subpair_hydro_task(const struct cell *c) { + + /* If so, is the cut-off radius plus the max distance the parts have moved */ + /* smaller than the sub-cell sizes ? */ + return ((kernel_gamma * c->hydro.h_max_active + c->hydro.dx_max_part_old) < + 0.5f * c->dmin); +} + +/** + * @brief Can a sub-pair hydro task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_subpair2_hydro_task(const struct cell *c) { + + /* If so, is the cut-off radius plus the max distance the parts have moved */ + /* smaller than the sub-cell sizes ? */ + return ((kernel_gamma * c->hydro.h_max + c->hydro.dx_max_part) < + 0.5f * c->dmin); +} + /** * @brief Can a sub-self hydro task recurse to a lower level based * on the status of the particles in the cell. @@ -822,6 +852,32 @@ cell_can_recurse_in_self_hydro_task(const struct cell *c) { return c->split && (kernel_gamma * c->hydro.h_max_old < 0.5f * c->dmin); } +/** + * @brief Can a sub-self hydro task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_subself_hydro_task(const struct cell *c) { + + /* Is the cell not smaller than the smoothing length? */ + return (kernel_gamma * c->hydro.h_max_active < 0.5f * c->dmin); +} + +/** + * @brief Can a hydro task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_subself2_hydro_task(const struct cell *c) { + + /* Is the cell split and not smaller than the smoothing length? */ + return c->split && (kernel_gamma * c->hydro.h_max < 0.5f * c->dmin); +} + /** * @brief Can a sub-pair star task recurse to a lower level based * on the status of the particles in the cell. @@ -830,18 +886,29 @@ cell_can_recurse_in_self_hydro_task(const struct cell *c) { * @param cj The #cell with hydro parts. */ __attribute__((always_inline)) INLINE static int -cell_can_recurse_in_pair_stars_task(const struct cell *ci, - const struct cell *cj) { +cell_can_recurse_in_pair_stars_task(const struct cell *c) { /* Is the cell split ? */ /* If so, is the cut-off radius plus the max distance the parts have moved */ /* smaller than the sub-cell sizes ? */ /* Note: We use the _old values as these might have been updated by a drift */ - return ci->split && cj->split && - ((kernel_gamma * ci->stars.h_max_old + ci->stars.dx_max_part_old) < - 0.5f * ci->dmin) && - ((kernel_gamma * cj->hydro.h_max_old + cj->hydro.dx_max_part_old) < - 0.5f * cj->dmin); + return c->split && ((kernel_gamma * c->stars.h_max_old + + c->stars.dx_max_part_old) < 0.5f * c->dmin); +} + +/** + * @brief Can a sub-pair stars task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_subpair_stars_task(const struct cell *c) { + + /* If so, is the cut-off radius plus the max distance the parts have moved */ + /* smaller than the sub-cell sizes ? */ + return ((kernel_gamma * c->stars.h_max_active + c->stars.dx_max_part_old) < + 0.5f * c->dmin); } /** @@ -858,6 +925,19 @@ cell_can_recurse_in_self_stars_task(const struct cell *c) { (kernel_gamma * c->hydro.h_max_old < 0.5f * c->dmin); } +/** + * @brief Can a sub-self stars task recurse to a lower level based + * on the status of the particles in the cell. + * + * @param c The #cell. + */ +__attribute__((always_inline)) INLINE static int +cell_can_recurse_in_subself_stars_task(const struct cell *c) { + + /* Is the cell not smaller than the smoothing length? */ + return (kernel_gamma * c->stars.h_max_active < 0.5f * c->dmin); +} + /** * @brief Can a sub-pair sink task recurse to a lower level based * on the status of the particles in the cell. diff --git a/src/cell_convert_part.c b/src/cell_convert_part.c index 68904a7206d89cfb722680a607a05fd28db7179a..f7f54beb6f23123dc7202c0a21208b22034f5689 100644 --- a/src/cell_convert_part.c +++ b/src/cell_convert_part.c @@ -922,6 +922,9 @@ struct spart *cell_convert_part_to_spart(struct engine *e, struct cell *c, /* Set a smoothing length */ sp->h = p->h; + /* Give the new particle the correct depth */ + cell_set_spart_h_depth(sp, c); + /* Here comes the Sun! */ return sp; } @@ -1000,6 +1003,9 @@ struct spart *cell_spawn_new_spart_from_part(struct engine *e, struct cell *c, /* Set a smoothing length */ sp->h = p->h; + /* Give the new particle the correct depth */ + cell_set_spart_h_depth(sp, c); + /* Here comes the Sun! */ return sp; } @@ -1148,6 +1154,9 @@ struct spart *cell_spawn_new_spart_from_sink(struct engine *e, struct cell *c, /* Set a smoothing length */ sp->h = s->h; + /* Give the new particle the correct depth */ + cell_set_spart_h_depth(sp, c); + /* Here comes the Sun! */ return sp; } diff --git a/src/cell_drift.c b/src/cell_drift.c index 9b5b07cc3d972723086ef464feef9171f984b30d..6454dc0392d6b89bf9a2e367850a9b1bb9564db6 100644 --- a/src/cell_drift.c +++ b/src/cell_drift.c @@ -331,6 +331,7 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force, /* Limit h to within the allowed range */ p->h = min(p->h, hydro_h_max); p->h = max(p->h, hydro_h_min); + /* Set the appropriate depth level for this particle */ cell_set_part_h_depth(p, c); @@ -681,9 +682,6 @@ void cell_drift_spart(struct cell *c, const struct engine *e, int force, drift_spart(sp, dt_drift, ti_old_spart, ti_current, e, replication_list, c->loc); - /* Set the appropriate depth level for this particle */ - cell_set_spart_h_depth(sp, c); - #ifdef SWIFT_DEBUG_CHECKS /* Make sure the particle does not drift by more than a box length. */ if (fabs(sp->v[0] * dt_drift) > e->s->dim[0] || @@ -730,6 +728,9 @@ void cell_drift_spart(struct cell *c, const struct engine *e, int force, sp->h = min(sp->h, stars_h_max); sp->h = max(sp->h, stars_h_min); + /* Set the appropriate depth level for this particle */ + cell_set_spart_h_depth(sp, c); + /* Compute (square of) motion since last cell construction */ const float dx2 = sp->x_diff[0] * sp->x_diff[0] + sp->x_diff[1] * sp->x_diff[1] + @@ -890,9 +891,6 @@ void cell_drift_bpart(struct cell *c, const struct engine *e, int force, drift_bpart(bp, dt_drift, ti_old_bpart, ti_current, e, replication_list, c->loc); - /* Set the appropriate depth level for this particle */ - cell_set_bpart_h_depth(bp, c); - #ifdef SWIFT_DEBUG_CHECKS /* Make sure the particle does not drift by more than a box length. */ if (fabs(bp->v[0] * dt_drift) > e->s->dim[0] || @@ -937,6 +935,9 @@ void cell_drift_bpart(struct cell *c, const struct engine *e, int force, bp->h = min(bp->h, black_holes_h_max); bp->h = max(bp->h, black_holes_h_min); + /* Set the appropriate depth level for this particle */ + cell_set_bpart_h_depth(bp, c); + /* Compute (square of) motion since last cell construction */ const float dx2 = bp->x_diff[0] * bp->x_diff[0] + bp->x_diff[1] * bp->x_diff[1] + @@ -994,6 +995,8 @@ void cell_drift_sink(struct cell *c, const struct engine *e, int force) { const int periodic = e->s->periodic; const double dim[3] = {e->s->dim[0], e->s->dim[1], e->s->dim[2]}; const int with_cosmology = (e->policy & engine_policy_cosmology); + const float sinks_h_max = e->hydro_properties->h_max; + const float sinks_h_min = e->hydro_properties->h_min; const integertime_t ti_old_sink = c->sinks.ti_old_part; const integertime_t ti_current = e->ti_current; struct sink *const sinks = c->sinks.parts; @@ -1077,9 +1080,6 @@ void cell_drift_sink(struct cell *c, const struct engine *e, int force) { /* Drift... */ drift_sink(sink, dt_drift, ti_old_sink, ti_current); - /* Set the appropriate depth level for this particle */ - cell_set_sink_h_depth(sink, c); - #ifdef SWIFT_DEBUG_CHECKS /* Make sure the particle does not drift by more than a box length. */ if (fabs(sink->v[0] * dt_drift) > e->s->dim[0] || @@ -1120,7 +1120,12 @@ void cell_drift_sink(struct cell *c, const struct engine *e, int force) { } } - /* sp->h does not need to be limited. */ + /* Limit h to within the allowed range */ + sink->h = min(sink->h, sinks_h_max); + sink->h = max(sink->h, sinks_h_min); + + /* Set the appropriate depth level for this particle */ + cell_set_sink_h_depth(sink, c); /* Compute (square of) motion since last cell construction */ const float dx2 = sink->x_diff[0] * sink->x_diff[0] + diff --git a/src/cell_unskip.c b/src/cell_unskip.c index f7b93c31768c59f0d0f516ef9d7aef3dd269b4d2..22a73b0d3dbd5ae4221b6b27a2387393fa3bfa41 100644 --- a/src/cell_unskip.c +++ b/src/cell_unskip.c @@ -970,8 +970,8 @@ void cell_activate_subcell_stars_tasks(struct cell *ci, struct cell *cj, if (!ci_active && !cj_active) return; /* recurse? */ - if (cell_can_recurse_in_pair_stars_task(ci, cj) && - cell_can_recurse_in_pair_stars_task(cj, ci)) { + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { const struct cell_split_pair *csp = &cell_split_pairs[sid]; for (int k = 0; k < csp->count; k++) { @@ -1691,11 +1691,25 @@ int cell_unskip_hydro_tasks(struct cell *c, struct scheduler *s) { /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_self) { cell_activate_subcell_hydro_tasks(ci, NULL, s, with_timestep_limiter); + + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci_nodeID == nodeID && with_timestep_limiter) + cell_activate_limiter(ci, s); } /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair) { cell_activate_subcell_hydro_tasks(ci, cj, s, with_timestep_limiter); + + /* Activate the drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); + + /* Activate the limiter tasks. */ + if (ci_nodeID == nodeID && with_timestep_limiter) + cell_activate_limiter(ci, s); + if (cj_nodeID == nodeID && with_timestep_limiter) + cell_activate_limiter(cj, s); } } @@ -2272,6 +2286,10 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s, cell_activate_subcell_stars_tasks(ci, NULL, s, with_star_formation, with_star_formation_sink, with_timestep_sync); + + cell_activate_drift_spart(ci, s); + cell_activate_drift_part(ci, s); + if (with_timestep_sync) cell_activate_sync_part(ci, s); } else if (t->type == task_type_sub_pair) { @@ -2279,6 +2297,18 @@ int cell_unskip_stars_tasks(struct cell *c, struct scheduler *s, with_star_formation_sink, with_timestep_sync); + /* Activate the drift tasks. */ + if (ci_nodeID == nodeID) cell_activate_drift_spart(ci, s); + if (cj_nodeID == nodeID) cell_activate_drift_part(cj, s); + if (cj_nodeID == nodeID && with_timestep_sync) + cell_activate_sync_part(cj, s); + + /* Activate the drift tasks. */ + if (cj_nodeID == nodeID) cell_activate_drift_spart(cj, s); + if (ci_nodeID == nodeID) cell_activate_drift_part(ci, s); + if (ci_nodeID == nodeID && with_timestep_sync) + cell_activate_sync_part(ci, s); + /* Activate stars_in for each cell that is part of * a sub_pair task as to not miss any dependencies */ if (ci_nodeID == nodeID) diff --git a/src/runner_doiact_functions_hydro.h b/src/runner_doiact_functions_hydro.h index d8bf99b2f6cd4bb1c77935512164bbde99f678ed..102fb042fc2bb7e6c568f2e511610b55e5d84b34 100644 --- a/src/runner_doiact_functions_hydro.h +++ b/src/runner_doiact_functions_hydro.h @@ -34,9 +34,12 @@ * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { +void DOPAIR1_NAIVE(struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj, const int limit_min_h, + const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -51,15 +54,29 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Anything to do here? */ if (!CELL_IS_ACTIVE(ci, e) && !CELL_IS_ACTIVE(cj, e)) return; + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + GET_MU0(); + const int count_i = ci->hydro.count; const int count_j = cj->hydro.count; struct part *restrict parts_i = ci->hydro.parts; struct part *restrict parts_j = cj->hydro.parts; - /* Cosmological terms and physical constants */ - const float a = cosmo->a; - const float H = cosmo->H; - GET_MU0(); +#ifdef SWIFT_DEBUG_CHECKS + if (ci->dmin != cj->dmin) error("Cells of different size!"); +#endif + + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; +#endif /* Get the relative distance between the pairs, wrapping. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -80,6 +97,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, if (part_is_inhibited(pi, e)) continue; const int pi_active = PART_IS_ACTIVE(pi, e); + const char depth_i = pi->depth_h; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])), @@ -95,9 +113,10 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; + const int pj_active = PART_IS_ACTIVE(pj, e); + const char depth_j = pj->depth_h; const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; - const int pj_active = PART_IS_ACTIVE(pj, e); /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), @@ -114,8 +133,17 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, error("Particle pj not drifted to current time"); #endif + const int doi = pi_active && (r2 < hig2) && (depth_i >= min_depth) && + (depth_i <= max_depth); + const int doj = pj_active && (r2 < hjg2) && (depth_j >= min_depth) && + (depth_j <= max_depth); + /* Hit or miss? */ - if (r2 < hig2 && pi_active) { + if (doi) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); @@ -132,7 +160,11 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, t_current, cosmo, with_cosmology); #endif } - if (r2 < hjg2 && pj_active) { + if (doj) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif dx[0] = -dx[0]; dx[1] = -dx[1]; @@ -144,7 +176,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_sink(r2, dx, hj, hi, pj, pi, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) runner_iact_nonsym_timebin(r2, dx, hj, hi, pj, pi, a, H); @@ -167,9 +199,12 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { +void DOPAIR2_NAIVE(struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj, const int limit_min_h, + const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -184,15 +219,25 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, /* Anything to do here? */ if (!CELL_IS_ACTIVE(ci, e) && !CELL_IS_ACTIVE(cj, e)) return; + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + GET_MU0(); + const int count_i = ci->hydro.count; const int count_j = cj->hydro.count; struct part *restrict parts_i = ci->hydro.parts; struct part *restrict parts_j = cj->hydro.parts; - /* Cosmological terms and physical constants */ - const float a = cosmo->a; - const float H = cosmo->H; - GET_MU0(); + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; +#endif /* Get the relative distance between the pairs, wrapping. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -213,6 +258,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, if (part_is_inhibited(pi, e)) continue; const int pi_active = PART_IS_ACTIVE(pi, e); + const char depth_i = pi->depth_h; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])), @@ -229,6 +275,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, if (part_is_inhibited(pj, e)) continue; const int pj_active = PART_IS_ACTIVE(pj, e); + const char depth_j = pj->depth_h; const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; @@ -247,62 +294,57 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, error("Particle pj not drifted to current time"); #endif + const int doi = pi_active && (depth_i >= min_depth) && + (depth_i <= max_depth) && ((r2 < hig2) || (r2 < hjg2)); + const int doj = pj_active && (depth_j >= min_depth) && + (depth_j <= max_depth) && ((r2 < hjg2) || (r2 < hig2)); + /* Hit or miss? */ - if (r2 < hig2 || r2 < hjg2) { + if (doi) { - if (pi_active && pj_active) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif - IACT(r2, dx, hi, hj, pi, pj, a, H); - IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); + IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_sink(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, - t_current, cosmo, with_cosmology); + runner_iact_nonsym_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, + t_current, cosmo, with_cosmology); #endif - } else if (pi_active) { + } + if (doj) { - IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); - IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); -#endif -#if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_nonsym_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, - t_current, cosmo, with_cosmology); +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); #endif - } else if (pj_active) { - dx[0] = -dx[0]; - dx[1] = -dx[1]; - dx[2] = -dx[2]; + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; - IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); - IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); + IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); + IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_sink(r2, dx, hj, hi, pj, pi, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_nonsym_timebin(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_rt_timebin(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_diffusion(r2, dx, hj, hi, pj, pi, a, H, time_base, - t_current, cosmo, with_cosmology); + runner_iact_nonsym_timebin(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_rt_timebin(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_diffusion(r2, dx, hj, hi, pj, pi, a, H, time_base, + t_current, cosmo, with_cosmology); #endif - } } } /* loop over the parts in cj. */ } /* loop over the parts in ci. */ @@ -317,8 +359,11 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, * * @param r The #runner. * @param c The #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { +void DOSELF1_NAIVE(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -339,7 +384,17 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { GET_MU0(); const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; + struct part *parts = c->hydro.parts; + + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? c->depth : 0; + const char max_depth = limit_min_h ? c->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? c->h_min_allowed : 0.; + const float h_max = limit_max_h ? c->h_max_allowed : FLT_MAX; +#endif /* Loop over the parts in ci. */ for (int pid = 0; pid < count; pid++) { @@ -351,6 +406,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { if (part_is_inhibited(pi, e)) continue; const int pi_active = PART_IS_ACTIVE(pi, e); + const char depth_i = pi->depth_h; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - c->loc[0]), @@ -369,6 +425,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = PART_IS_ACTIVE(pj, e); + const char depth_j = pj->depth_h; /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), @@ -377,8 +434,10 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const int doi = pi_active && (r2 < hig2); - const int doj = pj_active && (r2 < hjg2); + const int doi = pi_active && (r2 < hig2) && (depth_i >= min_depth) && + (depth_i <= max_depth); + const int doj = pj_active && (r2 < hjg2) && (depth_j >= min_depth) && + (depth_j <= max_depth); #if defined(SWIFT_DEBUG_CHECKS) && defined(DO_DRIFT_DEBUG_CHECKS) /* Check that particles have been drifted to the current time */ @@ -391,6 +450,11 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (doi && doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif + IACT(r2, dx, hi, hj, pi, pj, a, H); IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -407,6 +471,10 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { #endif } else if (doi) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -423,6 +491,10 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { #endif } else if (doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif + dx[0] = -dx[0]; dx[1] = -dx[1]; dx[2] = -dx[2]; @@ -455,8 +527,11 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { * * @param r The #runner. * @param c The #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { +void DOSELF2_NAIVE(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -477,7 +552,17 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { GET_MU0(); const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; + struct part *parts = c->hydro.parts; + + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? c->depth : 0; + const char max_depth = limit_min_h ? c->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? c->h_min_allowed : 0.; + const float h_max = limit_max_h ? c->h_max_allowed : FLT_MAX; +#endif /* Loop over the parts in ci. */ for (int pid = 0; pid < count; pid++) { @@ -489,6 +574,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { if (part_is_inhibited(pi, e)) continue; const int pi_active = PART_IS_ACTIVE(pi, e); + const char depth_i = pi->depth_h; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - c->loc[0]), @@ -504,9 +590,10 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; + const int pj_active = PART_IS_ACTIVE(pj, e); + const char depth_j = pj->depth_h; const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; - const int pj_active = PART_IS_ACTIVE(pj, e); /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), @@ -515,8 +602,10 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const int doi = pi_active && ((r2 < hig2) || (r2 < hjg2)); - const int doj = pj_active && ((r2 < hig2) || (r2 < hjg2)); + const int doi = pi_active && (depth_i >= min_depth) && + (depth_i <= max_depth) && ((r2 < hig2) || (r2 < hjg2)); + const int doj = pj_active && (depth_j >= min_depth) && + (depth_j <= max_depth) && ((r2 < hig2) || (r2 < hjg2)); #if defined(SWIFT_DEBUG_CHECKS) && defined(DO_DRIFT_DEBUG_CHECKS) /* Check that particles have been drifted to the current time */ @@ -529,6 +618,11 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (doi && doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif + IACT(r2, dx, hi, hj, pi, pj, a, H); IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -545,6 +639,10 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { #endif } else if (doi) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -561,6 +659,10 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { #endif } else if (doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif + dx[0] = -dx[0]; dx[1] = -dx[1]; dx[2] = -dx[2]; @@ -600,10 +702,10 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { * @param cj The second #cell. * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, - struct part *restrict parts_i, int *restrict ind, - int count, struct cell *restrict cj, - const double *shift) { +void DOPAIR_SUBSET_NAIVE(struct runner *r, const struct cell *restrict ci, + struct part *restrict parts_i, const int *ind, + const int count, const struct cell *restrict cj, + const double shift[3]) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -701,10 +803,10 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, * @param flipped Flag to check whether the cells have been flipped or not. * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, - struct part *restrict parts_i, int *restrict ind, int count, - struct cell *restrict cj, const int sid, const int flipped, - const double *shift) { +void DOPAIR_SUBSET(struct runner *r, const struct cell *restrict ci, + struct part *restrict parts_i, const int *ind, + const int count, const struct cell *restrict cj, + const int sid, const int flipped, const double shift[3]) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -873,9 +975,9 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, * @param count The number of particles in @c ind. * @param cj The second #cell. */ -void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, - struct part *restrict parts_i, int *restrict ind, - int count, struct cell *restrict cj) { +void DOPAIR_SUBSET_BRANCH(struct runner *r, const struct cell *restrict ci, + struct part *restrict parts_i, const int *ind, + const int count, struct cell *restrict cj) { const struct engine *e = r->e; @@ -902,15 +1004,18 @@ void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, const int flipped = runner_flip[sid]; sid = sortlistID[sid]; + /* Let's first lock the cell */ + lock_lock(&cj->hydro.extra_sort_lock); + /* Is it sorted, if not we use the naive interactions. */ const int is_sorted = (cj->hydro.sorted & (1 << sid)) && (cj->hydro.dx_max_sort_old <= space_maxreldx * cj->dmin); #if defined(SWIFT_USE_NAIVE_INTERACTIONS) - int force_naive = 1; + const int force_naive = 1; #else - int force_naive = 0; + const int force_naive = 0; #endif if (force_naive || !is_sorted) { @@ -926,20 +1031,25 @@ void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, DOPAIR_SUBSET(r, ci, parts_i, ind, count, cj, sid, flipped, shift); #endif } + + /* Now we can unlock */ + if (lock_unlock(&cj->hydro.extra_sort_lock) != 0) + error("Impossible to unlock cell!"); } /** - * @brief Compute the interactions between a cell pair, but only for the - * given indices in ci. + * @brief Compute the interactions between a cell, but only for the + * given indices in c. * * @param r The #runner. - * @param ci The first #cell. + * @param i The #cell. * @param parts The #part to interact. * @param ind The list of indices of particles in @c ci to interact with. * @param count The number of particles in @c ind. */ -void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, - struct part *restrict parts, int *restrict ind, int count) { +void DOSELF_SUBSET(struct runner *r, const struct cell *c, + struct part *restrict parts, const int *ind, + const int count) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -956,16 +1066,16 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, const float H = cosmo->H; GET_MU0(); - const int count_i = ci->hydro.count; - struct part *restrict parts_j = ci->hydro.parts; + const int count_cell = c->hydro.count; + struct part *restrict parts_j = c->hydro.parts; /* Loop over the parts in ci. */ for (int pid = 0; pid < count; pid++) { /* Get a hold of the ith part in ci. */ struct part *pi = &parts[ind[pid]]; - const float pix[3] = {(float)(pi->x[0] - ci->loc[0]), - (float)(pi->x[1] - ci->loc[1]), - (float)(pi->x[2] - ci->loc[2])}; + const float pix[3] = {(float)(pi->x[0] - c->loc[0]), + (float)(pi->x[1] - c->loc[1]), + (float)(pi->x[2] - c->loc[2])}; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; @@ -974,7 +1084,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, #endif /* Loop over the parts in cj. */ - for (int pjd = 0; pjd < count_i; pjd++) { + for (int pjd = 0; pjd < count_cell; pjd++) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; @@ -988,9 +1098,9 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, const float hj = pj->h; /* Compute the pairwise distance. */ - const float pjx[3] = {(float)(pj->x[0] - ci->loc[0]), - (float)(pj->x[1] - ci->loc[1]), - (float)(pj->x[2] - ci->loc[2])}; + const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), + (float)(pj->x[1] - c->loc[1]), + (float)(pj->x[2] - c->loc[2])}; float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; @@ -1036,9 +1146,9 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, * @param ind The list of indices of particles in @c ci to interact with. * @param count The number of particles in @c ind. */ -void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, - struct part *restrict parts, int *restrict ind, - int count) { +void DOSELF_SUBSET_BRANCH(struct runner *r, const struct cell *ci, + struct part *restrict parts, const int *ind, + const int count) { #if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) runner_doself_subset_density_vec(r, ci, parts, ind, count); @@ -1053,11 +1163,14 @@ void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. * @param sid The direction of the pair. * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, - const double *shift) { +void DOPAIR1(struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj, const int limit_min_h, + const int limit_max_h, const int sid, const double shift[3]) { const struct engine *restrict e = r->e; const struct cosmology *restrict cosmo = e->cosmology; @@ -1069,7 +1182,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, TIMER_TIC; - /* Get the cutoff shift. */ + /* Get the cut_off shift. */ double rshift = 0.0; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; @@ -1091,9 +1204,20 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, 2.02 * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); #endif /* SWIFT_DEBUG_CHECKS */ + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; +#endif + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; + /* Get some other useful values. */ - const double hi_max = ci->hydro.h_max * kernel_gamma - rshift; - const double hj_max = cj->hydro.h_max * kernel_gamma; + const double hi_max = + min(h_max, ci->hydro.h_max_active) * kernel_gamma - rshift; + const double hj_max = min(h_max, cj->hydro.h_max_active) * kernel_gamma; const int count_i = ci->hydro.count; const int count_j = cj->hydro.count; struct part *restrict parts_i = ci->hydro.parts; @@ -1109,17 +1233,28 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, if (CELL_IS_ACTIVE(ci, e)) { - /* Loop over the parts in ci. */ + /* Loop over the *active* parts in ci that are within range (on the axis) + of any particle in cj. */ for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[sort_i[pid].i]; + const char depth_i = pi->depth_h; const float hi = pi->h; /* Skip inactive particles */ if (!PART_IS_ACTIVE(pi, e)) continue; +#ifdef SWIFT_DEBUG_CHECKS + if (hi > ci->hydro.h_max_active) + error("Particle has h larger than h_max_active"); +#endif + + /* Skip particles not in the range of h we care about */ + if (depth_i < min_depth) continue; + if (depth_i > max_depth) continue; + /* Is there anything we need to interact with ? */ const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; @@ -1145,7 +1280,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float pjz = pj->x[2] - cj->loc[2]; /* Compute the pairwise distance. */ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -1187,6 +1322,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hig2) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1208,17 +1348,28 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, if (CELL_IS_ACTIVE(cj, e)) { - /* Loop over the parts in cj. */ + /* Loop over the *active* parts in cj that are within range (on the axis) + of any particle in ci. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; pjd++) { /* Get a hold of the jth part in cj. */ struct part *pj = &parts_j[sort_j[pjd].i]; + const char depth_j = pj->depth_h; const float hj = pj->h; /* Skip inactive particles */ if (!PART_IS_ACTIVE(pj, e)) continue; +#ifdef SWIFT_DEBUG_CHECKS + if (hj > cj->hydro.h_max_active) + error("Particle has h larger than h_max_active"); +#endif + + /* Skip particles not in the range of h we care about */ + if (depth_j < min_depth) continue; + if (depth_j > max_depth) continue; + /* Is there anything we need to interact with ? */ const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; if (dj - rshift > di_max) continue; @@ -1244,7 +1395,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float piz = pi->x[2] - (cj->loc[2] + shift[2]); /* Compute the pairwise distance. */ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -1286,6 +1437,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hjg2) { +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1315,11 +1471,13 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, * @param r #runner * @param ci #cell ci * @param cj #cell cj - * + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h) { - const struct engine *restrict e = r->e; + const struct engine *e = r->e; /* Anything to do here? */ if (ci->hydro.count == 0 || cj->hydro.count == 0) return; @@ -1331,73 +1489,30 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { if (!CELL_ARE_PART_DRIFTED(ci, e) || !CELL_ARE_PART_DRIFTED(cj, e)) error("Interacting undrifted cells."); - /* Get the sort ID. */ + /* Get the sort ID. + * Note: this may swap the ci and cj pointers!! */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid_and_swap_cells(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ if (!(ci->hydro.sorted & (1 << sid)) || ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin) - error("Interacting unsorted cells."); + error("Interacting unsorted cells (ci)."); + if (!(cj->hydro.sorted & (1 << sid)) || cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); - -#ifdef SWIFT_DEBUG_CHECKS - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_i = cell_get_hydro_sorts(ci, sid); - const struct sort_entry *restrict sort_j = cell_get_hydro_sorts(cj, sid); - - /* Check that the dx_max_sort values in the cell are indeed an upper - bound on particle movement. */ - for (int pid = 0; pid < ci->hydro.count; pid++) { - const struct part *p = &ci->hydro.parts[sort_i[pid].i]; - if (part_is_inhibited(p, e)) continue; - - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->hydro.dx_max_sort_old) && - fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - ci->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->hydro.dx_max_sort=%e " - "ci->hydro.dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->hydro.dx_max_sort, - ci->hydro.dx_max_sort_old); - } - for (int pjd = 0; pjd < cj->hydro.count; pjd++) { - const struct part *p = &cj->hydro.parts[sort_j[pjd].i]; - if (part_is_inhibited(p, e)) continue; - - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if ((fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - 1.0e-4 * max(fabsf(d), cj->hydro.dx_max_sort_old) && - (fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - cj->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->hydro.dx_max_sort=%e " - "cj->hydro.dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->hydro.dx_max_sort, - cj->hydro.dx_max_sort_old); - } -#endif /* SWIFT_DEBUG_CHECKS */ + error("Interacting unsorted cells (cj)."); #if defined(SWIFT_USE_NAIVE_INTERACTIONS) - DOPAIR1_NAIVE(r, ci, cj); + DOPAIR1_NAIVE(r, ci, cj, limit_min_h, limit_max_h); #elif defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) if (!sort_is_corner(sid)) runner_dopair1_density_vec(r, ci, cj, sid, shift); else - DOPAIR1(r, ci, cj, sid, shift); + DOPAIR1(r, ci, cj, limit_min_h, limit_max_h, sid, shift); #else - DOPAIR1(r, ci, cj, sid, shift); + DOPAIR1(r, ci, cj, limit_min_h, limit_max_h, sid, shift); #endif } @@ -1407,14 +1522,18 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. * @param sid The direction of the pair * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, - const double *shift) { +void DOPAIR2(struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj, const int limit_min_h, + const int limit_max_h, const int sid, const double shift[3]) { const struct engine *restrict e = r->e; const struct cosmology *restrict cosmo = e->cosmology; + #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) const double time_base = e->time_base; const integertime_t t_current = e->ti_current; @@ -1423,7 +1542,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, TIMER_TIC; - /* Get the cutoff shift. */ + /* Get the cut_off shift. */ double rshift = 0.0; for (int k = 0; k < 3; k++) rshift += shift[k] * runner_shift[sid][k]; @@ -1445,7 +1564,19 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, 2.02 * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); #endif /* SWIFT_DEBUG_CHECKS */ + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; +#endif + /* Get some other useful values. */ + const int local_i = ci->nodeID == e->nodeID; + const int local_j = cj->nodeID == e->nodeID; const double hi_max = ci->hydro.h_max; const double hj_max = cj->hydro.h_max; const int count_i = ci->hydro.count; @@ -1486,7 +1617,11 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Collect the active particles in ci */ for (int k = 0; k < count_i; k++) { - if (PART_IS_ACTIVE(&parts_i[sort_i[k].i], e)) { + const struct part *p = &parts_i[sort_i[k].i]; + const char depth = p->depth_h; + + if (PART_IS_ACTIVE(p, e) && (depth >= min_depth) && + (depth <= max_depth)) { sort_active_i[count_active_i] = sort_i[k]; count_active_i++; } @@ -1505,7 +1640,11 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Collect the active particles in cj */ for (int k = 0; k < count_j; k++) { - if (PART_IS_ACTIVE(&parts_j[sort_j[k].i], e)) { + const struct part *p = &parts_j[sort_j[k].i]; + const char depth = p->depth_h; + + if (PART_IS_ACTIVE(p, e) && (depth >= min_depth) && + (depth <= max_depth)) { sort_active_j[count_active_j] = sort_j[k]; count_active_j++; } @@ -1521,6 +1660,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the ith part in ci. */ struct part *pi = &parts_i[sort_i[pid].i]; + const char depth_i = pi->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pi, e)) continue; @@ -1537,9 +1677,12 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float piy = pi->x[1] - shift_i[1]; const float piz = pi->x[2] - shift_i[2]; + const int update_i = PART_IS_ACTIVE(pi, e) && local_i && + (depth_i >= min_depth) && (depth_i <= max_depth); + /* Do we need to only check active parts in cj (i.e. pi does not need updating) ? */ - if (!PART_IS_ACTIVE(pi, e)) { + if (!update_i) { /* Loop over the *active* parts in cj within range of pi */ for (int pjd = 0; pjd < count_active_j && sort_active_j[pjd].d < di; @@ -1606,6 +1749,12 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? (note that we will do the other condition in the reverse loop) */ if (r2 < hig2) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1631,6 +1780,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; + const char depth_j = pj->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; @@ -1681,13 +1831,26 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, if (pj->ti_drift != e->ti_current) error("Particle pj not drifted to current time"); #endif + #endif + /* Hit or miss? (note that we will do the other condition in the reverse loop) */ if (r2 < hig2) { + const int doj = PART_IS_ACTIVE(pj, e) && local_j && + (depth_j >= min_depth) && (depth_j <= max_depth); + /* Does pj need to be updated too? */ - if (PART_IS_ACTIVE(pj, e)) { + if (doj) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT(r2, dx, hi, hj, pi, pj, a, H); IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1703,6 +1866,11 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, t_current, cosmo, with_cosmology); #endif } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1733,6 +1901,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the jth part in cj. */ struct part *pj = &parts_j[sort_j[pjd].i]; + const char depth_j = pj->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; @@ -1749,9 +1918,12 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float pjy = pj->x[1] - shift_j[1]; const float pjz = pj->x[2] - shift_j[2]; + const int update_j = PART_IS_ACTIVE(pj, e) && local_j && + (depth_j >= min_depth) && (depth_j <= max_depth); + /* Do we need to only check active parts in ci (i.e. pj does not need updating) ? */ - if (!PART_IS_ACTIVE(pj, e)) { + if (!update_j) { /* Loop over the *active* parts in ci. */ for (int pid = count_active_i - 1; @@ -1818,6 +1990,12 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? (note that we must avoid the r2 < hig2 cases we already processed) */ if (r2 < hjg2 && r2 >= hig2) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1844,6 +2022,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Recover pi */ struct part *pi = &parts_i[sort_i[pid].i]; + const char depth_i = pi->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pi, e)) continue; @@ -1900,8 +2079,19 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, (note that we must avoid the r2 < hig2 cases we already processed) */ if (r2 < hjg2 && r2 >= hig2) { + const int doi = PART_IS_ACTIVE(pi, e) && local_i && + (depth_i >= min_depth) && (depth_i <= max_depth); + /* Does pi need to be updated too? */ - if (PART_IS_ACTIVE(pi, e)) { + if (doi) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT(r2, dx, hj, hi, pj, pi, a, H); IACT_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1917,6 +2107,12 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, t_current, cosmo, with_cosmology); #endif } else { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -1954,11 +2150,13 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, * @param r #runner * @param ci #cell ci * @param cj #cell cj - * + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { +void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h) { - const struct engine *restrict e = r->e; + const struct engine *e = r->e; /* Anything to do here? */ if (ci->hydro.count == 0 || cj->hydro.count == 0) return; @@ -1977,66 +2175,22 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { /* Have the cells been sorted? */ if (!(ci->hydro.sorted & (1 << sid)) || ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin) - error("Interacting unsorted cells."); + error("Interacting unsorted cells (ci)."); + if (!(cj->hydro.sorted & (1 << sid)) || cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); - -#ifdef SWIFT_DEBUG_CHECKS - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_i = cell_get_hydro_sorts(ci, sid); - const struct sort_entry *restrict sort_j = cell_get_hydro_sorts(cj, sid); - - /* Check that the dx_max_sort values in the cell are indeed an upper - bound on particle movement. */ - for (int pid = 0; pid < ci->hydro.count; pid++) { - const struct part *p = &ci->hydro.parts[sort_i[pid].i]; - if (part_is_inhibited(p, e)) continue; - - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->hydro.dx_max_sort_old) && - fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - ci->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->hydro.dx_max_sort=%e " - "ci->hydro.dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->hydro.dx_max_sort, - ci->hydro.dx_max_sort_old); - } - for (int pjd = 0; pjd < cj->hydro.count; pjd++) { - const struct part *p = &cj->hydro.parts[sort_j[pjd].i]; - if (part_is_inhibited(p, e)) continue; - - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort > - 1.0e-4 * max(fabsf(d), cj->hydro.dx_max_sort_old) && - fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort > - cj->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->hydro.dx_max_sort=%e " - "cj->hydro.dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->hydro.dx_max_sort, - cj->hydro.dx_max_sort_old); - } -#endif /* SWIFT_DEBUG_CHECKS */ + error("Interacting unsorted cells (cj)."); #ifdef SWIFT_USE_NAIVE_INTERACTIONS - DOPAIR2_NAIVE(r, ci, cj); + DOPAIR2_NAIVE(r, ci, cj, limit_min_h, limit_max_h); #elif defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) if (!sort_is_corner(sid)) runner_dopair2_force_vec(r, ci, cj, sid, shift); else - DOPAIR2(r, ci, cj, sid, shift); + DOPAIR2(r, ci, cj, limit_min_h, limit_max_h, sid, shift); #else - DOPAIR2(r, ci, cj, sid, shift); + DOPAIR2(r, ci, cj, limit_min_h, limit_max_h, sid, shift); #endif } @@ -2045,8 +2199,11 @@ void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { * * @param r The #runner. * @param c The #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOSELF1(struct runner *r, struct cell *restrict c) { +void DOSELF1(struct runner *r, const struct cell *c, const int limit_min_h, + const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -2058,50 +2215,73 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { TIMER_TIC; - struct part *restrict parts = c->hydro.parts; + struct part *parts = c->hydro.parts; const int count = c->hydro.count; - /* Set up indt. */ + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? c->depth : 0; + const char max_depth = limit_min_h ? c->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? c->h_min_allowed : 0.; + const float h_max = limit_max_h ? c->h_max_allowed : FLT_MAX; +#endif + + /* Set up a list of the particles for which we want to compute interactions */ int *indt = NULL; int countdt = 0, firstdt = 0; if (posix_memalign((void **)&indt, VEC_SIZE * sizeof(int), count * sizeof(int)) != 0) error("Failed to allocate indt."); - for (int k = 0; k < count; k++) - if (PART_IS_ACTIVE(&parts[k], e)) { + for (int k = 0; k < count; k++) { + const struct part *p = &parts[k]; + const char depth = p->depth_h; + if (PART_IS_ACTIVE(p, e) && (depth >= min_depth) && (depth <= max_depth)) { indt[countdt] = k; countdt += 1; } + } - /* Cosmological terms and physical constants */ + /* Cosmological terms */ const float a = cosmo->a; const float H = cosmo->H; GET_MU0(); - /* Loop over the particles in the cell. */ - for (int pid = 0; pid < count; pid++) { + /* Loop over *all* the particles (i.e. the ones to update and not to update). + * + * Note the additional condition to make the loop abort if all the active + * particles have been processed. */ + for (int pid = 0; pid < count && firstdt < countdt; pid++) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; + const char depth_i = pi->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pi, e)) continue; - /* Get the particle position and radius. */ - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k]; + /* Get the particle position and (square of) search radius. */ + const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; - /* Is the ith particle inactive? */ - if (!PART_IS_ACTIVE(pi, e)) { + /* Is the ith particle active and in the range of h we care about? */ + const int update_i = PART_IS_ACTIVE(pi, e) && (depth_i >= min_depth) && + (depth_i <= max_depth); + + /* If false then it can only act as a neighbour of others */ + if (!update_i) { - /* Loop over the other particles .*/ + /* Loop over the particles we want to update. */ for (int pjd = firstdt; pjd < countdt; pjd++) { - /* Get a pointer to the jth particle. */ + /* Get a pointer to the jth particle. (by construction pi != pj) */ struct part *restrict pj = &parts[indt[pjd]]; + + /* This particle's (square of) search radius. */ const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; #if defined(SWIFT_DEBUG_CHECKS) && defined(DO_DRIFT_DEBUG_CHECKS) /* Check that particles have been drifted to the current time */ @@ -2111,16 +2291,19 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { error("Particle pj not drifted to current time"); #endif - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pj->x[k] - pix[k]; - r2 += dx[k] * dx[k]; - } + /* Compute the (square of) pairwise distance. */ + const double pjx[3] = {pj->x[0], pj->x[1], pj->x[2]}; + const float dx[3] = {(float)(pjx[0] - pix[0]), (float)(pjx[1] - pix[1]), + (float)(pjx[2] - pix[2])}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ - if (r2 < hj * hj * kernel_gamma2) { + if (r2 < hjg2) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); @@ -2137,37 +2320,32 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { t_current, cosmo, with_cosmology); #endif } - } /* loop over all other particles. */ + } /* loop over all the particles we want to update. */ } /* Otherwise, interact with all candidates. */ else { - /* We caught a live one! */ + /* We caught a live one! + * Move the start of the list of active ones by one slot as it will have + * been fully processed after the following loop so no need to consider it + * in the previous loop any more. */ firstdt += 1; - /* Loop over the other particles .*/ + /* Loop over *all* the particles (i.e. the ones to update and not to + * update) but starting from where we are in the overall list. */ for (int pjd = pid + 1; pjd < count; pjd++) { - /* Get a pointer to the jth particle. */ + /* Get a pointer to the jth particle (by construction pi != pj). */ struct part *restrict pj = &parts[pjd]; + const char depth_j = pj->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; + /* This particle's (square of) search radius. */ const float hj = pj->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } - const int doj = - (PART_IS_ACTIVE(pj, e)) && (r2 < hj * hj * kernel_gamma2); - - const int doi = (r2 < hig2); + const float hjg2 = hj * hj * kernel_gamma2; #if defined(SWIFT_DEBUG_CHECKS) && defined(DO_DRIFT_DEBUG_CHECKS) /* Check that particles have been drifted to the current time */ @@ -2177,67 +2355,101 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { error("Particle pj not drifted to current time"); #endif - /* Hit or miss? */ - if (doi || doj) { + /* Compute the (square of) pairwise distance. */ + const double pjx[3] = {pj->x[0], pj->x[1], pj->x[2]}; + float dx[3] = {(float)(pix[0] - pjx[0]), (float)(pix[1] - pjx[1]), + (float)(pix[2] - pjx[2])}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Which parts need to be updated? */ - if (doi && doj) { + /* Decide which of the two particles to update */ - IACT(r2, dx, hi, hj, pi, pj, a, H); - IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); -#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_sink(r2, dx, hi, hj, pi, pj, a, H); + /* We know pi is active and in the right range of h + * -> Only check the distance to pj */ + const int doi = (r2 < hig2); + + /* We know nothing about pj + * -> Check whether it is active + * -> Check whether it is in the right range of h + * -> Check the distance to pi */ + const int doj = (PART_IS_ACTIVE(pj, e)) && (depth_j >= min_depth) && + (depth_j <= max_depth) && (r2 < hjg2); + + /* Hit or miss? */ + if (doi && doj) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + /* Update both pi and pj */ + + IACT(r2, dx, hi, hj, pi, pj, a, H); + IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); +#if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) + runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_sink(r2, dx, hi, hj, pi, pj, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, - t_current, cosmo, with_cosmology); + runner_iact_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, + t_current, cosmo, with_cosmology); #endif - } else if (doi) { + } else if (doi) { - IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); - IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif + /* Update only pi */ + + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); + IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_nonsym_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_diffusion(r2, dx, hi, hj, pi, pj, a, H, - time_base, t_current, cosmo, - with_cosmology); + runner_iact_nonsym_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, + t_current, cosmo, with_cosmology); #endif - } else if (doj) { + } else if (doj) { - dx[0] = -dx[0]; - dx[1] = -dx[1]; - dx[2] = -dx[2]; - IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); - IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + + /* Update only pj */ + + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; + IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); + IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_sink(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_sink(r2, dx, hj, hi, pj, pi, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_nonsym_timebin(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_rt_timebin(r2, dx, hj, hi, pj, pi, a, H); - runner_iact_nonsym_diffusion(r2, dx, hj, hi, pj, pi, a, H, - time_base, t_current, cosmo, - with_cosmology); + runner_iact_nonsym_timebin(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_rt_timebin(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_diffusion(r2, dx, hj, hi, pj, pi, a, H, time_base, + t_current, cosmo, with_cosmology); #endif - } - } + } /* Hit or miss */ } /* loop over all other particles. */ - } + } /* pi is active */ } /* loop over all particles. */ free(indt); @@ -2251,11 +2463,13 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { * * @param r #runner * @param c #cell c - * + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOSELF1_BRANCH(struct runner *r, struct cell *c) { +void DOSELF1_BRANCH(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { - const struct engine *restrict e = r->e; + const struct engine *e = r->e; /* Anything to do here? */ if (c->hydro.count == 0) return; @@ -2263,20 +2477,28 @@ void DOSELF1_BRANCH(struct runner *r, struct cell *c) { /* Anything to do here? */ if (!CELL_IS_ACTIVE(c, e)) return; +#ifdef SWIFT_DEBUG_CHECKS + /* Did we mess up the recursion? */ if (c->hydro.h_max_old * kernel_gamma > c->dmin) - error("Cell smaller than smoothing length"); + if (!limit_max_h && c->hydro.h_max_active * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + /* Did we mess up the recursion? */ + if (limit_min_h && !limit_max_h) + error("Fundamental error in the recursion logic"); +#endif /* Check that cells are drifted. */ if (!CELL_ARE_PART_DRIFTED(c, e)) error("Interacting undrifted cell."); #if defined(SWIFT_USE_NAIVE_INTERACTIONS) - DOSELF1_NAIVE(r, c); + DOSELF1_NAIVE(r, c, limit_min_h, limit_max_h); #elif defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_doself1_density_vec(r, c); #else - DOSELF1(r, c); + DOSELF1(r, c, limit_min_h, limit_max_h); #endif } @@ -2285,8 +2507,11 @@ void DOSELF1_BRANCH(struct runner *r, struct cell *c) { * * @param r The #runner. * @param c The #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOSELF2(struct runner *r, struct cell *restrict c) { +void DOSELF2(struct runner *r, const struct cell *c, const int limit_min_h, + const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -2298,58 +2523,73 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { TIMER_TIC; - struct part *restrict parts = c->hydro.parts; + struct part *parts = c->hydro.parts; const int count = c->hydro.count; - /* Set up indt. */ + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? c->depth : 0; + const char max_depth = limit_min_h ? c->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? c->h_min_allowed : 0.; + const float h_max = limit_max_h ? c->h_max_allowed : FLT_MAX; +#endif + + /* Set up a list of the particles for which we want to compute interactions */ int *indt = NULL; int countdt = 0, firstdt = 0; if (posix_memalign((void **)&indt, VEC_SIZE * sizeof(int), count * sizeof(int)) != 0) error("Failed to allocate indt."); - for (int k = 0; k < count; k++) - if (PART_IS_ACTIVE(&parts[k], e)) { + for (int k = 0; k < count; k++) { + const struct part *p = &parts[k]; + const char depth = p->depth_h; + if (PART_IS_ACTIVE(p, e) && (depth >= min_depth) && (depth <= max_depth)) { indt[countdt] = k; countdt += 1; } + } /* Cosmological terms and physical constants */ const float a = cosmo->a; const float H = cosmo->H; GET_MU0(); - /* Loop over the particles in the cell. */ - for (int pid = 0; pid < count; pid++) { + /* Loop over *all* the particles (the ones to update and others!) in the cell. + * + * Note the additional condition to make the loop abort if all the active + * particles have been processed. */ + for (int pid = 0; pid < count && firstdt < countdt; pid++) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; + const char depth_i = pi->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pi, e)) continue; - /* Get the particle position and radius. */ - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k]; + /* Get the particle position and (square of) search radius. */ + const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; - /* Is the ith particle not active? */ - if (!PART_IS_ACTIVE(pi, e)) { + /* Is the ith particle active and in the range of h we care about? */ + const int update_i = PART_IS_ACTIVE(pi, e) && (depth_i >= min_depth) && + (depth_i <= max_depth); - /* Loop over the other particles .*/ + /* If false then it can only act as a neighbour of others */ + if (!update_i) { + + /* Loop over the active particles we want to update. */ for (int pjd = firstdt; pjd < countdt; pjd++) { - /* Get a pointer to the jth particle. */ + /* Get a pointer to the jth particle. (by construction pi != pj) */ struct part *restrict pj = &parts[indt[pjd]]; - const float hj = pj->h; - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pj->x[k] - pix[k]; - r2 += dx[k] * dx[k]; - } + /* This particle's (square of) search radius. */ + const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; #if defined(SWIFT_DEBUG_CHECKS) && defined(DO_DRIFT_DEBUG_CHECKS) /* Check that particles have been drifted to the current time */ @@ -2359,8 +2599,19 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { error("Particle pj not drifted to current time"); #endif + /* Compute the (square of) pairwise distance. */ + const double pjx[3] = {pj->x[0], pj->x[1], pj->x[2]}; + const float dx[3] = {(float)(pjx[0] - pix[0]), (float)(pjx[1] - pix[1]), + (float)(pjx[2] - pix[2])}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* Hit or miss? */ - if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { + if (r2 < hig2 || r2 < hjg2) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); IACT_NONSYM_MHD(r2, dx, hj, hi, pj, pi, mu_0, a, H); @@ -2383,27 +2634,26 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { /* Otherwise, interact with all candidates. */ else { - /* We caught a live one! */ + /* We caught a live one! + * Move the start of the list of active ones by one slot as it will have + * been fully processed after the following loop so no need to consider it + * in the previous loop any more. */ firstdt += 1; - /* Loop over the other particles .*/ + /* Loop over *all* the particles (i.e. the ones to update and not to + * update) but starting from where we are in the overall list. */ for (int pjd = pid + 1; pjd < count; pjd++) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; + const char depth_j = pj->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; + /* This particle's (square of) search radius. */ const float hj = pj->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + const float hjg2 = hj * hj * kernel_gamma2; #if defined(SWIFT_DEBUG_CHECKS) && defined(DO_DRIFT_DEBUG_CHECKS) /* Check that particles have been drifted to the current time */ @@ -2413,45 +2663,86 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { error("Particle pj not drifted to current time"); #endif + /* Compute the (square of) pairwise distance. */ + const double pjx[3] = {pj->x[0], pj->x[1], pj->x[2]}; + float dx[3] = {(float)(pix[0] - pjx[0]), (float)(pix[1] - pjx[1]), + (float)(pix[2] - pjx[2])}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + + /* Decide which of the two particles to update */ + + /* We know pi is active and in the right range of h + * -> Only check the distance to pj */ + const int doi = (r2 < hig2) || (r2 < hjg2); + + /* We know nothing about pj + * -> Check whether it is active + * -> Check whether it is in the right range of h + * -> Check the distance to pi */ + const int doj = (PART_IS_ACTIVE(pj, e)) && (depth_j >= min_depth) && + (depth_j <= max_depth) && ((r2 < hjg2) || (r2 < hig2)); + /* Hit or miss? */ - if (r2 < hig2 || r2 < hj * hj * kernel_gamma2) { + if (doi && doj) { - /* Does pj need to be updated too? */ - if (PART_IS_ACTIVE(pj, e)) { - IACT(r2, dx, hi, hj, pi, pj, a, H); - IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + + /* Update both pi and pj */ + + IACT(r2, dx, hi, hj, pi, pj, a, H); + IACT_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_sink(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_sink(r2, dx, hi, hj, pi, pj, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, - t_current, cosmo, with_cosmology); + runner_iact_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, + t_current, cosmo, with_cosmology); #endif - } else { - IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); - IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); + } else if (doi) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif + + /* Update only pi */ + + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); + IACT_NONSYM_MHD(r2, dx, hi, hj, pi, pj, mu_0, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_sink(r2, dx, hi, hj, pi, pj, a, H); #endif #if (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) - runner_iact_nonsym_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); - runner_iact_nonsym_diffusion(r2, dx, hi, hj, pi, pj, a, H, - time_base, t_current, cosmo, - with_cosmology); + runner_iact_nonsym_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_rt_timebin(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_diffusion(r2, dx, hi, hj, pi, pj, a, H, time_base, + t_current, cosmo, with_cosmology); #endif - } - } + } else if (doj) { + + /* Update only doj + * + * Note: This is impossible since if doj==True so does doi */ + +#ifdef SWIFT_DEBUG_CHECKS + error("Impossible problem in the logic!!!"); +#endif + } /* Hit or miss */ } /* loop over all other particles. */ - } + } /* pi is active */ } /* loop over all particles. */ free(indt); @@ -2465,11 +2756,13 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { * * @param r #runner * @param c #cell c - * + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. */ -void DOSELF2_BRANCH(struct runner *r, struct cell *c) { +void DOSELF2_BRANCH(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { - const struct engine *restrict e = r->e; + const struct engine *e = r->e; /* Anything to do here? */ if (c->hydro.count == 0) return; @@ -2477,20 +2770,29 @@ void DOSELF2_BRANCH(struct runner *r, struct cell *c) { /* Anything to do here? */ if (!CELL_IS_ACTIVE(c, e)) return; +#ifdef SWIFT_DEBUG_CHECKS + /* Did we mess up the recursion? */ if (c->hydro.h_max_old * kernel_gamma > c->dmin) - error("Cell smaller than smoothing length"); + if (!limit_max_h && c->hydro.h_max_active * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + /* Did we mess up the recursion? */ + if (limit_min_h && !limit_max_h) + error("Fundamental error in the recursion logic"); + +#endif /* Check that cells are drifted. */ if (!CELL_ARE_PART_DRIFTED(c, e)) error("Interacting undrifted cell."); #if defined(SWIFT_USE_NAIVE_INTERACTIONS) - DOSELF2_NAIVE(r, c); + DOSELF2_NAIVE(r, c, limit_min_h, limit_max_h); #elif defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && \ (FUNCTION_TASK_LOOP == TASK_LOOP_FORCE) runner_doself2_force_vec(r, c); #else - DOSELF2(r, c); + DOSELF2(r, c, limit_min_h, limit_max_h); #endif } @@ -2500,13 +2802,12 @@ void DOSELF2_BRANCH(struct runner *r, struct cell *c) { * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param recurse_below_h_max Are we currently recursing at a level where we + * violated the h < cell size condition. * @param gettimer Do we have a timer ? - * - * @todo Hard-code the sid on the recursive calls to avoid the - * redundant computations to find the sid on-the-fly. */ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { + int recurse_below_h_max, const int gettimer) { struct space *s = r->e->s; const struct engine *e = r->e; @@ -2521,41 +2822,81 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, double shift[3]; const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); - /* Recurse? */ - if (cell_can_recurse_in_pair_hydro_task(ci) && - cell_can_recurse_in_pair_hydro_task(cj)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR1(r, ci->progeny[pid], cj->progeny[pjd], 0); + /* We reached a leaf OR a cell small enough to be processed quickly */ + if (!ci->split || ci->hydro.count < space_recurse_size_pair_hydro || + !cj->split || cj->hydro.count < space_recurse_size_pair_hydro) { + + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); } - } - /* Otherwise, compute the pair directly. */ - else if (CELL_IS_ACTIVE(ci, e) || CELL_IS_ACTIVE(cj, e)) { + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOPAIR1_BRANCH(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); + + } else { - /* Make sure both cells are drifted to the current timestep. */ - if (!CELL_ARE_PART_DRIFTED(ci, e) || !CELL_ARE_PART_DRIFTED(cj, e)) - error("Interacting undrifted cells."); + /* Both ci and cj are split */ - /* Do any of the cells need to be sorted first? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) - error( - "Interacting unsorted cell. ci->hydro.dx_max_sort_old=%e ci->dmin=%e " - "ci->sorted=%d sid=%d", - ci->hydro.dx_max_sort_old, ci->dmin, ci->hydro.sorted, sid); - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error( - "Interacting unsorted cell. cj->hydro.dx_max_sort_old=%e cj->dmin=%e " - "cj->sorted=%d sid=%d", - cj->hydro.dx_max_sort_old, cj->dmin, cj->hydro.sorted, sid); - - /* Compute the interactions. */ - DOPAIR1_BRANCH(r, ci, cj); + /* Should we change the recursion regime because we encountered a large + particle? */ + if (!recurse_below_h_max && (!cell_can_recurse_in_subpair_hydro_task(ci) || + !cell_can_recurse_in_subpair_hydro_task(cj))) { + recurse_below_h_max = 1; + } + + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + + /* message("Multi-level PAIR! ci->count=%d cj->count=%d", ci->hydro.count, + */ + /* cj->hydro.count); */ + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOPAIR1_BRANCH(r, ci, cj, /*limit_h_min=*/1, /*limit_h_max=*/1); + } + + /* Recurse to the lower levels. */ + const struct cell_split_pair *const csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) { + DOSUB_PAIR1(r, ci->progeny[pid], cj->progeny[pjd], recurse_below_h_max, + /*gettimer=*/0); + } + } } if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); @@ -2565,36 +2906,60 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, * @brief Compute grouped sub-cell interactions for self tasks * * @param r The #runner. - * @param ci The first #cell. + * @param c The #cell. + * @param recurse_below_h_max Are we currently recursing at a level where we + * violated the h < cell size condition. * @param gettimer Do we have a timer ? */ -void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { +void DOSUB_SELF1(struct runner *r, struct cell *c, int recurse_below_h_max, + const int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (ci->hydro.count == 0 || !CELL_IS_ACTIVE(ci, r->e)) return; + if (c->hydro.count == 0 || !CELL_IS_ACTIVE(c, r->e)) return; - /* Recurse? */ - if (cell_can_recurse_in_self_hydro_task(ci)) { + /* We reached a leaf OR a cell small enough to process quickly */ + if (!c->split || c->hydro.count < space_recurse_size_self_hydro) { - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF1(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR1(r, ci->progeny[k], ci->progeny[j], 0); - } - } + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOSELF1_BRANCH(r, c, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); - /* Otherwise, compute self-interaction. */ - else { + } else { - /* Drift the cell to the current timestep if needed. */ - if (!CELL_ARE_PART_DRIFTED(ci, r->e)) error("Interacting undrifted cell."); + /* Should we change the recursion regime because we encountered a large + particle at this level? */ + if (!recurse_below_h_max && !cell_can_recurse_in_subself_hydro_task(c)) { + recurse_below_h_max = 1; + } - DOSELF1_BRANCH(r, ci); + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* message("Multi-level SELF! c->count=%d", c->hydro.count); */ + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOSELF1_BRANCH(r, c, /*limit_h_min=*/1, /*limit_h_max=*/1); + } + + /* Recurse to the lower levels. */ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + DOSUB_SELF1(r, c->progeny[k], recurse_below_h_max, /*gettimer=*/0); + for (int j = k + 1; j < 8; j++) { + if (c->progeny[j] != NULL) { + DOSUB_PAIR1(r, c->progeny[k], c->progeny[j], recurse_below_h_max, + /*gettimer=*/0); + } + } + } + } } if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); @@ -2612,7 +2977,7 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { * redundant computations to find the sid on-the-fly. */ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { + int recurse_below_h_max, const int gettimer) { const struct engine *e = r->e; struct space *s = e->s; @@ -2627,41 +2992,80 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, double shift[3]; const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); - /* Recurse? */ - if (cell_can_recurse_in_pair_hydro_task(ci) && - cell_can_recurse_in_pair_hydro_task(cj)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR2(r, ci->progeny[pid], cj->progeny[pjd], 0); + /* We reached a leaf OR a cell small enough to be processed quickly */ + if (!ci->split || ci->hydro.count < space_recurse_size_pair_hydro || + !cj->split || cj->hydro.count < space_recurse_size_pair_hydro) { + + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); } - } - /* Otherwise, compute the pair directly. */ - else if (CELL_IS_ACTIVE(ci, e) || CELL_IS_ACTIVE(cj, e)) { + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOPAIR2_BRANCH(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); - /* Make sure both cells are drifted to the current timestep. */ - if (!CELL_ARE_PART_DRIFTED(ci, e) || !CELL_ARE_PART_DRIFTED(cj, e)) - error("Interacting undrifted cells."); + } else { - /* Do any of the cells need to be sorted first? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) - error( - "Interacting unsorted cell. ci->hydro.dx_max_sort_old=%e ci->dmin=%e " - "ci->sorted=%d sid=%d", - ci->hydro.dx_max_sort_old, ci->dmin, ci->hydro.sorted, sid); - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error( - "Interacting unsorted cell. cj->hydro.dx_max_sort_old=%e cj->dmin=%e " - "cj->sorted=%d sid=%d", - cj->hydro.dx_max_sort_old, cj->dmin, cj->hydro.sorted, sid); - - /* Compute the interactions. */ - DOPAIR2_BRANCH(r, ci, cj); + /* Should we change the recursion regime because we encountered a large + particle? */ + if (!recurse_below_h_max && + (!cell_can_recurse_in_subpair2_hydro_task(ci) || + !cell_can_recurse_in_subpair2_hydro_task(cj))) { + recurse_below_h_max = 1; + } + + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + + /* message("Multi-level PAIR! ci->count=%d cj->count=%d", ci->hydro.count, + */ + /* cj->hydro.count); */ + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOPAIR2_BRANCH(r, ci, cj, /*limit_h_min=*/1, /*limit_h_max=*/1); + } + + /* Recurse to the lower levels. */ + const struct cell_split_pair *const csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) { + DOSUB_PAIR2(r, ci->progeny[pid], cj->progeny[pjd], recurse_below_h_max, + /*gettimer=*/0); + } + } } if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); @@ -2674,36 +3078,96 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, * @param ci The first #cell. * @param gettimer Do we have a timer ? */ -void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer) { +void DOSUB_SELF2(struct runner *r, struct cell *c, int recurse_below_h_max, + const int gettimer) { TIMER_TIC; /* Should we even bother? */ - if (ci->hydro.count == 0 || !CELL_IS_ACTIVE(ci, r->e)) return; + if (c->hydro.count == 0 || !CELL_IS_ACTIVE(c, r->e)) return; - /* Recurse? */ - if (cell_can_recurse_in_self_hydro_task(ci)) { + /* We reached a leaf OR a cell small enough to process quickly */ + if (!c->split || c->hydro.count < space_recurse_size_self_hydro) { - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF2(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR2(r, ci->progeny[k], ci->progeny[j], 0); - } + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOSELF2_BRANCH(r, c, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); - } + } else { - /* Otherwise, compute self-interaction. */ - else { - DOSELF2_BRANCH(r, ci); + /* Should we change the recursion regime because we encountered a large + particle at this level? */ + if (!recurse_below_h_max && !cell_can_recurse_in_subself2_hydro_task(c)) { + recurse_below_h_max = 1; + } + + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* message("Multi-level SELF! c->count=%d", c->hydro.count); */ + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOSELF2_BRANCH(r, c, /*limit_h_min=*/1, /*limit_h_max=*/1); + } + + /* Recurse to the lower levels. */ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + DOSUB_SELF2(r, c->progeny[k], recurse_below_h_max, /*gettimer=*/0); + for (int j = k + 1; j < 8; j++) { + if (c->progeny[j] != NULL) { + DOSUB_PAIR2(r, c->progeny[k], c->progeny[j], recurse_below_h_max, + /*gettimer=*/0); + } + } + } + } } + if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); } -void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, - int *ind, int count, struct cell *cj, int gettimer) { +/** + * @brief Find which sub-cell of a cell contain the subset of particles given + * by the list of indices. + * + * Will throw an error if the sub-cell can't be found. + * + * @param c The #cell + * @param parts An array of #part. + * @param ind Index of the #part's in the particle array to find in the subs. + */ +struct cell *FIND_SUB(const struct cell *const c, + const struct part *const parts, const int *const ind) { + +#ifdef SWIFT_DEBUG_CHECKS + if (!c->split) error("Can't search for subs in a non-split cell"); +#endif + + /* Find out in which sub-cell of ci the parts are. + * + * Note: We only need to check the first particle in the list */ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + if (&parts[ind[0]] >= &c->progeny[k]->hydro.parts[0] && + &parts[ind[0]] < + &c->progeny[k]->hydro.parts[c->progeny[k]->hydro.count]) { + return c->progeny[k]; + } + } + } + error("Invalid sub!"); + return NULL; +} + +void DOSUB_PAIR_SUBSET(struct runner *r, struct cell *ci, struct part *parts, + const int *ind, const int count, struct cell *cj, + const int gettimer) { const struct engine *e = r->e; struct space *s = e->s; @@ -2711,79 +3175,71 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, TIMER_TIC; /* Should we even bother? */ - if (!cell_is_active_hydro(ci, e) && - (cj == NULL || !cell_is_active_hydro(cj, e))) - return; - if (ci->hydro.count == 0 || (cj != NULL && cj->hydro.count == 0)) return; - - /* Find out in which sub-cell of ci the parts are. */ - struct cell *sub = NULL; - if (ci->split) { - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) { - if (&parts[ind[0]] >= &ci->progeny[k]->hydro.parts[0] && - &parts[ind[0]] < - &ci->progeny[k]->hydro.parts[ci->progeny[k]->hydro.count]) { - sub = ci->progeny[k]; - break; - } - } - } - } + if (ci->hydro.count == 0 || cj->hydro.count == 0) return; + if (!cell_is_active_hydro(ci, e)) return; - /* Is this a single cell? */ - if (cj == NULL) { + /* Recurse? */ + if (cell_can_recurse_in_pair_hydro_task(ci) && + cell_can_recurse_in_pair_hydro_task(cj)) { - /* Recurse? */ - if (cell_can_recurse_in_self_hydro_task(ci)) { + /* Find in which sub-cell of ci the particles are */ + struct cell *const sub = FIND_SUB(ci, parts, ind); - /* Loop over all progeny. */ - DOSUB_SUBSET(r, sub, parts, ind, count, NULL, 0); - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != sub && ci->progeny[j] != NULL) - DOSUB_SUBSET(r, sub, parts, ind, count, ci->progeny[j], 0); + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3]; + const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) + DOSUB_PAIR_SUBSET(r, ci->progeny[pid], parts, ind, count, + cj->progeny[pjd], + /*gettimer=*/0); + if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) + DOSUB_PAIR_SUBSET(r, cj->progeny[pjd], parts, ind, count, + ci->progeny[pid], + /*gettimer=*/0); } - /* Otherwise, compute self-interaction. */ - else - DOSELF_SUBSET_BRANCH(r, ci, parts, ind, count); - } /* self-interaction. */ + } + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_hydro(ci, e)) { - /* Otherwise, it's a pair interaction. */ - else { + /* Do any of the cells need to be drifted first? */ + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); - /* Recurse? */ - if (cell_can_recurse_in_pair_hydro_task(ci) && - cell_can_recurse_in_pair_hydro_task(cj)) { - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); - - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) - DOSUB_SUBSET(r, ci->progeny[pid], parts, ind, count, cj->progeny[pjd], - 0); - if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) - DOSUB_SUBSET(r, cj->progeny[pjd], parts, ind, count, ci->progeny[pid], - 0); - } - } + DOPAIR_SUBSET_BRANCH(r, ci, parts, ind, count, cj); + } - /* Otherwise, compute the pair directly. */ - else if (cell_is_active_hydro(ci, e) || cell_is_active_hydro(cj, e)) { + if (gettimer) TIMER_TOC(timer_dosub_subset); +} - /* Do any of the cells need to be drifted first? */ - if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); +void DOSUB_SELF_SUBSET(struct runner *r, struct cell *ci, struct part *parts, + const int *ind, const int count, const int gettimer) { - DOPAIR_SUBSET_BRANCH(r, ci, parts, ind, count, cj); - } + const struct engine *e = r->e; - } /* otherwise, pair interaction. */ + /* Should we even bother? */ + if (ci->hydro.count == 0) return; + if (!cell_is_active_hydro(ci, e)) return; - if (gettimer) TIMER_TOC(timer_dosub_subset); + /* Recurse? */ + if (ci->split && cell_can_recurse_in_self_hydro_task(ci)) { + + /* Find in which sub-cell of ci the particles are */ + struct cell *const sub = FIND_SUB(ci, parts, ind); + + /* Loop over all progeny. */ + DOSUB_SELF_SUBSET(r, sub, parts, ind, count, /*gettimer=*/0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + DOSUB_PAIR_SUBSET(r, sub, parts, ind, count, ci->progeny[j], + /*gettimer=*/0); + } + + /* Otherwise, compute self-interaction. */ + else + DOSELF_SUBSET_BRANCH(r, ci, parts, ind, count); } diff --git a/src/runner_doiact_functions_limiter.h b/src/runner_doiact_functions_limiter.h index 0d7e07de6aff448ffdf1645bf4cdeaec85094fcc..90f2d1ffe6e139a18617f1df6befcde0b3ce2ba2 100644 --- a/src/runner_doiact_functions_limiter.h +++ b/src/runner_doiact_functions_limiter.h @@ -36,7 +36,8 @@ * @param cj The second #cell. */ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { + struct cell *restrict cj, const int limit_min_h, + const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -46,14 +47,28 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Anything to do here? */ if (!cell_is_starting_hydro(ci, e) && !cell_is_starting_hydro(cj, e)) return; + /* Cosmological terms */ + const float a = cosmo->a; + const float H = cosmo->H; + const int count_i = ci->hydro.count; const int count_j = cj->hydro.count; struct part *restrict parts_i = ci->hydro.parts; struct part *restrict parts_j = cj->hydro.parts; - /* Cosmological terms */ - const float a = cosmo->a; - const float H = cosmo->H; +#ifdef SWIFT_DEBUG_CHECKS + if (ci->dmin != cj->dmin) error("Cells of different size!"); +#endif + + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; +#endif /* Get the relative distance between the pairs, wrapping. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -74,6 +89,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, if (part_is_inhibited(pi, e)) continue; const int pi_active = part_is_starting(pi, e); + const char depth_i = pi->depth_h; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - (cj->loc[0] + shift[0])), @@ -85,6 +101,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; + const char depth_j = pj->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; @@ -108,12 +125,25 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, error("Particle pj not drifted to current time"); #endif + const int doi = pi_active && (r2 < hig2) && (depth_i >= min_depth) && + (depth_i <= max_depth); + const int doj = pj_active && (r2 < hjg2) && (depth_j >= min_depth) && + (depth_j <= max_depth); + /* Hit or miss? */ - if (r2 < hig2 && pi_active) { + if (doi) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); } - if (r2 < hjg2 && pj_active) { + if (doj) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif dx[0] = -dx[0]; dx[1] = -dx[1]; @@ -136,7 +166,8 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, * @param r The #runner. * @param c The #cell. */ -void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { +void DOSELF1_NAIVE(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -151,7 +182,17 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { const float H = cosmo->H; const int count = c->hydro.count; - struct part *restrict parts = c->hydro.parts; + struct part *parts = c->hydro.parts; + + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? c->depth : 0; + const char max_depth = limit_min_h ? c->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? c->h_min_allowed : 0.; + const float h_max = limit_max_h ? c->h_max_allowed : FLT_MAX; +#endif /* Loop over the parts in ci. */ for (int pid = 0; pid < count; pid++) { @@ -163,6 +204,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { if (part_is_inhibited(pi, e)) continue; const int pi_active = part_is_starting(pi, e); + const char depth_i = pi->depth_h; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; const float pix[3] = {(float)(pi->x[0] - c->loc[0]), @@ -181,6 +223,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { const float hj = pj->h; const float hjg2 = hj * hj * kernel_gamma2; const int pj_active = part_is_starting(pj, e); + const char depth_j = pj->depth_h; /* Compute the pairwise distance. */ const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), @@ -189,8 +232,10 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { float dx[3] = {pix[0] - pjx[0], pix[1] - pjx[1], pix[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - const int doi = pi_active && (r2 < hig2); - const int doj = pj_active && (r2 < hjg2); + const int doi = pi_active && (r2 < hig2) && (depth_i >= min_depth) && + (depth_i <= max_depth); + const int doj = pj_active && (r2 < hjg2) && (depth_j >= min_depth) && + (depth_j <= max_depth); #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -203,12 +248,25 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { /* Hit or miss? */ if (doi && doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif + IACT(r2, dx, hi, hj, pi, pj, a, H); } else if (doi) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); } else if (doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) error("Inappropriate h for this level!"); +#endif + dx[0] = -dx[0]; dx[1] = -dx[1]; dx[2] = -dx[2]; @@ -230,8 +288,9 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { * @param sid The direction of the pair. * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, - const double *shift) { +void DOPAIR1(struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj, const int limit_min_h, + const int limit_max_h, const int sid, const double shift[3]) { const struct engine *restrict e = r->e; const struct cosmology *restrict cosmo = e->cosmology; @@ -259,9 +318,20 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, 2. * max(ci->hydro.dx_max_part, cj->hydro.dx_max_part); #endif /* SWIFT_DEBUG_CHECKS */ + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + + /* Get the limits in h (if any). Note ci and cj are the same size */ +#ifdef SWIFT_DEBUG_CHECKS + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; +#endif + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; + /* Get some other useful values. */ - const double hi_max = ci->hydro.h_max * kernel_gamma - rshift; - const double hj_max = cj->hydro.h_max * kernel_gamma; + const double hi_max = + min(h_max, ci->hydro.h_max_active) * kernel_gamma - rshift; + const double hj_max = min(h_max, cj->hydro.h_max_active) * kernel_gamma; const int count_i = ci->hydro.count; const int count_j = cj->hydro.count; struct part *restrict parts_i = ci->hydro.parts; @@ -276,17 +346,28 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, if (cell_is_starting_hydro(ci, e)) { - /* Loop over the parts in ci. */ + /* Loop over the *active* parts in ci that are within range (on the axis) + of any particle in cj. */ for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { /* Get a hold of the ith part in ci. */ struct part *restrict pi = &parts_i[sort_i[pid].i]; + const char depth_i = pi->depth_h; const float hi = pi->h; /* Skip inactive particles */ if (!part_is_starting(pi, e)) continue; +#ifdef SWIFT_DEBUG_CHECKS + if (hi > ci->hydro.h_max_active) + error("Particle has h larger than h_max_active"); +#endif + + /* Skip particles not in the range of h we care about */ + if (depth_i < min_depth) continue; + if (depth_i > max_depth) continue; + /* Is there anything we need to interact with ? */ const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; @@ -312,7 +393,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float pjz = pj->x[2] - cj->loc[2]; /* Compute the pairwise distance. */ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -352,6 +433,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hig2) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); } } /* loop over the parts in cj. */ @@ -366,11 +452,21 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Get a hold of the jth part in cj. */ struct part *pj = &parts_j[sort_j[pjd].i]; + const char depth_j = pj->depth_h; const float hj = pj->h; /* Skip inactive particles */ if (!part_is_starting(pj, e)) continue; +#ifdef SWIFT_DEBUG_CHECKS + if (hj > cj->hydro.h_max_active) + error("Particle has h larger than h_max_active"); +#endif + + /* Skip particles not in the range of h we care about */ + if (depth_j < min_depth) continue; + if (depth_j > max_depth) continue; + /* Is there anything we need to interact with ? */ const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; if (dj - rshift > di_max) continue; @@ -396,7 +492,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float piz = pi->x[2] - (cj->loc[2] + shift[2]); /* Compute the pairwise distance. */ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -436,6 +532,11 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, /* Hit or miss? */ if (r2 < hjg2) { +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); } } /* loop over the parts in ci. */ @@ -454,9 +555,10 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, * @param cj #cell cj * */ -void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h) { - const struct engine *restrict e = r->e; + const struct engine *e = r->e; /* Anything to do here? */ if (ci->hydro.count == 0 || cj->hydro.count == 0) return; @@ -468,67 +570,24 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) error("Interacting undrifted cells."); - /* Get the sort ID. */ + /* Get the sort ID. + * Note: this may swap the ci and cj pointers!! */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid_and_swap_cells(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ if (!(ci->hydro.sorted & (1 << sid)) || ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin) - error("Interacting unsorted cells."); + error("Interacting unsorted cells (ci)."); + if (!(cj->hydro.sorted & (1 << sid)) || cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); - -#ifdef SWIFT_DEBUG_CHECKS - /* Pick-out the sorted lists. */ - const struct sort_entry *restrict sort_i = cell_get_hydro_sorts(ci, sid); - const struct sort_entry *restrict sort_j = cell_get_hydro_sorts(cj, sid); - - /* Check that the dx_max_sort values in the cell are indeed an upper - bound on particle movement. */ - for (int pid = 0; pid < ci->hydro.count; pid++) { - const struct part *p = &ci->hydro.parts[sort_i[pid].i]; - if (part_is_inhibited(p, e)) continue; - - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if (fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - 1.0e-4 * max(fabsf(d), ci->hydro.dx_max_sort_old) && - fabsf(d - sort_i[pid].d) - ci->hydro.dx_max_sort > - ci->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell ci. ci->nodeID=%d " - "cj->nodeID=%d d=%e sort_i[pid].d=%e ci->hydro.dx_max_sort=%e " - "ci->hydro.dx_max_sort_old=%e", - ci->nodeID, cj->nodeID, d, sort_i[pid].d, ci->hydro.dx_max_sort, - ci->hydro.dx_max_sort_old); - } - for (int pjd = 0; pjd < cj->hydro.count; pjd++) { - const struct part *p = &cj->hydro.parts[sort_j[pjd].i]; - if (part_is_inhibited(p, e)) continue; - - const float d = p->x[0] * runner_shift[sid][0] + - p->x[1] * runner_shift[sid][1] + - p->x[2] * runner_shift[sid][2]; - if ((fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - 1.0e-4 * max(fabsf(d), cj->hydro.dx_max_sort_old) && - (fabsf(d - sort_j[pjd].d) - cj->hydro.dx_max_sort) > - cj->width[0] * 1.0e-10) - error( - "particle shift diff exceeds dx_max_sort in cell cj. cj->nodeID=%d " - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->hydro.dx_max_sort=%e " - "cj->hydro.dx_max_sort_old=%e", - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->hydro.dx_max_sort, - cj->hydro.dx_max_sort_old); - } -#endif /* SWIFT_DEBUG_CHECKS */ + error("Interacting unsorted cells (cj)."); #if defined(SWIFT_USE_NAIVE_INTERACTIONS) - DOPAIR1_NAIVE(r, ci, cj); + DOPAIR1_NAIVE(r, ci, cj, limit_min_h, limit_max_h); #else - DOPAIR1(r, ci, cj, sid, shift); + DOPAIR1(r, ci, cj, limit_min_h, limit_max_h, sid, shift); #endif } @@ -538,56 +597,81 @@ void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { * @param r The #runner. * @param c The #cell. */ -void DOSELF1(struct runner *r, struct cell *restrict c) { +void DOSELF1(struct runner *r, const struct cell *c, const int limit_min_h, + const int limit_max_h) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; TIMER_TIC; - struct part *restrict parts = c->hydro.parts; + struct part *parts = c->hydro.parts; const int count = c->hydro.count; - /* Set up indt. */ + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? c->depth : 0; + const char max_depth = limit_min_h ? c->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? c->h_min_allowed : 0.; + const float h_max = limit_max_h ? c->h_max_allowed : FLT_MAX; +#endif + + /* Set up a list of the particles for which we want to compute interactions */ int *indt = NULL; int countdt = 0, firstdt = 0; if (posix_memalign((void **)&indt, VEC_SIZE * sizeof(int), count * sizeof(int)) != 0) error("Failed to allocate indt."); - for (int k = 0; k < count; k++) - if (part_is_starting(&parts[k], e)) { + for (int k = 0; k < count; k++) { + const struct part *p = &parts[k]; + const char depth = p->depth_h; + if (part_is_starting(&parts[k], e) && (depth >= min_depth) && + (depth <= max_depth)) { indt[countdt] = k; countdt += 1; } + } /* Cosmological terms */ const float a = cosmo->a; const float H = cosmo->H; - /* Loop over the particles in the cell. */ + /* Loop over *all* the particles (i.e. the ones to update and not to update). + * + * Note the additional condition to make the loop abort if all the active + * particles have been processed. */ for (int pid = 0; pid < count; pid++) { /* Get a pointer to the ith particle. */ struct part *restrict pi = &parts[pid]; + const char depth_i = pi->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pi, e)) continue; - /* Get the particle position and radius. */ - double pix[3]; - for (int k = 0; k < 3; k++) pix[k] = pi->x[k]; + /* Get the particle position and (square of) search radius. */ + const double pix[3] = {pi->x[0], pi->x[1], pi->x[2]}; const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; - /* Is the ith particle inactive? */ - if (!part_is_starting(pi, e)) { + /* Is the ith particle active and in the range of h we care about? */ + const int update_i = part_is_starting(pi, e) && (depth_i >= min_depth) && + (depth_i <= max_depth); - /* Loop over the other particles .*/ + /* If false then it can only act as a neighbour of others */ + if (!update_i) { + + /* Loop over the particles we want to update. */ for (int pjd = firstdt; pjd < countdt; pjd++) { - /* Get a pointer to the jth particle. */ + /* Get a pointer to the jth particle. (by construction pi != pj) */ struct part *restrict pj = &parts[indt[pjd]]; + + /* This particle's (square of) search radius. */ const float hj = pj->h; + const float hjg2 = hj * hj * kernel_gamma2; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -597,50 +681,48 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { error("Particle pj not drifted to current time"); #endif - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pj->x[k] - pix[k]; - r2 += dx[k] * dx[k]; - } + /* Compute the (square of) pairwise distance. */ + const double pjx[3] = {pj->x[0], pj->x[1], pj->x[2]}; + const float dx[3] = {(float)(pjx[0] - pix[0]), (float)(pjx[1] - pix[1]), + (float)(pjx[2] - pix[2])}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Hit or miss? */ - if (r2 < hj * hj * kernel_gamma2) { + if (r2 < hjg2) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); } - } /* loop over all other particles. */ + } /* loop over all the particles we want to update. */ } /* Otherwise, interact with all candidates. */ else { - /* We caught a live one! */ + /* We caught a live one! + * Move the start of the list of active ones by one slot as it will have + * been fully processed after the following loop so no need to consider it + * in the previous loop any more. */ firstdt += 1; - /* Loop over the other particles .*/ + /* Loop over *all* the particles (i.e. the ones to update and not to + * update) but starting from where we are in the overall list. */ for (int pjd = pid + 1; pjd < count; pjd++) { - /* Get a pointer to the jth particle. */ + /* Get a pointer to the jth particle (by construction pi != pj). */ struct part *restrict pj = &parts[pjd]; + const char depth_j = pj->depth_h; /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; + /* This particle's (square of) search radius. */ const float hj = pj->h; - - /* Compute the pairwise distance. */ - float r2 = 0.0f; - float dx[3]; - for (int k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } - const int doj = - (part_is_starting(pj, e)) && (r2 < hj * hj * kernel_gamma2); - - const int doi = (r2 < hig2); + const float hjg2 = hj * hj * kernel_gamma2; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ @@ -650,26 +732,64 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { error("Particle pj not drifted to current time"); #endif + /* Compute the (square of) pairwise distance. */ + const double pjx[3] = {pj->x[0], pj->x[1], pj->x[2]}; + float dx[3] = {(float)(pix[0] - pjx[0]), (float)(pix[1] - pjx[1]), + (float)(pix[2] - pjx[2])}; + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + + /* Decide which of the two particles to update */ + + /* We know pi is active and in the right range of h + * -> Only check the distance to pj */ + const int doi = (r2 < hig2); + + /* We know nothing about pj + * -> Check whether it is active + * -> Check whether it is in the right range of h + * -> Check the distance to pi */ + const int doj = (part_is_starting(pj, e)) && (depth_j >= min_depth) && + (depth_j <= max_depth) && (r2 < hjg2); + /* Hit or miss? */ - if (doi || doj) { + if (doi && doj) { - /* Which parts need to be updated? */ - if (doi && doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + /* Update both pi and pj */ - IACT(r2, dx, hi, hj, pi, pj, a, H); - } else if (doi) { + IACT(r2, dx, hi, hj, pi, pj, a, H); + } else if (doi) { - IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); - } else if (doj) { +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif - dx[0] = -dx[0]; - dx[1] = -dx[1]; - dx[2] = -dx[2]; - IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); - } - } + /* Update only pi */ + + IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); + } else if (doj) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + + /* Update only pj */ + + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; + IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); + + } /* Hit or miss */ } /* loop over all other particles. */ - } + } /* pi is active */ } /* loop over all particles. */ free(indt); @@ -685,7 +805,8 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { * @param c #cell c * */ -void DOSELF1_BRANCH(struct runner *r, struct cell *c) { +void DOSELF1_BRANCH(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { const struct engine *restrict e = r->e; @@ -695,17 +816,25 @@ void DOSELF1_BRANCH(struct runner *r, struct cell *c) { /* Anything to do here? */ if (!cell_is_starting_hydro(c, e)) return; +#ifdef SWIFT_DEBUG_CHECKS + /* Did we mess up the recursion? */ if (c->hydro.h_max_old * kernel_gamma > c->dmin) - error("Cell smaller than smoothing length"); + if (!limit_max_h && c->hydro.h_max_active * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + /* Did we mess up the recursion? */ + if (limit_min_h && !limit_max_h) + error("Fundamental error in the recursion logic"); +#endif /* Check that cells are drifted. */ if (!cell_are_part_drifted(c, e)) error("Interacting undrifted cell."); #if defined(SWIFT_USE_NAIVE_INTERACTIONS) - DOSELF1_NAIVE(r, c); + DOSELF1_NAIVE(r, c, limit_min_h, limit_max_h); #else - DOSELF1(r, c); + DOSELF1(r, c, limit_min_h, limit_max_h); #endif } @@ -721,7 +850,7 @@ void DOSELF1_BRANCH(struct runner *r, struct cell *c) { * redundant computations to find the sid on-the-fly. */ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { + int recurse_below_h_max, const int gettimer) { struct space *s = r->e->s; const struct engine *e = r->e; @@ -733,51 +862,86 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); /* Should we even bother? */ - const int do_i = cell_get_flag(ci, cell_flag_do_hydro_limiter); - const int do_j = cell_get_flag(cj, cell_flag_do_hydro_limiter); - const int do_sub_i = cell_get_flag(ci, cell_flag_do_hydro_sub_limiter); - const int do_sub_j = cell_get_flag(cj, cell_flag_do_hydro_sub_limiter); + /* const int do_i = cell_get_flag(ci, cell_flag_do_hydro_limiter); */ + /* const int do_j = cell_get_flag(cj, cell_flag_do_hydro_limiter); */ + /* const int do_sub_i = cell_get_flag(ci, cell_flag_do_hydro_sub_limiter); */ + /* const int do_sub_j = cell_get_flag(cj, cell_flag_do_hydro_sub_limiter); */ - if (!do_i && !do_j && !do_sub_i && !do_sub_j) return; + /* if (!do_i && !do_j && !do_sub_i && !do_sub_j) return; */ if (!cell_is_starting_hydro(ci, e) && !cell_is_starting_hydro(cj, e)) return; if (ci->hydro.count == 0 || cj->hydro.count == 0) return; - /* Recurse? */ - if (cell_can_recurse_in_pair_hydro_task(ci) && - cell_can_recurse_in_pair_hydro_task(cj)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR1(r, ci->progeny[pid], cj->progeny[pjd], 0); + /* We reached a leaf OR a cell small enough to be processed quickly */ + if (!ci->split || ci->hydro.count < space_recurse_size_pair_hydro || + !cj->split || cj->hydro.count < space_recurse_size_pair_hydro) { + + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); } - } - /* Otherwise, compute the pair directly. */ - else if ((cell_is_starting_hydro(ci, e) && (do_i || do_sub_i)) || - (cell_is_starting_hydro(cj, e) && (do_j || do_sub_j))) { + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOPAIR1_BRANCH(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_part_drifted(ci, e) || !cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells."); + } else { - /* Do any of the cells need to be sorted first? */ - if (!(ci->hydro.sorted & (1 << sid)) || - ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) - error( - "Interacting unsorted cell. ci->hydro.dx_max_sort_old=%e ci->dmin=%e " - "ci->sorted=%d sid=%d", - ci->hydro.dx_max_sort_old, ci->dmin, ci->hydro.sorted, sid); - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error( - "Interacting unsorted cell. cj->hydro.dx_max_sort_old=%e cj->dmin=%e " - "cj->sorted=%d sid=%d", - cj->hydro.dx_max_sort_old, cj->dmin, cj->hydro.sorted, sid); - - /* Compute the interactions. */ - DOPAIR1_BRANCH(r, ci, cj); + /* Both ci and cj are split */ + + /* Should we change the recursion regime because we encountered a large + particle? */ + if (!recurse_below_h_max && (!cell_can_recurse_in_subpair_hydro_task(ci) || + !cell_can_recurse_in_subpair_hydro_task(cj))) { + recurse_below_h_max = 1; + } + + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOPAIR1_BRANCH(r, ci, cj, /*limit_h_min=*/1, /*limit_h_max=*/1); + } + + /* Recurse to the lower levels. */ + const struct cell_split_pair *const csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) { + DOSUB_PAIR1(r, ci->progeny[pid], cj->progeny[pjd], recurse_below_h_max, + /*gettimer=*/0); + } + } } if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); @@ -790,38 +954,58 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, * @param ci The first #cell. * @param gettimer Do we have a timer ? */ -void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { +void DOSUB_SELF1(struct runner *r, struct cell *c, int recurse_below_h_max, + const int gettimer) { TIMER_TIC; /* Should we even bother? */ - const int do_i = cell_get_flag(ci, cell_flag_do_hydro_limiter); - const int do_sub_i = cell_get_flag(ci, cell_flag_do_hydro_sub_limiter); - - if (!do_i && !do_sub_i) return; - if (!cell_is_starting_hydro(ci, r->e)) return; - if (ci->hydro.count == 0) return; - - /* Recurse? */ - if (cell_can_recurse_in_self_hydro_task(ci)) { - - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF1(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR1(r, ci->progeny[k], ci->progeny[j], 0); - } - } + /* const int do_i = cell_get_flag(c, cell_flag_do_hydro_limiter); */ + /* const int do_sub_i = cell_get_flag(c, cell_flag_do_hydro_sub_limiter); */ + + /* if (!do_i && !do_sub_i) return; */ + if (!cell_is_starting_hydro(c, r->e)) return; + if (c->hydro.count == 0) return; + + /* We reached a leaf OR a cell small enough to process quickly */ + if (!c->split || c->hydro.count < space_recurse_size_self_hydro) { - /* Otherwise, compute self-interaction. */ - else { + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOSELF1_BRANCH(r, c, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); - /* Drift the cell to the current timestep if needed. */ - if (!cell_are_part_drifted(ci, r->e)) error("Interacting undrifted cell."); + } else { - DOSELF1_BRANCH(r, ci); + /* Should we change the recursion regime because we encountered a large + particle at this level? */ + if (!recurse_below_h_max && !cell_can_recurse_in_subself_hydro_task(c)) { + recurse_below_h_max = 1; + } + + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOSELF1_BRANCH(r, c, /*limit_h_min=*/1, /*limit_h_max=*/1); + } + + /* Recurse to the lower levels. */ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + DOSUB_SELF1(r, c->progeny[k], recurse_below_h_max, /*gettimer=*/0); + for (int j = k + 1; j < 8; j++) { + if (c->progeny[j] != NULL) { + DOSUB_PAIR1(r, c->progeny[k], c->progeny[j], recurse_below_h_max, + /*gettimer=*/0); + } + } + } + } } if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF); diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h index b91066f509b9364288ef6ff70e58eebab6150072..abdb0bfdca6b2cbd343189707080f7978d6c3453 100644 --- a/src/runner_doiact_functions_stars.h +++ b/src/runner_doiact_functions_stars.h @@ -24,6 +24,7 @@ and runner_dosub_FUNCTION calling the pairwise interaction function runner_iact_FUNCTION. */ +#include "feedback.h" #include "runner_doiact_stars.h" #ifdef RT_NONE @@ -42,9 +43,13 @@ * * @param r runner task * @param c cell - * @param timer 1 if the time is to be recorded. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. + * @param offset First particle in the cell to treat (for split tasks). + * @param increment Interval between successive particles that are treated. */ -void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { +void DOSELF1_STARS(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { #ifdef SWIFT_DEBUG_CHECKS if (c->nodeID != engine_rank) error("Should be run on a different node"); @@ -74,11 +79,24 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { const int with_rt = WITH_RT; + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? c->depth : 0; + const char max_depth = limit_min_h ? c->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? c->h_min_allowed : 0.; + const float h_max = limit_max_h ? c->h_max_allowed : FLT_MAX; +#endif + /* Loop over the sparts in ci. */ for (int sid = 0; sid < scount; sid++) { /* Get a hold of the ith spart in ci. */ - struct spart *restrict si = &sparts[sid]; + struct spart *si = &sparts[sid]; + const char depth_i = si->depth_h; + const float hi = si->h; + const float hig2 = hi * hi * kernel_gamma2; /* Skip inhibited particles */ if (spart_is_inhibited(si, e)) continue; @@ -87,11 +105,13 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { if (!spart_is_active(si, e)) continue; /* Skip inactive particles */ - int si_active_feedback = feedback_is_active(si, e); + const int si_active_feedback = feedback_is_active(si, e); if (!si_active_feedback && !with_rt) continue; - const float hi = si->h; - const float hig2 = hi * hi * kernel_gamma2; + /* Skip particles not in the range of h we care about */ + if (depth_i > max_depth) continue; + if (depth_i < min_depth) continue; + const float six[3] = {(float)(si->x[0] - c->loc[0]), (float)(si->x[1] - c->loc[1]), (float)(si->x[2] - c->loc[2])}; @@ -113,7 +133,7 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { const float pjx[3] = {(float)(pj->x[0] - c->loc[0]), (float)(pj->x[1] - c->loc[1]), (float)(pj->x[2] - c->loc[2])}; - float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -123,6 +143,11 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { #endif if (r2 < hig2 && si_active_feedback) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif + IACT_STARS(r2, dx, hi, hj, si, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, NULL, cosmo, @@ -161,9 +186,15 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { * @param r runner task * @param ci The first #cell * @param cj The second #cell + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. + * @param offset First particle in the cell to treat (for split tasks). + * @param increment Interval between successive particles that are treated. */ -void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { +void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, + const struct cell *restrict ci, + const struct cell *restrict cj, + const int limit_min_h, const int limit_max_h) { #ifdef SWIFT_DEBUG_CHECKS #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -194,6 +225,20 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, #endif const int with_rt = WITH_RT; +#ifdef SWIFT_DEBUG_CHECKS + if (ci->dmin != cj->dmin) error("Cells of different size!"); +#endif + + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; +#endif + /* Get the relative distance between the pairs, wrapping. */ double shift[3] = {0.0, 0.0, 0.0}; for (int k = 0; k < 3; k++) { @@ -207,7 +252,8 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, for (int sid = 0; sid < scount_i; sid++) { /* Get a hold of the ith spart in ci. */ - struct spart *restrict si = &sparts_i[sid]; + struct spart *si = &sparts_i[sid]; + const char depth_i = si->depth_h; /* Skip inhibited particles */ if (spart_is_inhibited(si, e)) continue; @@ -216,7 +262,7 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, if (!spart_is_active(si, e)) continue; /* Skip inactive particles */ - int si_active_feedback = feedback_is_active(si, e); + const int si_active_feedback = feedback_is_active(si, e); if (!si_active_feedback && !with_rt) continue; const float hi = si->h; @@ -225,6 +271,15 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, (float)(si->x[1] - (cj->loc[1] + shift[1])), (float)(si->x[2] - (cj->loc[2] + shift[2]))}; +#ifdef SWIFT_DEBUG_CHECKS + if (hi > ci->stars.h_max_active) + error("Particle has h larger than h_max_active"); +#endif + + /* Skip particles not in the range of h we care about */ + if (depth_i > max_depth) continue; + if (depth_i < min_depth) continue; + /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j; pjd++) { @@ -242,7 +297,7 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, const float pjx[3] = {(float)(pj->x[0] - cj->loc[0]), (float)(pj->x[1] - cj->loc[1]), (float)(pj->x[2] - cj->loc[2])}; - float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; + const float dx[3] = {six[0] - pjx[0], six[1] - pjx[1], six[2] - pjx[2]}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -252,6 +307,11 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, #endif if (r2 < hig2 && si_active_feedback) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) error("Inappropriate h for this level!"); +#endif + IACT_STARS(r2, dx, hi, hj, si, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -289,11 +349,17 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. + * @param limit_min_h Only consider particles with h >= c->dmin/2. + * @param limit_max_h Only consider particles with h < c->dmin. * @param sid The direction of the pair. * @param shift The shift vector to apply to the particles in ci. + * @param offset First particle in the cell to treat (for split tasks). + * @param increment Interval between successive particles that are treated. */ -void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, - const int sid, const double *shift) { +void DO_SYM_PAIR1_STARS(struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj, const int limit_min_h, + const int limit_max_h, const int sid, + const double shift[3]) { TIMER_TIC; @@ -324,6 +390,20 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, #endif const int with_rt = WITH_RT; +#ifdef SWIFT_DEBUG_CHECKS + if (ci->dmin != cj->dmin) error("Cells of different size!"); +#endif + + /* Get the depth limits (if any) */ + const char min_depth = limit_max_h ? ci->depth : 0; + const char max_depth = limit_min_h ? ci->depth : CHAR_MAX; + +#ifdef SWIFT_DEBUG_CHECKS + /* Get the limits in h (if any) */ + const float h_min = limit_min_h ? ci->h_min_allowed : 0.; +#endif + const float h_max = limit_max_h ? ci->h_max_allowed : FLT_MAX; + if (do_ci_stars) { /* Pick-out the sorted lists. */ @@ -344,24 +424,26 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, #endif /* SWIFT_DEBUG_CHECKS */ /* Get some other useful values. */ - const double hi_max = ci->stars.h_max * kernel_gamma - rshift; + const double hi_max = + min(h_max, ci->stars.h_max_active) * kernel_gamma - rshift; const int count_i = ci->stars.count; const int count_j = cj->hydro.count; - struct spart *restrict sparts_i = ci->stars.parts; - struct part *restrict parts_j = cj->hydro.parts; + struct spart *sparts_i = ci->stars.parts; + struct part *parts_j = cj->hydro.parts; #if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) - struct xpart *restrict xparts_j = cj->hydro.xparts; + struct xpart *xparts_j = cj->hydro.xparts; #endif const double dj_min = sort_j[0].d; const float dx_max = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort); - const float hydro_dx_max_rshift = cj->hydro.dx_max_sort - rshift; - /* Loop over the sparts in ci. */ + /* Loop over the *active* sparts in ci that are within range (on the axis) + of any particle in cj. */ for (int pid = count_i - 1; pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { /* Get a hold of the ith part in ci. */ - struct spart *restrict spi = &sparts_i[sort_i[pid].i]; + struct spart *spi = &sparts_i[sort_i[pid].i]; + const char depth_i = spi->depth_h; const float hi = spi->h; /* Skip inhibited particles */ @@ -374,13 +456,17 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, const int spi_active_feedback = feedback_is_active(spi, e); if (!spi_active_feedback && !with_rt) continue; - /* Compute distance from the other cell. */ - const double px[3] = {spi->x[0], spi->x[1], spi->x[2]}; - float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + - px[2] * runner_shift[sid][2]; +#ifdef SWIFT_DEBUG_CHECKS + if (hi > ci->stars.h_max_active) + error("Particle has h larger than h_max_active"); +#endif + + /* Skip particles not in the range of h we care about */ + if (depth_i > max_depth) continue; + if (depth_i < min_depth) continue; /* Is there anything we need to interact with ? */ - const double di = dist + hi * kernel_gamma + hydro_dx_max_rshift; + const double di = sort_i[pid].d + hi * kernel_gamma + dx_max - rshift; if (di < dj_min) continue; /* Get some additional information about pi */ @@ -407,7 +493,7 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, const float pjz = pj->x[2] - cj->loc[2]; /* Compute the pairwise distance. */ - float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; + const float dx[3] = {pix - pjx, piy - pjy, piz - pjz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -446,6 +532,12 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, /* Hit or miss? */ if (r2 < hig2 && spi_active_feedback) { + +#ifdef SWIFT_DEBUG_CHECKS + if (hi < h_min || hi >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -497,7 +589,7 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, #endif /* SWIFT_DEBUG_CHECKS */ /* Get some other useful values. */ - const double hj_max = cj->stars.h_max * kernel_gamma; + const double hj_max = min(h_max, cj->stars.h_max_active) * kernel_gamma; const int count_i = ci->hydro.count; const int count_j = cj->stars.count; struct spart *restrict sparts_j = cj->stars.parts; @@ -507,14 +599,15 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, #endif const double di_max = sort_i[count_i - 1].d - rshift; const float dx_max = (ci->hydro.dx_max_sort + cj->stars.dx_max_sort); - const float hydro_dx_max_rshift = ci->hydro.dx_max_sort - rshift; - /* Loop over the parts in cj. */ + /* Loop over the *active* sparts in cj that are within range (on the axis) + of any particle in ci. */ for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; pjd++) { /* Get a hold of the jth part in cj. */ struct spart *spj = &sparts_j[sort_j[pjd].i]; + const char depth_j = spj->depth_h; const float hj = spj->h; /* Skip inhibited particles */ @@ -524,16 +617,20 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, if (!spart_is_active(spj, e)) continue; /* Skip inactive particles */ - int spj_active_feedback = feedback_is_active(spj, e); + const int spj_active_feedback = feedback_is_active(spj, e); if (!spj_active_feedback && !with_rt) continue; - /* Compute distance from the other cell. */ - const double px[3] = {spj->x[0], spj->x[1], spj->x[2]}; - float dist = px[0] * runner_shift[sid][0] + px[1] * runner_shift[sid][1] + - px[2] * runner_shift[sid][2]; +#ifdef SWIFT_DEBUG_CHECKS + if (hj > cj->stars.h_max_active) + error("Particle has h larger than h_max_active"); +#endif + + /* Skip particles not in the range of h we care about */ + if (depth_j > max_depth) continue; + if (depth_j < min_depth) continue; /* Is there anything we need to interact with ? */ - const double dj = dist - hj * kernel_gamma - hydro_dx_max_rshift; + const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; if (dj - rshift > di_max) continue; /* Get some additional information about pj */ @@ -560,7 +657,7 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, const float piz = pi->x[2] - (cj->loc[2] + shift[2]); /* Compute the pairwise distance. */ - float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; + const float dx[3] = {pjx - pix, pjy - piy, pjz - piz}; const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; #ifdef SWIFT_DEBUG_CHECKS @@ -600,6 +697,11 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, /* Hit or miss? */ if (r2 < hjg2 && spj_active_feedback) { +#ifdef SWIFT_DEBUG_CHECKS + if (hj < h_min || hj >= h_max) + error("Inappropriate h for this level!"); +#endif + IACT_STARS(r2, dx, hj, hi, spj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) @@ -635,8 +737,9 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, TIMER_TOC(TIMER_DOPAIR_STARS); } -void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj, int timer) { +void DOPAIR1_STARS_NAIVE(struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj, const int limit_min_h, + const int limit_max_h) { TIMER_TIC; @@ -650,9 +753,9 @@ void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, const int do_cj_stars = ci->nodeID == r->e->nodeID; #endif if (do_ci_stars && ci->stars.count != 0 && cj->hydro.count != 0) - DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj); + DO_NONSYM_PAIR1_STARS_NAIVE(r, ci, cj, limit_min_h, limit_max_h); if (do_cj_stars && cj->stars.count != 0 && ci->hydro.count != 0) - DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci); + DO_NONSYM_PAIR1_STARS_NAIVE(r, cj, ci, limit_min_h, limit_max_h); TIMER_TOC(TIMER_DOPAIR_STARS); } @@ -673,10 +776,11 @@ void DOPAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, * @param flipped Flag to check whether the cells have been flipped or not. * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts_i, int *restrict ind, - int scount, struct cell *restrict cj, const int sid, - const int flipped, const double *shift) { +void DOPAIR1_SUBSET_STARS(struct runner *r, const struct cell *restrict ci, + struct spart *restrict sparts_i, const int *ind, + const int scount, const struct cell *restrict cj, + const int sid, const int flipped, + const double shift[3]) { const struct engine *e = r->e; const struct cosmology *cosmo = e->cosmology; @@ -836,10 +940,11 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, * @param cj The second #cell. * @param shift The shift vector to apply to the particles in ci. */ -void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts_i, - int *restrict ind, int scount, - struct cell *restrict cj, const double *shift) { +void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, + const struct cell *restrict ci, + struct spart *restrict sparts_i, const int *ind, + const int scount, struct cell *restrict cj, + const double shift[3]) { #ifdef SWIFT_DEBUG_CHECKS if (ci->nodeID != engine_rank) error("Should be run on a different node"); @@ -929,9 +1034,9 @@ void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, * @param ind The list of indices of particles in @c ci to interact with. * @param scount The number of particles in @c ind. */ -void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts, int *restrict ind, - int scount) { +void DOSELF1_SUBSET_STARS(struct runner *r, const struct cell *ci, + struct spart *restrict sparts, const int *const ind, + const int scount) { #ifdef SWIFT_DEBUG_CHECKS if (ci->nodeID != engine_rank) error("Should be run on a different node"); #endif @@ -1016,9 +1121,9 @@ void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, * @param ind The list of indices of particles in @c ci to interact with. * @param scount The number of particles in @c ind. */ -void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, +void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, const struct cell *ci, struct spart *restrict sparts, - int *restrict ind, int scount) { + const int *const ind, const int scount) { DOSELF1_SUBSET_STARS(r, ci, sparts, ind, scount); } @@ -1035,9 +1140,10 @@ void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, * @param scount The number of particles in @c ind. * @param cj The second #cell. */ -void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, +void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, + const struct cell *restrict ci, struct spart *restrict sparts_i, - int *restrict ind, int scount, + const int *ind, const int scount, struct cell *restrict cj) { const struct engine *e = r->e; @@ -1054,9 +1160,6 @@ void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, shift[k] = -e->s->dim[k]; } -#ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS - DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift); -#else /* Get the sorting index. */ int sid = 0; for (int k = 0; k < 3; k++) @@ -1068,97 +1171,29 @@ void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, const int flipped = runner_flip[sid]; sid = sortlistID[sid]; - /* Has the cell cj been sorted? */ - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin) - error("Interacting unsorted cells."); + /* Let's first lock the cell */ + lock_lock(&cj->hydro.extra_sort_lock); - DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift); -#endif -} - -void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, - int *ind, int scount, struct cell *cj, int gettimer) { + const int is_sorted = + (cj->hydro.sorted & (1 << sid)) && + (cj->hydro.dx_max_sort_old <= space_maxreldx * cj->dmin); - const struct engine *e = r->e; - struct space *s = e->s; - - /* Should we even bother? */ - if (!cell_is_active_stars(ci, e) && - (cj == NULL || !cell_is_active_stars(cj, e))) - return; +#if defined(SWIFT_USE_NAIVE_INTERACTIONS) + const int force_naive = 1; +#else + const int force_naive = 0; +#endif - /* Find out in which sub-cell of ci the parts are. */ - struct cell *sub = NULL; - if (ci->split) { - for (int k = 0; k < 8; k++) { - if (ci->progeny[k] != NULL) { - if (&sparts[ind[0]] >= &ci->progeny[k]->stars.parts[0] && - &sparts[ind[0]] < - &ci->progeny[k]->stars.parts[ci->progeny[k]->stars.count]) { - sub = ci->progeny[k]; - break; - } - } - } + /* Can we use the sorted interactions or do we default to naive? */ + if (force_naive || !is_sorted) { + DOPAIR1_SUBSET_STARS_NAIVE(r, ci, sparts_i, ind, scount, cj, shift); + } else { + DOPAIR1_SUBSET_STARS(r, ci, sparts_i, ind, scount, cj, sid, flipped, shift); } - /* Is this a single cell? */ - if (cj == NULL) { - - /* Recurse? */ - if (cell_can_recurse_in_self_stars_task(ci)) { - - /* Loop over all progeny. */ - DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, NULL, 0); - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != sub && ci->progeny[j] != NULL) - DOSUB_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], 0); - - } - - /* Otherwise, compute self-interaction. */ - else - DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount); - } /* self-interaction. */ - - /* Otherwise, it's a pair interaction. */ - else { - - /* Recurse? */ - if (cell_can_recurse_in_pair_stars_task(ci, cj) && - cell_can_recurse_in_pair_stars_task(cj, ci)) { - - /* Get the type of pair and flip ci/cj if needed. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); - - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) - DOSUB_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount, - cj->progeny[pjd], 0); - if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) - DOSUB_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount, - ci->progeny[pid], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else if (cell_is_active_stars(ci, e) && cj->hydro.count > 0) { - - /* Do any of the cells need to be drifted first? */ - if (cell_is_active_stars(ci, e)) { - if (!cell_are_spart_drifted(ci, e)) error("Cell should be drifted!"); - if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); - } - - DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj); - } - - } /* otherwise, pair interaction. */ + /* Now we can unlock */ + if (lock_unlock(&cj->hydro.extra_sort_lock) != 0) + error("Impossible to unlock cell!"); } /** @@ -1167,54 +1202,45 @@ void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, * * @param r #runner * @param c #cell c - * + * @param offset First particle in the cell to treat (for split tasks). + * @param increment Interval between successive particles that are treated. */ -void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { +void DOSELF1_BRANCH_STARS(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h) { const struct engine *restrict e = r->e; /* Anything to do here? */ if (c->stars.count == 0) return; + /* Anything to do here? */ + if (c->hydro.count == 0) return; + /* Anything to do here? */ if (!cell_is_active_stars(c, e)) return; +#ifdef SWIFT_DEBUG_CHECKS + /* Did we mess up the recursion? */ if (c->stars.h_max_old * kernel_gamma > c->dmin) error("Cell smaller than smoothing length"); - DOSELF1_STARS(r, c, 1); -} + if (!limit_max_h && c->stars.h_max_active * kernel_gamma > c->dmin) + error("Cell smaller than smoothing length"); + + /* Did we mess up the recursion? */ + if (limit_min_h && !limit_max_h) + error("Fundamental error in the recursion logic"); +#endif + + /* Check that cells are drifted. */ + if (!cell_are_part_drifted(c, e)) + error("Interacting undrifted cell (hydro)."); + if (!cell_are_spart_drifted(c, e)) + error("Interacting undrifted cell (stars)."); -#define RUNNER_CHECK_SORT(TYPE, PART, cj, ci, sid) \ - ({ \ - const struct sort_entry *restrict sort_j = \ - cell_get_##TYPE##_sorts(cj, sid); \ - \ - for (int pjd = 0; pjd < cj->TYPE.count; pjd++) { \ - const struct PART *p = &cj->TYPE.parts[sort_j[pjd].i]; \ - if (PART##_is_inhibited(p, e)) continue; \ - \ - const float d = p->x[0] * runner_shift[sid][0] + \ - p->x[1] * runner_shift[sid][1] + \ - p->x[2] * runner_shift[sid][2]; \ - if ((fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ - 1.0e-4 * max(fabsf(d), cj->TYPE.dx_max_sort_old) && \ - (fabsf(d - sort_j[pjd].d) - cj->TYPE.dx_max_sort) > \ - cj->width[0] * 1.0e-10) \ - error( \ - "particle shift diff exceeds dx_max_sort in cell cj. " \ - "cj->nodeID=%d " \ - "ci->nodeID=%d d=%e sort_j[pjd].d=%e cj->" #TYPE \ - ".dx_max_sort=%e " \ - "cj->" #TYPE \ - ".dx_max_sort_old=%e, cellID=%lld super->cellID=%lld" \ - "cj->depth=%d cj->maxdepth=%d", \ - cj->nodeID, ci->nodeID, d, sort_j[pjd].d, cj->TYPE.dx_max_sort, \ - cj->TYPE.dx_max_sort_old, cj->cellID, cj->hydro.super->cellID, \ - cj->depth, cj->maxdepth); \ - } \ - }) + DOSELF1_STARS(r, c, limit_min_h, limit_max_h); +} /** * @brief Determine which version of DOPAIR1_STARS needs to be called depending @@ -1224,9 +1250,11 @@ void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c) { * @param r #runner * @param ci #cell ci * @param cj #cell cj - * + * @param offset First particle in the cell to treat (for split tasks). + * @param increment Interval between successive particles that are treated. */ -void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { +void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h) { const struct engine *restrict e = r->e; @@ -1234,21 +1262,24 @@ void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid_and_swap_cells(e->s, &ci, &cj, shift); - const int ci_active = cell_is_active_stars(ci, e); - const int cj_active = cell_is_active_stars(cj, e); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY || \ FUNCTION_TASK_LOOP == TASK_LOOP_STARS_PREP2) - const int do_ci_stars = ci->nodeID == e->nodeID; - const int do_cj_stars = cj->nodeID == e->nodeID; + /* Here we update the stars --> the star cell must be local */ + const int ci_local = (ci->nodeID == e->nodeID); + const int cj_local = (cj->nodeID == e->nodeID); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK || \ + FUNCTION_TASK_LOOP == TASK_LOOP_STARS_PREP1) + /* Here we update the gas --> the gas cell must be local */ + const int ci_local = (cj->nodeID == e->nodeID); + const int cj_local = (ci->nodeID == e->nodeID); #else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == e->nodeID; - const int do_cj_stars = ci->nodeID == e->nodeID; + error("Invalid loop type!"); #endif - const int do_ci = (ci->stars.count != 0 && cj->hydro.count != 0 && - ci_active && do_ci_stars); - const int do_cj = (cj->stars.count != 0 && ci->hydro.count != 0 && - cj_active && do_cj_stars); + + const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e) && ci_local; + const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e) && cj_local; /* Anything to do here? */ if (!do_ci && !do_cj) return; @@ -1258,46 +1289,32 @@ void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { (!cell_are_spart_drifted(ci, e) || !cell_are_part_drifted(cj, e))) error("Interacting undrifted cells."); + if (do_cj && + (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) + error("Interacting undrifted cells."); + /* Have the cells been sorted? */ if (do_ci && (!(ci->stars.sorted & (1 << sid)) || ci->stars.dx_max_sort_old > space_maxreldx * ci->dmin)) - error("Interacting unsorted cells."); + error("Interacting unsorted cells (ci stars)."); if (do_ci && (!(cj->hydro.sorted & (1 << sid)) || cj->hydro.dx_max_sort_old > space_maxreldx * cj->dmin)) - error("Interacting unsorted cells."); - - if (do_cj && - (!cell_are_part_drifted(ci, e) || !cell_are_spart_drifted(cj, e))) - error("Interacting undrifted cells."); + error("Interacting unsorted cells (cj hydro)."); /* Have the cells been sorted? */ if (do_cj && (!(ci->hydro.sorted & (1 << sid)) || ci->hydro.dx_max_sort_old > space_maxreldx * ci->dmin)) - error("Interacting unsorted cells."); + error("Interacting unsorted cells. (ci hydro)"); if (do_cj && (!(cj->stars.sorted & (1 << sid)) || cj->stars.dx_max_sort_old > space_maxreldx * cj->dmin)) - error("Interacting unsorted cells."); - -#ifdef SWIFT_DEBUG_CHECKS - if (do_ci) { - // MATTHIEU: This test is faulty. To be fixed... - // RUNNER_CHECK_SORT(hydro, part, cj, ci, sid); - RUNNER_CHECK_SORT(stars, spart, ci, cj, sid); - } - - if (do_cj) { - // MATTHIEU: This test is faulty. To be fixed... - // RUNNER_CHECK_SORT(hydro, part, ci, cj, sid); - RUNNER_CHECK_SORT(stars, spart, cj, ci, sid); - } -#endif /* SWIFT_DEBUG_CHECKS */ + error("Interacting unsorted cells. (cj stars)"); #ifdef SWIFT_USE_NAIVE_INTERACTIONS_STARS - DOPAIR1_STARS_NAIVE(r, ci, cj, 1); + DOPAIR1_STARS_NAIVE(r, ci, cj, limit_min_h, limit_max_h); #else - DO_SYM_PAIR1_STARS(r, ci, cj, sid, shift); + DO_SYM_PAIR1_STARS(r, ci, cj, limit_min_h, limit_max_h, sid, shift); #endif } @@ -1308,100 +1325,146 @@ void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj) { * @param ci The first #cell. * @param cj The second #cell. * @param gettimer Do we have a timer ? + * @param offset First particle in the cell to treat (for split tasks). + * @param increment Interval between successive particles that are treated. * * @todo Hard-code the sid on the recursive calls to avoid the * redundant computations to find the sid on-the-fly. */ void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { - + int recurse_below_h_max, const int gettimer) { TIMER_TIC; struct space *s = r->e->s; const struct engine *e = r->e; - /* Should we even bother? */ - const int should_do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && - cell_is_active_stars(ci, e); - const int should_do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && - cell_is_active_stars(cj, e); - if (!should_do_ci && !should_do_cj) return; - /* Get the type of pair and flip ci/cj if needed. */ double shift[3]; const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); - /* Recurse? */ - if (cell_can_recurse_in_pair_stars_task(ci, cj) && - cell_can_recurse_in_pair_stars_task(cj, ci)) { - struct cell_split_pair *csp = &cell_split_pairs[sid]; - for (int k = 0; k < csp->count; k++) { - const int pid = csp->pairs[k].pid; - const int pjd = csp->pairs[k].pjd; - if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) - DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], 0); - } - } - - /* Otherwise, compute the pair directly. */ - else { - #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY || \ FUNCTION_TASK_LOOP == TASK_LOOP_STARS_PREP2) - const int do_ci_stars = ci->nodeID == e->nodeID; - const int do_cj_stars = cj->nodeID == e->nodeID; + /* Here we update the stars --> the star cell must be local */ + const int ci_local = (ci->nodeID == e->nodeID); + const int cj_local = (cj->nodeID == e->nodeID); +#elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK || \ + FUNCTION_TASK_LOOP == TASK_LOOP_STARS_PREP1) + /* Here we update the gas --> the gas cell must be local */ + const int ci_local = (cj->nodeID == e->nodeID); + const int cj_local = (ci->nodeID == e->nodeID); #else - /* here we are updating the hydro -> switch ci, cj */ - const int do_ci_stars = cj->nodeID == e->nodeID; - const int do_cj_stars = ci->nodeID == e->nodeID; + error("Invalid loop type!"); #endif - const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && - cell_is_active_stars(ci, e) && do_ci_stars; - const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && - cell_is_active_stars(cj, e) && do_cj_stars; - if (do_ci) { + /* What kind of pair are we doing here? */ + const int do_ci = ci->stars.count != 0 && cj->hydro.count != 0 && + cell_is_active_stars(ci, e) && ci_local; + const int do_cj = cj->stars.count != 0 && ci->hydro.count != 0 && + cell_is_active_stars(cj, e) && cj_local; - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_spart_drifted(ci, e)) - error("Interacting undrifted cells (sparts)."); + /* Should we even bother? */ + if (!do_ci && !do_cj) return; - if (!cell_are_part_drifted(cj, e)) - error("Interacting undrifted cells (parts)."); + /* We reached a leaf OR a cell small enough to be processed quickly */ + if (!ci->split || ci->stars.count < space_recurse_size_pair_stars || + !cj->split || cj->stars.count < space_recurse_size_pair_stars) { - /* Do any of the cells need to be sorted first? */ + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (do_ci) { if (!(ci->stars.sorted & (1 << sid)) || ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) { - error("Interacting unsorted cell (sparts)."); + runner_do_stars_sort(r, ci, (1 << sid), 0, 0); } - if (!(cj->hydro.sorted & (1 << sid)) || - cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) - error("Interacting unsorted cell (parts). %i", cj->nodeID); + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); + } } - if (do_cj) { - - /* Make sure both cells are drifted to the current timestep. */ - if (!cell_are_part_drifted(ci, e)) - error("Interacting undrifted cells (parts)."); - - if (!cell_are_spart_drifted(cj, e)) - error("Interacting undrifted cells (sparts)."); - - /* Do any of the cells need to be sorted first? */ if (!(ci->hydro.sorted & (1 << sid)) || ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { - error("Interacting unsorted cell (parts)."); + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, /*clock=*/0); } - if (!(cj->stars.sorted & (1 << sid)) || cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) { - error("Interacting unsorted cell (sparts)."); + runner_do_stars_sort(r, cj, (1 << sid), 0, 0); + } + } + + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOPAIR1_BRANCH_STARS(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); + + } else { + + /* Both ci and cj are split */ + + /* Should we change the recursion regime because we encountered a large + particle? */ + if (!recurse_below_h_max && (!cell_can_recurse_in_subpair_stars_task(ci) || + !cell_can_recurse_in_subpair_stars_task(cj))) { + recurse_below_h_max = 1; + } + + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* Do any of the cells need to be sorted first? + * Since h_max might have changed, we may not have sorted at this level */ + if (do_ci) { + if (!(ci->stars.sorted & (1 << sid)) || + ci->stars.dx_max_sort_old > ci->dmin * space_maxreldx) { + runner_do_stars_sort(r, ci, (1 << sid), 0, 0); + } + if (!(cj->hydro.sorted & (1 << sid)) || + cj->hydro.dx_max_sort_old > cj->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, cj, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, + /*clock=*/0); + } } + if (do_cj) { + if (!(ci->hydro.sorted & (1 << sid)) || + ci->hydro.dx_max_sort_old > ci->dmin * space_maxreldx) { + /* Bert: RT probably broken here! */ + runner_do_hydro_sort(r, ci, (1 << sid), /*cleanup=*/0, /*lock=*/1, + /*rt_request=*/0, + /*clock=*/0); + } + if (!(cj->stars.sorted & (1 << sid)) || + cj->stars.dx_max_sort_old > cj->dmin * space_maxreldx) { + runner_do_stars_sort(r, cj, (1 << sid), 0, 0); + } + } + + /* message("Multi-level PAIR! ci->count=%d cj->count=%d", + * ci->hydro.count, cj->hydro.count); */ + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOPAIR1_BRANCH_STARS(r, ci, cj, /*limit_h_min=*/1, /*limit_h_max=*/1); } - if (do_ci || do_cj) DOPAIR1_BRANCH_STARS(r, ci, cj); + /* Recurse to the lower levels. */ + const struct cell_split_pair *const csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] != NULL && cj->progeny[pjd] != NULL) { + DOSUB_PAIR1_STARS(r, ci->progeny[pid], cj->progeny[pjd], + recurse_below_h_max, /*gettimer=*/0); + } + } } TIMER_TOC(TIMER_DOSUB_PAIR_STARS); @@ -1413,44 +1476,182 @@ void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, * @param r The #runner. * @param ci The first #cell. * @param gettimer Do we have a timer ? + * @param offset First particle in the cell to treat (for split tasks). + * @param increment Interval between successive particles that are treated. */ -void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer) { +void DOSUB_SELF1_STARS(struct runner *r, struct cell *c, + int recurse_below_h_max, const int gettimer) { TIMER_TIC; #ifdef SWIFT_DEBUG_CHECKS - if (ci->nodeID != engine_rank) + if (c->nodeID != engine_rank) error("This function should not be called on foreign cells"); #endif /* Should we even bother? */ - if (ci->hydro.count == 0 || ci->stars.count == 0 || - !cell_is_active_stars(ci, r->e)) + if (c->hydro.count == 0 || c->stars.count == 0 || + !cell_is_active_stars(c, r->e)) return; - /* Recurse? */ - if (cell_can_recurse_in_self_stars_task(ci)) { + /* We reached a leaf OR a cell small enough to process quickly */ + if (!c->split || c->stars.count < space_recurse_size_self_stars) { - /* Loop over all progeny. */ - for (int k = 0; k < 8; k++) - if (ci->progeny[k] != NULL) { - DOSUB_SELF1_STARS(r, ci->progeny[k], 0); - for (int j = k + 1; j < 8; j++) - if (ci->progeny[j] != NULL) - DOSUB_PAIR1_STARS(r, ci->progeny[k], ci->progeny[j], 0); + /* We interact all particles in that cell: + - No limit on the smallest h + - Apply the max h limit if we are recursing below the level + where h is smaller than the cell size */ + DOSELF1_BRANCH_STARS(r, c, /*limit_h_min=*/0, + /*limit_h_max=*/recurse_below_h_max); + + } else { + + /* Should we change the recursion regime because we encountered a large + particle at this level? */ + if (!recurse_below_h_max && !cell_can_recurse_in_subself_stars_task(c)) { + recurse_below_h_max = 1; + } + + /* If some particles are larger than the daughter cells, we must + process them at this level before going deeper */ + if (recurse_below_h_max) { + + /* message("Multi-level SELF! c->count=%d", c->hydro.count); */ + + /* Interact all *active* particles with h in the range [dmin/2, dmin) + with all their neighbours */ + DOSELF1_BRANCH_STARS(r, c, /*limit_h_min=*/1, /*limit_h_max=*/1); + } + + /* Recurse to the lower levels. */ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + DOSUB_SELF1_STARS(r, c->progeny[k], recurse_below_h_max, + /*gettimer=*/0); + for (int j = k + 1; j < 8; j++) { + if (c->progeny[j] != NULL) { + DOSUB_PAIR1_STARS(r, c->progeny[k], c->progeny[j], + recurse_below_h_max, + /*gettimer=*/0); + } + } } + } } - /* Otherwise, compute self-interaction. */ - else { + if (gettimer) TIMER_TOC(TIMER_DOSUB_SELF_STARS); +} - /* Drift the cell to the current timestep if needed. */ - if (!cell_are_spart_drifted(ci, r->e)) error("Interacting undrifted cell."); +/** + * @brief Find which sub-cell of a cell contain the subset of particles given + * by the list of indices. + * + * Will throw an error if the sub-cell can't be found. + * + * @param c The #cell + * @param sparts An array of #spart. + * @param ind Index of the #spart's in the particle array to find in the subs. + */ +struct cell *FIND_SUB_STARS(const struct cell *const c, + const struct spart *const sparts, + const int *const ind) { + +#ifdef SWIFT_DEBUG_CHECKS + if (!c->split) error("Can't search for subs in a non-split cell"); +#endif - DOSELF1_BRANCH_STARS(r, ci); + /* Find out in which sub-cell of ci the parts are. + * + * Note: We only need to check the first particle in the list */ + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + if (&sparts[ind[0]] >= &c->progeny[k]->stars.parts[0] && + &sparts[ind[0]] < + &c->progeny[k]->stars.parts[c->progeny[k]->stars.count]) { + return c->progeny[k]; + break; + } + } } + error("Invalid sub!"); + return NULL; +} + +void DOSUB_PAIR_SUBSET_STARS(struct runner *r, struct cell *ci, + struct spart *sparts, const int *ind, + const int scount, struct cell *cj, + const int gettimer) { + + const struct engine *e = r->e; + struct space *s = e->s; + + /* Should we even bother? */ + if (cj->hydro.count == 0) return; + if (ci->stars.count == 0) return; + if (!cell_is_active_stars(ci, e)) return; + + /* Recurse? */ + if (cell_can_recurse_in_pair_stars_task(ci) && + cell_can_recurse_in_pair_stars_task(cj)) { + + /* Find in which sub-cell of ci the particles are */ + struct cell *const sub = FIND_SUB_STARS(ci, sparts, ind); - TIMER_TOC(TIMER_DOSUB_SELF_STARS); + /* Get the type of pair and flip ci/cj if needed. */ + double shift[3]; + const int sid = space_getsid_and_swap_cells(s, &ci, &cj, shift); + + struct cell_split_pair *csp = &cell_split_pairs[sid]; + for (int k = 0; k < csp->count; k++) { + const int pid = csp->pairs[k].pid; + const int pjd = csp->pairs[k].pjd; + if (ci->progeny[pid] == sub && cj->progeny[pjd] != NULL) + DOSUB_PAIR_SUBSET_STARS(r, ci->progeny[pid], sparts, ind, scount, + cj->progeny[pjd], /*gettimer=*/0); + if (ci->progeny[pid] != NULL && cj->progeny[pjd] == sub) + DOSUB_PAIR_SUBSET_STARS(r, cj->progeny[pjd], sparts, ind, scount, + ci->progeny[pid], /*gettimer=*/0); + } + + } + /* Otherwise, compute the pair directly. */ + else if (cell_is_active_stars(ci, e)) { + + /* Do any of the cells need to be drifted first? */ + if (!cell_are_part_drifted(cj, e)) error("Cell should be drifted!"); + + DOPAIR1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount, cj); + } +} + +void DOSUB_SELF_SUBSET_STARS(struct runner *r, struct cell *ci, + struct spart *sparts, const int *ind, + const int scount, const int gettimer) { + + const struct engine *e = r->e; + + /* Should we even bother? */ + if (ci->hydro.count == 0) return; + if (ci->stars.count == 0) return; + if (!cell_is_active_stars(ci, e)) return; + + /* Recurse? */ + if (ci->split && cell_can_recurse_in_self_stars_task(ci)) { + + /* Find in which sub-cell of ci the particles are */ + struct cell *const sub = FIND_SUB_STARS(ci, sparts, ind); + + /* Loop over all progeny. */ + DOSUB_SELF_SUBSET_STARS(r, sub, sparts, ind, scount, /*gettimer=*/0); + for (int j = 0; j < 8; j++) + if (ci->progeny[j] != sub && ci->progeny[j] != NULL) + DOSUB_PAIR_SUBSET_STARS(r, sub, sparts, ind, scount, ci->progeny[j], + /*gettimer=*/0); + } + + /* Otherwise, compute self-interaction. */ + else + DOSELF1_SUBSET_BRANCH_STARS(r, ci, sparts, ind, scount); } #undef WITH_RT diff --git a/src/runner_doiact_hydro.h b/src/runner_doiact_hydro.h index 04824e87eecc32f91aa4352176212c853ceaf56b..0b0e34feed74bbfe9758fce98324f8ddcbff5897 100644 --- a/src/runner_doiact_hydro.h +++ b/src/runner_doiact_hydro.h @@ -92,8 +92,14 @@ #define _DOSUB_PAIR2(f) PASTE(runner_dosub_pair2, f) #define DOSUB_PAIR2 _DOSUB_PAIR2(FUNCTION) -#define _DOSUB_SUBSET(f) PASTE(runner_dosub_subset, f) -#define DOSUB_SUBSET _DOSUB_SUBSET(FUNCTION) +#define _DOSUB_SELF_SUBSET(f) PASTE(runner_dosub_self_subset, f) +#define DOSUB_SELF_SUBSET _DOSUB_SELF_SUBSET(FUNCTION) + +#define _DOSUB_PAIR_SUBSET(f) PASTE(runner_dosub_pair_subset, f) +#define DOSUB_PAIR_SUBSET _DOSUB_PAIR_SUBSET(FUNCTION) + +#define _FIND_SUB(f) PASTE(runner_find_sub, f) +#define FIND_SUB _FIND_SUB(FUNCTION) #define _IACT_NONSYM(f) PASTE(runner_iact_nonsym, f) #define IACT_NONSYM _IACT_NONSYM(FUNCTION) @@ -165,27 +171,37 @@ #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) -void DOSELF1_BRANCH(struct runner *r, struct cell *c); -void DOSELF2_BRANCH(struct runner *r, struct cell *c); +void DOSELF1_BRANCH(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h); +void DOSELF2_BRANCH(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h); -void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj); -void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj); +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h); +void DOPAIR2_BRANCH(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h); -void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer); -void DOSUB_SELF2(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_SELF1(struct runner *r, struct cell *c, int recurse_below_h_max, + const int gettimer); +void DOSUB_SELF2(struct runner *r, struct cell *c, int recurse_below_h_max, + const int gettimer); void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer); + int recurse_below_h_max, const int gettimer); void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer); + int recurse_below_h_max, const int gettimer); + +void DOSELF_SUBSET_BRANCH(struct runner *r, const struct cell *ci, + struct part *restrict parts, const int *ind, + const int count); -void DOSELF_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, - struct part *restrict parts, int *restrict ind, - int count); +void DOPAIR_SUBSET_BRANCH(struct runner *r, const struct cell *restrict ci, + struct part *restrict parts_i, const int *ind, + const int count, struct cell *restrict cj); -void DOPAIR_SUBSET_BRANCH(struct runner *r, struct cell *restrict ci, - struct part *restrict parts_i, int *restrict ind, - int count, struct cell *restrict cj); +void DOSUB_PAIR_SUBSET(struct runner *r, struct cell *ci, struct part *parts, + const int *ind, const int count, struct cell *cj, + const int gettimer); -void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, - int *ind, int count, struct cell *cj, int gettimer); +void DOSUB_SELF_SUBSET(struct runner *r, struct cell *ci, struct part *parts, + const int *ind, const int count, const int gettimer); diff --git a/src/runner_doiact_limiter.h b/src/runner_doiact_limiter.h index 4671c39db4a569122e9a2de6b8087307f1445b99..b4fdff50680833ca256330c4510b540c8bdf470a 100644 --- a/src/runner_doiact_limiter.h +++ b/src/runner_doiact_limiter.h @@ -92,11 +92,14 @@ #define _TIMER_DOSUB_PAIR(f) PASTE(timer_dosub_pair, f) #define TIMER_DOSUB_PAIR _TIMER_DOSUB_PAIR(FUNCTION) -void DOSELF1_BRANCH(struct runner *r, struct cell *c); +void DOSELF1_BRANCH(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h); -void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj); +void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h); -void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_SELF1(struct runner *r, struct cell *ci, int recurse_below_h_max, + const int gettimer); void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer); + int recurse_below_h_max, const int gettimer); diff --git a/src/runner_doiact_stars.h b/src/runner_doiact_stars.h index 1bc31f5e92c3f95aa0c9c0ffef6fcf2276eeb12f..4e991c2cdfcb3de4e371d89911f50cbf59cc8fce 100644 --- a/src/runner_doiact_stars.h +++ b/src/runner_doiact_stars.h @@ -59,6 +59,15 @@ #define _DOSUB_SUBSET_STARS(f) PASTE(runner_dosub_subset_stars, f) #define DOSUB_SUBSET_STARS _DOSUB_SUBSET_STARS(FUNCTION) +#define _DOSUB_SELF_SUBSET_STARS(f) PASTE(runner_dosub_self_subset_stars, f) +#define DOSUB_SELF_SUBSET_STARS _DOSUB_SELF_SUBSET_STARS(FUNCTION) + +#define _DOSUB_PAIR_SUBSET_STARS(f) PASTE(runner_dosub_pair_subset_stars, f) +#define DOSUB_PAIR_SUBSET_STARS _DOSUB_PAIR_SUBSET_STARS(FUNCTION) + +#define _FIND_SUB_STARS(f) PASTE(runner_find_sub_stars, f) +#define FIND_SUB_STARS _FIND_SUB_STARS(FUNCTION) + #define _DOSELF1_BRANCH_STARS(f) PASTE(runner_doself_branch_stars, f) #define DOSELF1_BRANCH_STARS _DOSELF1_BRANCH_STARS(FUNCTION) @@ -86,21 +95,34 @@ #define _IACT_STARS(f) PASTE(runner_iact_nonsym_stars, f) #define IACT_STARS _IACT_STARS(FUNCTION) -void DOSELF1_BRANCH_STARS(struct runner *r, struct cell *c); -void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj); +void DOSELF1_BRANCH_STARS(struct runner *r, const struct cell *c, + const int limit_min_h, const int limit_max_h); +void DOPAIR1_BRANCH_STARS(struct runner *r, struct cell *ci, struct cell *cj, + const int limit_min_h, const int limit_max_h); -void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, int gettimer); +void DOSUB_SELF1_STARS(struct runner *r, struct cell *ci, + int recurse_below_h_max, const int gettimer); void DOSUB_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer); + int recurse_below_h_max, const int gettimer); -void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, - struct spart *restrict sparts, - int *restrict ind, int scount); +void DOSELF1_SUBSET_BRANCH_STARS(struct runner *r, const struct cell *ci, + struct spart *restrict sparts, const int *ind, + const int scount); -void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, struct cell *restrict ci, +void DOPAIR1_SUBSET_BRANCH_STARS(struct runner *r, + const struct cell *restrict ci, struct spart *restrict sparts_i, - int *restrict ind, int scount, + const int *ind, const int scount, struct cell *restrict cj); +void DOSUB_PAIR_SUBSET_STARS(struct runner *r, struct cell *ci, + struct spart *sparts, const int *ind, + const int scount, struct cell *cj, + const int gettimer); + +void DOSUB_SELF_SUBSET_STARS(struct runner *r, struct cell *ci, + struct spart *sparts, const int *ind, + const int scount, const int gettimer); + void DOSUB_SUBSET_STARS(struct runner *r, struct cell *ci, struct spart *sparts, int *ind, int scount, struct cell *cj, int gettimer); diff --git a/src/runner_ghost.c b/src/runner_ghost.c index ff53dd758cbb5a59244e73cfdf694829292e8aa4..77d584a37cee45869fc2055e48e7018af131db37 100644 --- a/src/runner_ghost.c +++ b/src/runner_ghost.c @@ -510,19 +510,19 @@ void runner_do_stars_ghost(struct runner *r, struct cell *c, int timer) { /* Otherwise, sub-self interaction? */ else if (l->t->type == task_type_sub_self) - runner_dosub_subset_stars_density(r, finger, sparts, sid, scount, - NULL, 1); + runner_dosub_self_subset_stars_density(r, finger, sparts, sid, + scount, 1); /* Otherwise, sub-pair interaction? */ else if (l->t->type == task_type_sub_pair) { /* Left or right? */ if (l->t->ci == finger) - runner_dosub_subset_stars_density(r, finger, sparts, sid, - scount, l->t->cj, 1); + runner_dosub_pair_subset_stars_density(r, finger, sparts, sid, + scount, l->t->cj, 1); else - runner_dosub_subset_stars_density(r, finger, sparts, sid, - scount, l->t->ci, 1); + runner_dosub_pair_subset_stars_density(r, finger, sparts, sid, + scount, l->t->ci, 1); } } } @@ -1552,19 +1552,18 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { /* Otherwise, sub-self interaction? */ else if (l->t->type == task_type_sub_self) - runner_dosub_subset_density(r, finger, parts, pid, count, NULL, - 1); + runner_dosub_self_subset_density(r, finger, parts, pid, count, 1); /* Otherwise, sub-pair interaction? */ else if (l->t->type == task_type_sub_pair) { /* Left or right? */ if (l->t->ci == finger) - runner_dosub_subset_density(r, finger, parts, pid, count, - l->t->cj, 1); + runner_dosub_pair_subset_density(r, finger, parts, pid, count, + l->t->cj, 1); else - runner_dosub_subset_density(r, finger, parts, pid, count, - l->t->ci, 1); + runner_dosub_pair_subset_density(r, finger, parts, pid, count, + l->t->ci, 1); } } } diff --git a/src/runner_main.c b/src/runner_main.c index 14b721053661bc54b8a5be1f4b2f5700fe68cdd7..8b2d268144899bb32a614d28d7e919cae6eccbbc 100644 --- a/src/runner_main.c +++ b/src/runner_main.c @@ -205,30 +205,38 @@ void *runner_main(void *data) { /* Different types of tasks... */ switch (t->type) { case task_type_self: - if (t->subtype == task_subtype_density) - runner_doself1_branch_density(r, ci); + if (t->subtype == task_subtype_grav) + runner_doself_recursive_grav(r, ci, 1); + else if (t->subtype == task_subtype_external_grav) + runner_do_grav_external(r, ci, 1); + else if (t->subtype == task_subtype_density) + runner_doself1_branch_density(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); #ifdef EXTRA_HYDRO_LOOP else if (t->subtype == task_subtype_gradient) - runner_doself1_branch_gradient(r, ci); + runner_doself1_branch_gradient(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); #endif else if (t->subtype == task_subtype_force) - runner_doself2_branch_force(r, ci); + runner_doself2_branch_force(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_limiter) - runner_doself1_branch_limiter(r, ci); - else if (t->subtype == task_subtype_grav) - runner_doself_recursive_grav(r, ci, 1); - else if (t->subtype == task_subtype_external_grav) - runner_do_grav_external(r, ci, 1); + runner_doself1_branch_limiter(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_stars_density) - runner_doself_branch_stars_density(r, ci); + runner_doself_branch_stars_density(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); #ifdef EXTRA_STAR_LOOPS else if (t->subtype == task_subtype_stars_prep1) - runner_doself_branch_stars_prep1(r, ci); + runner_doself_branch_stars_prep1(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_stars_prep2) - runner_doself_branch_stars_prep2(r, ci); + runner_doself_branch_stars_prep2(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); #endif else if (t->subtype == task_subtype_stars_feedback) - runner_doself_branch_stars_feedback(r, ci); + runner_doself_branch_stars_feedback(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_bh_density) runner_doself_branch_bh_density(r, ci); else if (t->subtype == task_subtype_bh_swallow) @@ -240,9 +248,11 @@ void *runner_main(void *data) { else if (t->subtype == task_subtype_bh_feedback) runner_doself_branch_bh_feedback(r, ci); else if (t->subtype == task_subtype_rt_gradient) - runner_doself1_branch_rt_gradient(r, ci); + runner_doself1_branch_rt_gradient(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_rt_transport) - runner_doself2_branch_rt_transport(r, ci); + runner_doself2_branch_rt_transport(r, ci, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_sink_density) runner_doself_branch_sinks_density(r, ci); else if (t->subtype == task_subtype_sink_swallow) @@ -257,28 +267,36 @@ void *runner_main(void *data) { break; case task_type_pair: - if (t->subtype == task_subtype_density) - runner_dopair1_branch_density(r, ci, cj); + if (t->subtype == task_subtype_grav) + runner_dopair_recursive_grav(r, ci, cj, 1); + else if (t->subtype == task_subtype_density) + runner_dopair1_branch_density(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); #ifdef EXTRA_HYDRO_LOOP else if (t->subtype == task_subtype_gradient) - runner_dopair1_branch_gradient(r, ci, cj); + runner_dopair1_branch_gradient(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); #endif else if (t->subtype == task_subtype_force) - runner_dopair2_branch_force(r, ci, cj); + runner_dopair2_branch_force(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_limiter) - runner_dopair1_branch_limiter(r, ci, cj); - else if (t->subtype == task_subtype_grav) - runner_dopair_recursive_grav(r, ci, cj, 1); + runner_dopair1_branch_limiter(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_stars_density) - runner_dopair_branch_stars_density(r, ci, cj); + runner_dopair_branch_stars_density(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); #ifdef EXTRA_STAR_LOOPS else if (t->subtype == task_subtype_stars_prep1) - runner_dopair_branch_stars_prep1(r, ci, cj); + runner_dopair_branch_stars_prep1(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_stars_prep2) - runner_dopair_branch_stars_prep2(r, ci, cj); + runner_dopair_branch_stars_prep2(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); #endif else if (t->subtype == task_subtype_stars_feedback) - runner_dopair_branch_stars_feedback(r, ci, cj); + runner_dopair_branch_stars_feedback(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_bh_density) runner_dopair_branch_bh_density(r, ci, cj); else if (t->subtype == task_subtype_bh_swallow) @@ -290,9 +308,11 @@ void *runner_main(void *data) { else if (t->subtype == task_subtype_bh_feedback) runner_dopair_branch_bh_feedback(r, ci, cj); else if (t->subtype == task_subtype_rt_gradient) - runner_dopair1_branch_rt_gradient(r, ci, cj); + runner_dopair1_branch_rt_gradient(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_rt_transport) - runner_dopair2_branch_rt_transport(r, ci, cj); + runner_dopair2_branch_rt_transport(r, ci, cj, /*limit_h_min=*/0, + /*limit_h_max=*/0); else if (t->subtype == task_subtype_sink_density) runner_dopair_branch_sinks_density(r, ci, cj); else if (t->subtype == task_subtype_sink_swallow) @@ -308,25 +328,25 @@ void *runner_main(void *data) { case task_type_sub_self: if (t->subtype == task_subtype_density) - runner_dosub_self1_density(r, ci, 1); + runner_dosub_self1_density(r, ci, /*below_h_max=*/0, 1); #ifdef EXTRA_HYDRO_LOOP else if (t->subtype == task_subtype_gradient) - runner_dosub_self1_gradient(r, ci, 1); + runner_dosub_self1_gradient(r, ci, /*below_h_max=*/0, 1); #endif else if (t->subtype == task_subtype_force) - runner_dosub_self2_force(r, ci, 1); + runner_dosub_self2_force(r, ci, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_limiter) - runner_dosub_self1_limiter(r, ci, 1); + runner_dosub_self1_limiter(r, ci, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_stars_density) - runner_dosub_self_stars_density(r, ci, 1); + runner_dosub_self_stars_density(r, ci, /*below_h_max=*/0, 1); #ifdef EXTRA_STAR_LOOPS else if (t->subtype == task_subtype_stars_prep1) - runner_dosub_self_stars_prep1(r, ci, 1); + runner_dosub_self_stars_prep1(r, ci, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_stars_prep2) - runner_dosub_self_stars_prep2(r, ci, 1); + runner_dosub_self_stars_prep2(r, ci, /*below_h_max=*/0, 1); #endif else if (t->subtype == task_subtype_stars_feedback) - runner_dosub_self_stars_feedback(r, ci, 1); + runner_dosub_self_stars_feedback(r, ci, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_bh_density) runner_dosub_self_bh_density(r, ci, 1); else if (t->subtype == task_subtype_bh_swallow) @@ -338,9 +358,9 @@ void *runner_main(void *data) { else if (t->subtype == task_subtype_bh_feedback) runner_dosub_self_bh_feedback(r, ci, 1); else if (t->subtype == task_subtype_rt_gradient) - runner_dosub_self1_rt_gradient(r, ci, 1); + runner_dosub_self1_rt_gradient(r, ci, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_rt_transport) - runner_dosub_self2_rt_transport(r, ci, 1); + runner_dosub_self2_rt_transport(r, ci, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_sink_density) runner_dosub_self_sinks_density(r, ci, 1); else if (t->subtype == task_subtype_sink_swallow) @@ -356,25 +376,25 @@ void *runner_main(void *data) { case task_type_sub_pair: if (t->subtype == task_subtype_density) - runner_dosub_pair1_density(r, ci, cj, 1); + runner_dosub_pair1_density(r, ci, cj, /*below_h_max=*/0, 1); #ifdef EXTRA_HYDRO_LOOP else if (t->subtype == task_subtype_gradient) - runner_dosub_pair1_gradient(r, ci, cj, 1); + runner_dosub_pair1_gradient(r, ci, cj, /*below_h_max=*/0, 1); #endif else if (t->subtype == task_subtype_force) - runner_dosub_pair2_force(r, ci, cj, 1); + runner_dosub_pair2_force(r, ci, cj, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_limiter) - runner_dosub_pair1_limiter(r, ci, cj, 1); + runner_dosub_pair1_limiter(r, ci, cj, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_stars_density) - runner_dosub_pair_stars_density(r, ci, cj, 1); + runner_dosub_pair_stars_density(r, ci, cj, /*below_h_max=*/0, 1); #ifdef EXTRA_STAR_LOOPS else if (t->subtype == task_subtype_stars_prep1) - runner_dosub_pair_stars_prep1(r, ci, cj, 1); + runner_dosub_pair_stars_prep1(r, ci, cj, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_stars_prep2) - runner_dosub_pair_stars_prep2(r, ci, cj, 1); + runner_dosub_pair_stars_prep2(r, ci, cj, /*below_h_max=*/0, 1); #endif else if (t->subtype == task_subtype_stars_feedback) - runner_dosub_pair_stars_feedback(r, ci, cj, 1); + runner_dosub_pair_stars_feedback(r, ci, cj, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_bh_density) runner_dosub_pair_bh_density(r, ci, cj, 1); else if (t->subtype == task_subtype_bh_swallow) @@ -386,9 +406,9 @@ void *runner_main(void *data) { else if (t->subtype == task_subtype_bh_feedback) runner_dosub_pair_bh_feedback(r, ci, cj, 1); else if (t->subtype == task_subtype_rt_gradient) - runner_dosub_pair1_rt_gradient(r, ci, cj, 1); + runner_dosub_pair1_rt_gradient(r, ci, cj, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_rt_transport) - runner_dosub_pair2_rt_transport(r, ci, cj, 1); + runner_dosub_pair2_rt_transport(r, ci, cj, /*below_h_max=*/0, 1); else if (t->subtype == task_subtype_sink_density) runner_dosub_pair_sinks_density(r, ci, cj, 1); else if (t->subtype == task_subtype_sink_swallow)