diff --git a/.gitignore b/.gitignore index 4e2f82309eb0dbf56f8eab50d2df14b65865d858..7d6d9021f12ebfcb837d19c443362f1ecbc4077f 100644 --- a/.gitignore +++ b/.gitignore @@ -91,6 +91,7 @@ theory/SPH/*.pdf theory/paper_pasc/pasc_paper.pdf theory/Multipoles/fmm.pdf theory/Multipoles/fmm_standalone.pdf +theory/Multipoles/potential.pdf m4/libtool.m4 m4/ltoptions.m4 diff --git a/README b/README index 6e2b2cb2304aaa1790646de5316b7c08362779bf..2dedb32a04a7cf143c3e65560c45a68c0e5d1c2a 100644 --- a/README +++ b/README @@ -27,6 +27,7 @@ Valid options are: -f {int} Overwrite the CPU frequency (Hz) to be used for time measurements. -g Run with an external gravitational potential. -G Run with self-gravity. + -M Reconstruct the multipoles every time-step. -n {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop. -s Run with hydrodynamics. -S Run with stars. diff --git a/configure.ac b/configure.ac index 6c0a06005778c499c53ae8e596a0685a78886542..e979580470671143348d986e82ad1d1861eb3129 100644 --- a/configure.ac +++ b/configure.ac @@ -215,6 +215,21 @@ elif test "$gravity_force_checks" != "no"; then AC_DEFINE_UNQUOTED([SWIFT_GRAVITY_FORCE_CHECKS], [$enableval] ,[Enable gravity brute-force checks]) fi +# Check if we want to zero the gravity forces for all particles below some ID. +AC_ARG_ENABLE([no-gravity-below-id], + [AS_HELP_STRING([--enable-no-gravity-below-id], + [Zeros the gravitational acceleration of all particles with an ID smaller than @<:@N@:>@] + )], + [no_gravity_below_id="$enableval"], + [no_gravity_below_id="no"] +) +if test "$no_gravity_below_id" == "yes"; then + AC_MSG_ERROR(Need to specify the ID below which particles get zero forces when using --enable-no-gravity-below-id!) +elif test "$no_gravity_below_id" != "no"; then + AC_DEFINE_UNQUOTED([SWIFT_NO_GRAVITY_BELOW_ID], [$enableval] ,[Particles with smaller ID than this will have zero gravity forces]) +fi + + # Define HAVE_POSIX_MEMALIGN if it works. AX_FUNC_POSIX_MEMALIGN @@ -854,12 +869,14 @@ AC_MSG_RESULT([ Adiabatic index : $with_gamma Riemann solver : $with_riemann Cooling function : $with_cooling - External potential : $with_potential - Multipole order : $with_multipole_order - - Task debugging : $enable_task_debugging - Debugging checks : $enable_debugging_checks - Gravity checks : $gravity_force_checks + + External potential : $with_potential + Multipole order : $with_multipole_order + No gravity below ID : $no_gravity_below_id + + Task debugging : $enable_task_debugging + Debugging checks : $enable_debugging_checks + Gravity checks : $gravity_force_checks ]) # Make sure the latest git revision string gets included diff --git a/examples/main.c b/examples/main.c index 5f42f3e06ea46d0c3b73363956f470016e5d1498..66d9b2d40ae4c270dd6d00a99d8021a201146f98 100644 --- a/examples/main.c +++ b/examples/main.c @@ -82,6 +82,8 @@ void print_help_message() { "Run with an external gravitational potential."); printf(" %2s %8s %s\n", "-F", "", "Run with feedback."); printf(" %2s %8s %s\n", "-G", "", "Run with self-gravity."); + printf(" %2s %8s %s\n", "-M", "", + "Reconstruct the multipoles every time-step."); printf(" %2s %8s %s\n", "-n", "{int}", "Execute a fixed number of time steps. When unset use the time_end " "parameter to stop."); @@ -164,6 +166,7 @@ int main(int argc, char *argv[]) { int with_stars = 0; int with_fp_exceptions = 0; int with_drift_all = 0; + int with_mpole_reconstruction = 0; int verbose = 0; int nr_threads = 1; int with_verbose_timers = 0; @@ -172,7 +175,8 @@ int main(int argc, char *argv[]) { /* Parse the parameters */ int c; - while ((c = getopt(argc, argv, "acCdDef:FgGhn:sSt:Tv:y:")) != -1) switch (c) { + while ((c = getopt(argc, argv, "acCdDef:FgGhMn:sSt:Tv:y:")) != -1) + switch (c) { case 'a': with_aff = 1; break; @@ -210,6 +214,9 @@ int main(int argc, char *argv[]) { case 'h': if (myrank == 0) print_help_message(); return 0; + case 'M': + with_mpole_reconstruction = 1; + break; case 'n': if (sscanf(optarg, "%d", &nsteps) != 1) { if (myrank == 0) printf("Error parsing fixed number of steps.\n"); @@ -521,6 +528,8 @@ int main(int argc, char *argv[]) { /* Construct the engine policy */ int engine_policies = ENGINE_POLICY | engine_policy_steal; if (with_drift_all) engine_policies |= engine_policy_drift_all; + if (with_mpole_reconstruction) + engine_policies |= engine_policy_reconstruct_mpoles; if (with_hydro) engine_policies |= engine_policy_hydro; if (with_self_gravity) engine_policies |= engine_policy_self_gravity; if (with_external_gravity) engine_policies |= engine_policy_external_gravity; @@ -628,7 +637,7 @@ int main(int argc, char *argv[]) { for (int k = 0; k < timer_count; k++) printf("%.3f\t", clocks_from_ticks(timers[k])); printf("\n"); - timers_reset(0xFFFFFFFFllu); + timers_reset(timers_mask_all); } #ifdef SWIFT_DEBUG_TASKS diff --git a/src/Makefile.am b/src/Makefile.am index 1da7a0d955e488c0b96ee209080b5438356a36bc..644094eefb62abd635ec5d6066802de006f05177 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -46,7 +46,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \ sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \ dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \ - vector_power.h collectgroup.h hydro_space.h sort_part.h + gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ diff --git a/src/cell.c b/src/cell.c index 97ad8e051f0d484c5fc9d12de02898ac0c0cfe2f..a1ab6eb6c1e4d9f5d7a16dbd776ea2c0e07bfcdf 100644 --- a/src/cell.c +++ b/src/cell.c @@ -129,6 +129,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { temp->depth = c->depth + 1; temp->split = 0; temp->dx_max = 0.f; + temp->dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; @@ -1103,33 +1104,93 @@ void cell_reset_task_counters(struct cell *c) { } /** - * @brief Checks whether the cells are direct neighbours ot not. Both cells have - * to be of the same size + * @brief Recursively construct all the multipoles in a cell hierarchy. * - * @param ci First #cell. - * @param cj Second #cell. - * - * @todo Deal with periodicity. + * @param c The #cell. + * @param ti_current The current integer time. */ -int cell_are_neighbours(const struct cell *restrict ci, - const struct cell *restrict cj) { +void cell_make_multipoles(struct cell *c, integertime_t ti_current) { -#ifdef SWIFT_DEBUG_CHECKS - if (ci->width[0] != cj->width[0]) error("Cells of different size !"); -#endif + /* Reset everything */ + gravity_reset(c->multipole); - /* Maximum allowed distance */ - const double min_dist = - 1.2 * ci->width[0]; /* 1.2 accounts for rounding errors */ + if (c->split) { - /* (Manhattan) Distance between the cells */ - for (int k = 0; k < 3; k++) { - const double center_i = ci->loc[k]; - const double center_j = cj->loc[k]; - if (fabs(center_i - center_j) > min_dist) return 0; + /* Compute CoM of all progenies */ + double CoM[3] = {0., 0., 0.}; + double mass = 0.; + + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + const struct gravity_tensors *m = c->progeny[k]->multipole; + CoM[0] += m->CoM[0] * m->m_pole.M_000; + CoM[1] += m->CoM[1] * m->m_pole.M_000; + CoM[2] += m->CoM[2] * m->m_pole.M_000; + mass += m->m_pole.M_000; + } + } + c->multipole->CoM[0] = CoM[0] / mass; + c->multipole->CoM[1] = CoM[1] / mass; + c->multipole->CoM[2] = CoM[2] / mass; + + /* Now shift progeny multipoles and add them up */ + struct multipole temp; + double r_max = 0.; + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + const struct cell *cp = c->progeny[k]; + const struct multipole *m = &cp->multipole->m_pole; + + /* Contribution to multipole */ + gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM); + gravity_multipole_add(&c->multipole->m_pole, &temp); + + /* Upper limit of max CoM<->gpart distance */ + const double dx = c->multipole->CoM[0] - cp->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] - cp->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] - cp->multipole->CoM[2]; + const double r2 = dx * dx + dy * dy + dz * dz; + r_max = max(r_max, cp->multipole->r_max + sqrt(r2)); + } + } + /* Alternative upper limit of max CoM<->gpart distance */ + const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] / 2. + ? c->multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] / 2. + ? c->multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] / 2. + ? c->multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + + /* Take minimum of both limits */ + c->multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + + } else { + + if (c->gcount > 0) { + gravity_P2M(c->multipole, c->gparts, c->gcount); + const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] / 2. + ? c->multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] / 2. + ? c->multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] / 2. + ? c->multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + c->multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz); + } else { + gravity_multipole_init(&c->multipole->m_pole); + c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.; + c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.; + c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; + c->multipole->r_max = 0.; + } } - return 1; + c->ti_old_multipole = ti_current; } /** @@ -1145,6 +1206,8 @@ void cell_check_multipole(struct cell *c, void *data) { struct gravity_tensors ma; const double tolerance = 1e-3; /* Relative */ + return; + /* First recurse */ if (c->split) for (int k = 0; k < 8; k++) @@ -1244,28 +1307,45 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Un-skip the density tasks involved with this cell. */ for (struct link *l = c->density; l != NULL; l = l->next) { struct task *t = l->t; - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; scheduler_activate(s, t); /* Set the correct sorting flags */ if (t->type == task_type_pair) { + if (ci->dx_max_sort > space_maxreldx * ci->dmin) { + for (struct cell *finger = ci; finger != NULL; finger = finger->parent) + finger->sorted = 0; + } + if (cj->dx_max_sort > space_maxreldx * cj->dmin) { + for (struct cell *finger = cj; finger != NULL; finger = finger->parent) + finger->sorted = 0; + } if (!(ci->sorted & (1 << t->flags))) { - atomic_or(&ci->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(ci->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, ci->sorts); + if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift); } if (!(cj->sorted & (1 << t->flags))) { - atomic_or(&cj->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(cj->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, cj->sorts); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); } } - /* Check whether there was too much particle motion */ + /* Only interested in pair interactions as of here. */ if (t->type == task_type_pair || t->type == task_type_sub_pair) { + + /* Check whether there was too much particle motion, i.e. the + cell neighbour conditions were violated. */ if (t->tight && - (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || - ci->dx_max > space_maxreldx * ci->h_max || - cj->dx_max > space_maxreldx * cj->h_max)) + max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin) rebuild = 1; #ifdef WITH_MPI @@ -1287,10 +1367,12 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (cj->super->drift) - scheduler_activate(s, cj->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift); if (cell_is_active(cj, e)) { for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; @@ -1323,10 +1405,12 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (ci->super->drift) - scheduler_activate(s, ci->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -1341,6 +1425,14 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); } + } else if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); + } +#else + if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); } #endif } @@ -1355,7 +1447,6 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); if (c->ghost != NULL) scheduler_activate(s, c->ghost); - if (c->init != NULL) scheduler_activate(s, c->init); if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); if (c->drift != NULL) scheduler_activate(s, c->drift); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); @@ -1409,7 +1500,9 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { /* Drift from the last time the cell was drifted to the current time */ const double dt = (ti_current - ti_old) * timeBase; - float dx_max = 0.f, dx2_max = 0.f, cell_h_max = 0.f; + float dx_max = 0.f, dx2_max = 0.f; + float dx_max_sort = 0.0f, dx2_max_sort = 0.f; + float cell_h_max = 0.f; /* Check that we are actually going to move forward. */ if (ti_current < ti_old) error("Attempt to drift to the past"); @@ -1421,8 +1514,13 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { struct cell *cp = c->progeny[k]; + + /* Collect */ cell_drift_particles(cp, e); + + /* Update */ dx_max = max(dx_max, cp->dx_max); + dx_max_sort = max(dx_max_sort, cp->dx_max_sort); cell_h_max = max(cell_h_max, cp->h_max); } @@ -1443,6 +1541,11 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { gp->x_diff[1] * gp->x_diff[1] + gp->x_diff[2] * gp->x_diff[2]; dx2_max = max(dx2_max, dx2); + + /* Init gravity force fields. */ + if (gpart_is_active(gp, e)) { + gravity_init_gpart(gp); + } } /* Loop over all the gas particles in the cell */ @@ -1464,9 +1567,18 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { xp->x_diff[1] * xp->x_diff[1] + xp->x_diff[2] * xp->x_diff[2]; dx2_max = max(dx2_max, dx2); + const float dx2_sort = xp->x_diff_sort[0] * xp->x_diff_sort[0] + + xp->x_diff_sort[1] * xp->x_diff_sort[1] + + xp->x_diff_sort[2] * xp->x_diff_sort[2]; + dx2_max_sort = max(dx2_max_sort, dx2_sort); /* Maximal smoothing length */ cell_h_max = max(cell_h_max, p->h); + + /* Get ready for a density calculation */ + if (part_is_active(p, e)) { + hydro_init_part(p, &e->s->hs); + } } /* Loop over all the star particles in the cell */ @@ -1484,16 +1596,19 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); + dx_max_sort = sqrtf(dx2_max_sort); } else { cell_h_max = c->h_max; dx_max = c->dx_max; + dx_max_sort = c->dx_max_sort; } /* Store the values */ c->h_max = cell_h_max; c->dx_max = dx_max; + c->dx_max_sort = dx_max_sort; /* Update the time of the last drift */ c->ti_old = ti_current; diff --git a/src/cell.h b/src/cell.h index b3bc7363f59ba6f9b117bfa690088ab9de40a28e..c89e70960e84bb027f5e99a3cb362f2e0722b9bd 100644 --- a/src/cell.h +++ b/src/cell.h @@ -148,9 +148,6 @@ struct cell { /*! Linked list of the tasks computing this cell's gravity forces. */ struct link *grav; - /*! The particle initialistation task */ - struct task *init; - /*! The multipole initialistation task */ struct task *init_grav; @@ -239,6 +236,9 @@ struct cell { /*! Last (integer) time the cell's particle was drifted forward in time. */ integertime_t ti_old; + /*! Last (integer) time the cell's sort arrays were updated. */ + integertime_t ti_sort; + /*! Last (integer) time the cell's multipole was drifted forward in time. */ integertime_t ti_old_multipole; @@ -248,6 +248,9 @@ struct cell { /*! Maximum particle movement in this cell since last construction. */ float dx_max; + /*! Maximum particle movement in this cell since the last sort. */ + float dx_max_sort; + /*! Nr of #part in this cell. */ int count; @@ -351,8 +354,7 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts); int cell_link_sparts(struct cell *c, struct spart *sparts); void cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell *c, void *data); -int cell_are_neighbours(const struct cell *restrict ci, - const struct cell *restrict cj); +void cell_make_multipoles(struct cell *c, integertime_t ti_current); void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); void cell_check_particle_drift_point(struct cell *c, void *data); diff --git a/src/drift.h b/src/drift.h index 9957d829fd28e033df9db325186195a3b396738c..d9b79f7f0549d85b6f05e8ce4a394aaa5b2a4d8d 100644 --- a/src/drift.h +++ b/src/drift.h @@ -101,10 +101,12 @@ __attribute__((always_inline)) INLINE static void drift_part( /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, dt); - /* Compute offset since last cell construction */ - xp->x_diff[0] -= xp->v_full[0] * dt; - xp->x_diff[1] -= xp->v_full[1] * dt; - xp->x_diff[2] -= xp->v_full[2] * dt; + /* Compute offsets since last cell construction */ + for (int k = 0; k < 3; k++) { + const float dx = xp->v_full[k] * dt; + xp->x_diff[k] -= dx; + xp->x_diff_sort[k] -= dx; + } } /** diff --git a/src/engine.c b/src/engine.c index 0fa2844e26e1e574fdda2d8d7292c685db4b4cf6..770fb04e8d1ede0ecefe2e4990884da188c96a03 100644 --- a/src/engine.c +++ b/src/engine.c @@ -144,10 +144,6 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { /* Local tasks only... */ if (c->nodeID == e->nodeID) { - /* Add the init task. */ - c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c, - NULL, 0); - /* Add the two half kicks */ c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, c, NULL, 0); @@ -162,12 +158,6 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->kick2, c->timestep); scheduler_addunlock(s, c->timestep, c->kick1); - /* Add the drift task and its dependencies. */ - c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0, - c, NULL, 0); - - scheduler_addunlock(s, c->drift, c->init); - if (is_self_gravity) { /* Initialisation of the multipoles */ @@ -1020,10 +1010,6 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, /* Create the tasks and their dependencies? */ if (t_xv == NULL) { - if (ci->super->drift == NULL) - ci->super->drift = scheduler_addtask( - s, task_type_drift, task_subtype_none, 0, 0, ci->super, NULL, 0); - t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag, 0, ci, cj, 0); t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, @@ -1063,7 +1049,10 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #endif /* Drift before you send */ - scheduler_addunlock(s, ci->super->drift, t_xv); + if (ci->drift == NULL) + ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, + 0, ci, NULL, 0); + scheduler_addunlock(s, ci->drift, t_xv); /* The super-cell's timestep task should unlock the send_ti task. */ scheduler_addunlock(s, ci->super->timestep, t_ti); @@ -1693,7 +1682,7 @@ void engine_make_self_gravity_tasks(struct engine *e) { /* Are the cells to close for a MM interaction ? */ if (!gravity_multipole_accept(ci->multipole, cj->multipole, - theta_crit_inv)) + theta_crit_inv, 1)) scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci, cj, 1); } @@ -1816,15 +1805,22 @@ void engine_count_and_link_tasks(struct engine *e) { struct cell *const ci = t->ci; struct cell *const cj = t->cj; - /* Link sort tasks together. */ - if (t->type == task_type_sort && ci->split) - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) { - scheduler_addunlock(sched, ci->progeny[j]->sorts, t); - } + /* Link sort tasks to the next-higher sort task. */ + if (t->type == task_type_sort) { + struct cell *finger = t->ci->parent; + while (finger != NULL && finger->sorts == NULL) finger = finger->parent; + if (finger != NULL) scheduler_addunlock(sched, t, finger->sorts); + } + + /* Link drift tasks to the next-higher drift task. */ + else if (t->type == task_type_drift) { + struct cell *finger = ci->parent; + while (finger != NULL && finger->drift == NULL) finger = finger->parent; + if (finger != NULL) scheduler_addunlock(sched, t, finger->drift); + } /* Link self tasks to cells. */ - if (t->type == task_type_self) { + else if (t->type == task_type_self) { atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { engine_addlink(e, &ci->density, t); @@ -1895,7 +1891,6 @@ static inline void engine_make_self_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> gravity --> grav_down --> kick */ - scheduler_addunlock(sched, c->super->init, gravity); scheduler_addunlock(sched, c->super->init_grav, gravity); scheduler_addunlock(sched, gravity, c->super->grav_down); } @@ -1912,7 +1907,7 @@ static inline void engine_make_external_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> external gravity --> kick */ - scheduler_addunlock(sched, c->super->init, gravity); + scheduler_addunlock(sched, c->drift, gravity); scheduler_addunlock(sched, gravity, c->super->kick2); } @@ -2008,9 +2003,8 @@ static inline void engine_make_hydro_loops_dependencies( struct scheduler *sched, struct task *density, struct task *gradient, struct task *force, struct cell *c, int with_cooling) { - /* init --> density loop --> ghost --> gradient loop --> extra_ghost */ + /* density loop --> ghost --> gradient loop --> extra_ghost */ /* extra_ghost --> force loop */ - scheduler_addunlock(sched, c->super->init, density); scheduler_addunlock(sched, density, c->super->ghost); scheduler_addunlock(sched, c->super->ghost, gradient); scheduler_addunlock(sched, gradient, c->super->extra_ghost); @@ -2041,8 +2035,7 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, struct task *force, struct cell *c, int with_cooling) { - /* init --> density loop --> ghost --> force loop */ - scheduler_addunlock(sched, c->super->init, density); + /* density loop --> ghost --> force loop */ scheduler_addunlock(sched, density, c->super->ghost); scheduler_addunlock(sched, c->super->ghost, force); @@ -2078,8 +2071,16 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { for (int ind = 0; ind < nr_tasks; ind++) { struct task *t = &sched->tasks[ind]; + /* Sort tasks depend on the drift of the cell. */ + if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { + scheduler_addunlock(sched, t->ci->drift, t); + } + /* Self-interaction? */ - if (t->type == task_type_self && t->subtype == task_subtype_density) { + else if (t->type == task_type_self && t->subtype == task_subtype_density) { + + /* Make all density tasks depend on the drift. */ + scheduler_addunlock(sched, t->ci->drift, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ @@ -2113,6 +2114,12 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Otherwise, pair interaction? */ else if (t->type == task_type_pair && t->subtype == task_subtype_density) { + /* Make all density tasks depend on the drift. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->drift, t); + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->drift, t); + #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ struct task *t2 = scheduler_addtask( @@ -2290,6 +2297,21 @@ void engine_make_gravityrecursive_tasks(struct engine *e) { /* } */ } +void engine_check_sort_tasks(struct engine *e, struct cell *c) { + + /* Find the parent sort task, if any, and copy its flags. */ + if (c->sorts != NULL) { + struct cell *parent = c->parent; + while (parent != NULL && parent->sorts == NULL) parent = parent->parent; + if (parent != NULL) c->sorts->flags |= parent->sorts->flags; + } + + /* Recurse? */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) engine_check_sort_tasks(e, c->progeny[k]); +} + /** * @brief Fill the #space's task list. * @@ -2350,10 +2372,13 @@ void engine_maketasks(struct engine *e) { * pointers. */ for (int k = 0; k < nr_cells; k++) cell_set_super(&cells[k], NULL); - /* Append hierarchical tasks to each cells */ + /* Append hierarchical tasks to each cell. */ for (int k = 0; k < nr_cells; k++) engine_make_hierarchical_tasks(e, &cells[k]); + /* Append hierarchical tasks to each cell. */ + for (int k = 0; k < nr_cells; k++) engine_check_sort_tasks(e, &cells[k]); + /* Run through the tasks and make force tasks for each density task. Each force task depends on the cell ghosts and unlocks the kick task of its super-cell. */ @@ -2437,14 +2462,12 @@ void engine_marktasks_mapper(void *map_data, int num_elements, else if (t->type == task_type_pair || t->type == task_type_sub_pair) { /* Local pointers. */ - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; /* Too much particle movement? */ if (t->tight && - (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || - ci->dx_max > space_maxreldx * ci->h_max || - cj->dx_max > space_maxreldx * cj->h_max)) + max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin) *rebuild_space = 1; /* Set this task's skip, otherwise nothing to do. */ @@ -2456,20 +2479,37 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* If this is not a density task, we don't have to do any of the below. */ if (t->subtype != task_subtype_density) continue; - /* Set the sort flags. */ + /* Set the correct sorting flags */ if (t->type == task_type_pair) { + if (ci->dx_max_sort > space_maxreldx * ci->dmin) { + for (struct cell *finger = ci; finger != NULL; + finger = finger->parent) + finger->sorted = 0; + } + if (cj->dx_max_sort > space_maxreldx * cj->dmin) { + for (struct cell *finger = cj; finger != NULL; + finger = finger->parent) + finger->sorted = 0; + } if (!(ci->sorted & (1 << t->flags))) { - atomic_or(&ci->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(ci->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, ci->sorts); + if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift); } if (!(cj->sorted & (1 << t->flags))) { - atomic_or(&cj->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(cj->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, cj->sorts); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); } } #ifdef WITH_MPI - /* Activate the send/recv flags. */ if (ci->nodeID != engine_rank) { @@ -2488,10 +2528,12 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (cj->super->drift) - scheduler_activate(s, cj->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift); if (cell_is_active(cj, e)) { for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; @@ -2524,10 +2566,12 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (ci->super->drift) - scheduler_activate(s, ci->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -2542,15 +2586,22 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); } - } + } else if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); + } +#else + if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); + } #endif } - /* Kick/Drift/Init? */ + /* Kick/Drift? */ else if (t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_drift || t->type == task_type_init || - t->type == task_type_init_grav) { + t->type == task_type_drift || t->type == task_type_init_grav) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } @@ -3093,6 +3144,11 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); + /* Init the particle hydro data (by hand). */ + for (size_t k = 0; k < s->nr_parts; k++) + hydro_init_part(&s->parts[k], &e->s->hs); + for (size_t k = 0; k < s->nr_gparts; k++) gravity_init_gpart(&s->gparts[k]); + /* Now, launch the calculation */ TIMER_TIC; engine_launch(e, e->nr_threads); @@ -3138,6 +3194,11 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* No drift this time */ engine_skip_drift(e); + /* Init the particle hydro data (by hand). */ + for (size_t k = 0; k < s->nr_parts; k++) + hydro_init_part(&s->parts[k], &e->s->hs); + for (size_t k = 0; k < s->nr_gparts; k++) gravity_init_gpart(&s->gparts[k]); + /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -3225,6 +3286,15 @@ void engine_step(struct engine *e) { /* Are we drifting everything (a la Gadget/GIZMO) ? */ if (e->policy & engine_policy_drift_all) engine_drift_all(e); + /* Are we reconstructing the multipoles or drifting them ?*/ + if (e->policy & engine_policy_self_gravity) { + + if (e->policy & engine_policy_reconstruct_mpoles) + engine_reconstruct_multipoles(e); + else + engine_drift_top_multipoles(e); + } + /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -3248,9 +3318,6 @@ void engine_step(struct engine *e) { gravity_exact_force_compute(e->s, e); #endif - /* Do we need to drift the top-level multipoles ? */ - if (e->policy & engine_policy_self_gravity) engine_drift_top_multipoles(e); - /* Start all the tasks. */ TIMER_TIC; engine_launch(e, e->nr_threads); @@ -3262,9 +3329,8 @@ void engine_step(struct engine *e) { gravity_exact_force_check(e->s, e, 1e-1); #endif - /* Let's trigger a rebuild every-so-often for good measure */ // MATTHIEU - // improve - if (!(e->policy & engine_policy_hydro) && + /* Let's trigger a rebuild every-so-often for good measure */ + if (!(e->policy & engine_policy_hydro) && // MATTHIEU improve this (e->policy & engine_policy_self_gravity) && e->step % 20 == 0) e->forcerebuild = 1; @@ -3447,6 +3513,39 @@ void engine_drift_top_multipoles(struct engine *e) { clocks_getunit()); } +void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct cell *cells = (struct cell *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &cells[ind]; + if (c != NULL && c->nodeID == e->nodeID) { + + /* Construct the multipoles in this cell hierarchy */ + cell_make_multipoles(c, e->ti_current); + } + } +} + +/** + * @brief Reconstruct all the multipoles at all the levels in the tree. + * + * @param e The #engine. + */ +void engine_reconstruct_multipoles(struct engine *e) { + + const ticks tic = getticks(); + + threadpool_map(&e->threadpool, engine_do_reconstruct_multipoles_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 10, e); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + /** * @brief Create and fill the proxies. * diff --git a/src/engine.h b/src/engine.h index 22ee122bb082895131584942bde50509952b98ff..e62b12332d3ac1b985b8f6d7181ea66824ec4f13 100644 --- a/src/engine.h +++ b/src/engine.h @@ -66,9 +66,10 @@ enum engine_policy { engine_policy_external_gravity = (1 << 9), engine_policy_cosmology = (1 << 10), engine_policy_drift_all = (1 << 11), - engine_policy_cooling = (1 << 12), - engine_policy_sourceterms = (1 << 13), - engine_policy_stars = (1 << 14) + engine_policy_reconstruct_mpoles = (1 << 12), + engine_policy_cooling = (1 << 13), + engine_policy_sourceterms = (1 << 14), + engine_policy_stars = (1 << 15) }; extern const char *engine_policy_names[]; @@ -256,6 +257,7 @@ void engine_compute_next_snapshot_time(struct engine *e); void engine_unskip(struct engine *e); void engine_drift_all(struct engine *e); void engine_drift_top_multipoles(struct engine *e); +void engine_reconstruct_multipoles(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, diff --git a/src/gravity.c b/src/gravity.c index 405e52cc116416c27ec236340a0358443e255384..97b2955b32e1513c3d86d1d1f4da2169130feb77 100644 --- a/src/gravity.c +++ b/src/gravity.c @@ -32,6 +32,13 @@ #include "error.h" #include "version.h" +struct exact_force_data { + const struct engine *e; + const struct space *s; + int counter_global; + double const_G; +}; + /** * @brief Checks whether the file containing the exact accelerations for * the current choice of parameters already exists. @@ -83,32 +90,23 @@ int gravity_exact_force_file_exits(const struct engine *e) { } /** - * @brief Run a brute-force gravity calculation for a subset of particles. - * - * All gpart with ID modulo SWIFT_GRAVITY_FORCE_CHECKS will get their forces - * computed. - * - * @param s The #space to use. - * @param e The #engine (to access the current time). + * @brief Mapper function for the exact gravity calculation. */ -void gravity_exact_force_compute(struct space *s, const struct engine *e) { - +void gravity_exact_force_compute_mapper(void *map_data, int nr_gparts, + void *extra_data) { #ifdef SWIFT_GRAVITY_FORCE_CHECKS - const ticks tic = getticks(); - const double const_G = e->physical_constants->const_newton_G; + /* Unpack the data */ + struct gpart *restrict gparts = (struct gpart *)map_data; + struct exact_force_data *data = (struct exact_force_data *)extra_data; + const struct space *s = data->s; + const struct engine *e = data->e; + const double const_G = data->const_G; int counter = 0; - /* Let's start by checking whether we already computed these forces */ - if (gravity_exact_force_file_exits(e)) { - message("Exact accelerations already computed. Skipping calculation."); - return; - } - - /* No matching file present ? Do it then */ - for (size_t i = 0; i < s->nr_gparts; ++i) { + for (int i = 0; i < nr_gparts; ++i) { - struct gpart *gpi = &s->gparts[i]; + struct gpart *gpi = &gparts[i]; /* Is the particle active and part of the subset to be tested ? */ if (gpi->id_or_neg_offset % SWIFT_GRAVITY_FORCE_CHECKS == 0 && @@ -118,13 +116,13 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { double a_grav[3] = {0., 0., 0.}; /* Interact it with all other particles in the space.*/ - for (size_t j = 0; j < s->nr_gparts; ++j) { - - /* No self interaction */ - if (i == j) continue; + for (int j = 0; j < (int)s->nr_gparts; ++j) { struct gpart *gpj = &s->gparts[j]; + /* No self interaction */ + if (gpi == gpj) continue; + /* Compute the pairwise distance. */ const double dx[3] = {gpi->x[0] - gpj->x[0], // x gpi->x[1] - gpj->x[1], // y @@ -173,9 +171,47 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { counter++; } } + atomic_add(&data->counter_global, counter); - message("Computed exact gravity for %d gparts (took %.3f %s). ", counter, - clocks_from_ticks(getticks() - tic), clocks_getunit()); +#else + error("Gravity checking function called without the corresponding flag."); +#endif +} + +/** + * @brief Run a brute-force gravity calculation for a subset of particles. + * + * All gpart with ID modulo SWIFT_GRAVITY_FORCE_CHECKS will get their forces + * computed. + * + * @param s The #space to use. + * @param e The #engine (to access the current time). + */ +void gravity_exact_force_compute(struct space *s, const struct engine *e) { + +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + + const ticks tic = getticks(); + + /* Let's start by checking whether we already computed these forces */ + if (gravity_exact_force_file_exits(e)) { + message("Exact accelerations already computed. Skipping calculation."); + return; + } + + /* No matching file present ? Do it then */ + struct exact_force_data data; + data.e = e; + data.s = s; + data.counter_global = 0; + data.const_G = e->physical_constants->const_newton_G; + + threadpool_map(&s->e->threadpool, gravity_exact_force_compute_mapper, + s->gparts, s->nr_gparts, sizeof(struct gpart), 1000, &data); + + message("Computed exact gravity for %d gparts (took %.3f %s). ", + data.counter_global, clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("Gravity checking function called without the corresponding flag."); diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h index 337ae67f4620677e7a3dfdd1b0c527eb016504e2..d34fc2884645f6176f8f689bf1d276755f1b659b 100644 --- a/src/gravity_derivatives.h +++ b/src/gravity_derivatives.h @@ -21,7 +21,7 @@ /** * @file gravity_derivatives.h - * @brief Derivatives (up to 3rd order) of the gravitational potential. + * @brief Derivatives (up to 5th order) of the gravitational potential. * * We use the notation of Dehnen, Computational Astrophysics and Cosmology, * 1, 1, pp. 24 (2014), arXiv:1405.2255 diff --git a/src/gravity_properties.c b/src/gravity_properties.c index f52029fa1543ad1f8d0121c8c4e6d362227f4c53..626651a0426956d27182258e12ad59d24b8901f4 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -59,7 +59,8 @@ void gravity_props_init(struct gravity_props *p, void gravity_props_print(const struct gravity_props *p) { - message("Self-gravity scheme: FMM-MM"); + message("Self-gravity scheme: FMM-MM with m-poles of order %d", + SELF_GRAVITY_MULTIPOLE_ORDER); message("Self-gravity time integration: eta=%.4f", p->eta); @@ -68,7 +69,7 @@ void gravity_props_print(const struct gravity_props *p) { message("Self-gravity softening: epsilon=%.4f", p->epsilon); if (p->a_smooth != gravity_props_default_a_smooth) - message("Self-gravity smoothing-scale: a_smooth=%f", p->a_smooth); + message("Self-gravity MM smoothing-scale: a_smooth=%f", p->a_smooth); if (p->r_cut != gravity_props_default_r_cut) message("Self-gravity MM cut-off: r_cut=%f", p->r_cut); @@ -81,6 +82,7 @@ void gravity_props_print_snapshot(hid_t h_grpgrav, io_write_attribute_f(h_grpgrav, "Time integration eta", p->eta); io_write_attribute_f(h_grpgrav, "Softening length", p->epsilon); io_write_attribute_f(h_grpgrav, "Opening angle", p->theta_crit); + io_write_attribute_d(h_grpgrav, "MM order", SELF_GRAVITY_MULTIPOLE_ORDER); io_write_attribute_f(h_grpgrav, "MM a_smooth", p->a_smooth); io_write_attribute_f(h_grpgrav, "MM r_cut", p->r_cut); } diff --git a/src/gravity_softened_derivatives.h b/src/gravity_softened_derivatives.h new file mode 100644 index 0000000000000000000000000000000000000000..2ae927fa9008dbec69df0df413bf8770c16bd68f --- /dev/null +++ b/src/gravity_softened_derivatives.h @@ -0,0 +1,256 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H +#define SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H + +/** + * @file gravity_softened_derivatives.h + * @brief Derivatives of the softened gravitational potential. + * + * We use the notation of Dehnen, Computational Astrophysics and Cosmology, + * 1, 1, pp. 24 (2014), arXiv:1405.2255 + */ + +/* Some standard headers. */ +#include <math.h> + +/* Local headers. */ +#include "inline.h" + +/*************************/ +/* 0th order derivatives */ +/*************************/ + +__attribute__((always_inline)) INLINE static double D_soft_0(double u) { + + /* phi(u) = 3u^7 - 15u^6 + 28u^5 - 21u^4 + 7u^2 - 3 */ + double phi = 3. * u - 15.; + phi = phi * u + 28.; + phi = phi * u - 21.; + phi = phi * u; + phi = phi * u + 7.; + phi = phi * u; + phi = phi * u - 3.; + + return phi; +} + +/** + * @brief \f$ \phi(r_x, r_y, r_z, h) \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_000( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return eps_inv * D_soft_0(u); +} + +/*************************/ +/* 1st order derivatives */ +/*************************/ + +__attribute__((always_inline)) INLINE static double D_soft_1(double u) { + + /* phi(u) = 21u^6 - 90u^5 + 140u^4 - 84u^3 + 14u */ + double phi = 21. * u - 90.; + phi = phi * u + 140.; + phi = phi * u - 84.; + phi = phi * u; + phi = phi * u + 14.; + phi = phi * u; + + return phi; +} + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z, h)}{\partial r_x} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_100( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return -r_x * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z, h)}{\partial r_x} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_010( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return -r_y * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z, h)}{\partial r_x} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_001( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return -r_z * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/*************************/ +/* 2nd order derivatives */ +/*************************/ + +__attribute__((always_inline)) INLINE static double D_soft_2(double u) { + + /* phi(u) = 126u^5 - 450u^4 + 560u^3 - 252u^2 + 14 */ + double phi = 126. * u - 450.; + phi = phi * u + 560.; + phi = phi * u - 252.; + phi = phi * u; + phi = phi * u + 14.; + + return phi; +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_x^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_200( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return r_x * r_x * eps_inv * eps_inv * D_soft_2(u) + + (r_y * r_y + r_z * r_z) * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_y^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_020( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return r_y * r_y * eps_inv * eps_inv * D_soft_2(u) + + (r_x * r_x + r_z * r_z) * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_z^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_002( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return r_z * r_z * eps_inv * eps_inv * D_soft_2(u) + + (r_x * r_x + r_y * r_y) * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_x\partial r_y} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_110( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return r_x * r_y * eps_inv * eps_inv * D_soft_2(u) - + r_x * r_y * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_x\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_101( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return r_x * r_z * eps_inv * eps_inv * D_soft_2(u) - + r_x * r_z * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_y\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_011( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return r_y * r_z * eps_inv * eps_inv * D_soft_2(u) - + r_y * r_z * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +#endif /* SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H */ diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 332eecb27fb65a6b4da48cbb595450a432c44615..8b14c45614e4c09c48056c9398ed4bfe3ed90e38 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -27,6 +27,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 69ae79666e1db4e4f405c653cfc533606989a73a..571aaf39ed2509d95e9f68c34ab8e6c09ded3fbb 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -39,6 +39,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /* Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h index 29cb6b00190f69e03c3d038ee485519969a8c6e9..d552a3f7e86031311098293845f1aa11270c417f 100644 --- a/src/hydro/Gizmo/hydro_part.h +++ b/src/hydro/Gizmo/hydro_part.h @@ -27,6 +27,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /* Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index dabae1a546d66f61db4f9796c21b71817ca20aac..e9289c099a8a4ee698b016036e4e3d4dad481768 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -46,6 +46,9 @@ struct xpart { /*! Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /*! Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h index b6e496918fa0e7989a8bddcfc5e8ea6b332c338e..6cb6fe87393e376dc189150286079faa9f41cb68 100644 --- a/src/hydro/PressureEntropy/hydro_part.h +++ b/src/hydro/PressureEntropy/hydro_part.h @@ -38,6 +38,9 @@ struct xpart { /*! Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /*! Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Shadowswift/hydro_part.h b/src/hydro/Shadowswift/hydro_part.h index 43237d9c80a5dfa5bed2fe409281b4f89b6aa172..e25400e905b893d13ffb552da42d3fbf96d71fde 100644 --- a/src/hydro/Shadowswift/hydro_part.h +++ b/src/hydro/Shadowswift/hydro_part.h @@ -28,6 +28,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index 4a7588d312c381ef60fb97c43f8fadb1e03dfead..db9c9efd41026dd5c9faff3d9a931d399b0a167b 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -28,107 +28,46 @@ #include "minmax.h" #include "vector.h" -/* The gravity kernel is defined as a degree 6 polynomial in the distance - r. The resulting value should be post-multiplied with r^-3, resulting - in a polynomial with terms ranging from r^-3 to r^3, which are - sufficient to model both the direct potential as well as the splines - near the origin. - As in the hydro case, the 1/h^3 needs to be multiplied in afterwards */ - -/* Coefficients for the kernel. */ -#define kernel_grav_name "Gadget-2 softening kernel" -#define kernel_grav_degree 6 /* Degree of the polynomial */ -#define kernel_grav_ivals 2 /* Number of branches */ -static const float - kernel_grav_coeffs[(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] - __attribute__((aligned(16))) = {32.f, - -38.4f, - 0.f, - 10.66666667f, - 0.f, - 0.f, - 0.f, /* 0 < u < 0.5 */ - -10.66666667f, - 38.4f, - -48.f, - 21.3333333f, - 0.f, - 0.f, - -0.066666667f, /* 0.5 < u < 1 */ - 0.f, - 0.f, - 0.f, - 0.f, - 0.f, - 0.f, - 0.f}; /* 1 < u */ - /** * @brief Computes the gravity softening function. * + * This functions assumes 0 < u < 1. + * * @param u The ratio of the distance to the softening length $u = x/h$. * @param W (return) The value of the kernel function $W(x,h)$. */ __attribute__((always_inline)) INLINE static void kernel_grav_eval( float u, float *const W) { - /* Pick the correct branch of the kernel */ - const int ind = (int)min(u * (float)kernel_grav_ivals, kernel_grav_ivals); - const float *const coeffs = - &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)]; - - /* First two terms of the polynomial ... */ - float w = coeffs[0] * u + coeffs[1]; - - /* ... and the rest of them */ - for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k]; - - /* Return everything */ - *W = w / (u * u * u); + /* W(u) = 21u^6 - 90u^5 + 140u^4 - 84u^3 + 14u */ + *W = 21.f * u - 90.f; + *W = *W * u + 140.f; + *W = *W * u - 84.f; + *W = *W * u; + *W = *W * u + 14.f; + *W = *W * u; } #ifdef SWIFT_GRAVITY_FORCE_CHECKS +/** + * @brief Computes the gravity softening function in double precision. + * + * This functions assumes 0 < u < 1. + * + * @param u The ratio of the distance to the softening length $u = x/h$. + * @param W (return) The value of the kernel function $W(x,h)$. + */ __attribute__((always_inline)) INLINE static void kernel_grav_eval_double( double u, double *const W) { - static const double kernel_grav_coeffs_double[(kernel_grav_degree + 1) * - (kernel_grav_ivals + 1)] - __attribute__((aligned(16))) = {32., - -38.4, - 0., - 10.66666667, - 0., - 0., - 0., /* 0 < u < 0.5 */ - -10.66666667, - 38.4, - -48., - 21.3333333, - 0., - 0., - -0.066666667, /* 0.5 < u < 1 */ - 0., - 0., - 0., - 0., - 0., - 0., - 0.}; /* 1 < u */ - - /* Pick the correct branch of the kernel */ - const int ind = (int)min(u * (double)kernel_grav_ivals, kernel_grav_ivals); - const double *const coeffs = - &kernel_grav_coeffs_double[ind * (kernel_grav_degree + 1)]; - - /* First two terms of the polynomial ... */ - double w = coeffs[0] * u + coeffs[1]; - - /* ... and the rest of them */ - for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k]; - - /* Return everything */ - *W = w / (u * u * u); + /* W(u) = 21u^6 - 90u^5 + 140u^4 - 84u^3 + 14u */ + *W = 21. * u - 90.; + *W = *W * u + 140.; + *W = *W * u - 84.; + *W = *W * u; + *W = *W * u + 14.; + *W = *W * u; } #endif /* SWIFT_GRAVITY_FORCE_CHECKS */ diff --git a/src/multipole.h b/src/multipole.h index a633554490fe6226f8fe42befb7cb4eaf1e5135f..ea4105d37a54b30d82c722e89a416509f2032f83 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -34,6 +34,7 @@ #include "error.h" #include "gravity_derivatives.h" #include "gravity_properties.h" +#include "gravity_softened_derivatives.h" #include "inline.h" #include "kernel_gravity.h" #include "minmax.h" @@ -176,9 +177,15 @@ struct gravity_tensors { /*! Centre of mass of the matter dsitribution */ double CoM[3]; + /*! Centre of mass of the matter dsitribution at the last rebuild */ + double CoM_rebuild[3]; + /*! Upper limit of the CoM<->gpart distance */ double r_max; + /*! Upper limit of the CoM<->gpart distance at the last rebuild */ + double r_max_rebuild; + /*! Multipole mass */ struct multipole m_pole; @@ -1232,12 +1239,10 @@ INLINE static void gravity_P2M(struct gravity_tensors *m, * @param m_b The #multipole to shift. * @param pos_a The position to which m_b will be shifted. * @param pos_b The current postion of the multipole to shift. - * @param periodic Is the calculation periodic ? */ INLINE static void gravity_M2M(struct multipole *m_a, const struct multipole *m_b, - const double pos_a[3], const double pos_b[3], - int periodic) { + const double pos_a[3], const double pos_b[3]) { /* Shift bulk velocity */ m_a->vel[0] = m_b->vel[0]; m_a->vel[1] = m_b->vel[1]; @@ -1501,6 +1506,9 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, const struct gravity_props *props, int periodic) { + /* Recover some constants */ + const double eps2 = props->epsilon2; + /* Compute distance vector */ const double dx = periodic ? box_wrap(pos_b[0] - pos_a[0], 0., 1.) : pos_b[0] - pos_a[0]; @@ -1519,7 +1527,7 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, #endif /* Un-softened case */ - if (r2 > props->epsilon2) { + if (r2 > eps2) { /* 0th order term */ l_b->F_000 += m_a->M_000 * D_000(dx, dy, dz, r_inv); @@ -2041,7 +2049,54 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, /* Softened case */ } else { - message("Un-supported softened M2L interaction."); + + const double eps_inv = props->epsilon_inv; + const double r = r2 * r_inv; + + /* 0th order term */ + l_b->F_000 += m_a->M_000 * D_soft_000(dx, dy, dz, r, eps_inv); + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + /* 1st order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_100 * D_soft_100(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_010(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_001(dx, dy, dz, r, eps_inv); + + /* 1st order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_000 * D_soft_100(dx, dy, dz, r, eps_inv); + l_b->F_010 += m_a->M_000 * D_soft_010(dx, dy, dz, r, eps_inv); + l_b->F_001 += m_a->M_000 * D_soft_001(dx, dy, dz, r, eps_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + + /* 2nd order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_200 * D_soft_200(dx, dy, dz, r, eps_inv) + + m_a->M_020 * D_soft_020(dx, dy, dz, r, eps_inv) + + m_a->M_002 * D_soft_002(dx, dy, dz, r, eps_inv); + l_b->F_000 += m_a->M_110 * D_soft_110(dx, dy, dz, r, eps_inv) + + m_a->M_101 * D_soft_101(dx, dy, dz, r, eps_inv) + + m_a->M_011 * D_soft_011(dx, dy, dz, r, eps_inv); + + /* 2nd order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_100 * D_soft_200(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_110(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_101(dx, dy, dz, r, eps_inv); + l_b->F_010 += m_a->M_100 * D_soft_110(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_020(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_011(dx, dy, dz, r, eps_inv); + l_b->F_001 += m_a->M_100 * D_soft_101(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_011(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_002(dx, dy, dz, r, eps_inv); + + /* 2nd order multipole term (addition to rank 2)*/ + l_b->F_200 += m_a->M_000 * D_soft_200(dx, dy, dz, r, eps_inv); + l_b->F_020 += m_a->M_000 * D_soft_020(dx, dy, dz, r, eps_inv); + l_b->F_002 += m_a->M_000 * D_soft_002(dx, dy, dz, r, eps_inv); + l_b->F_110 += m_a->M_000 * D_soft_110(dx, dy, dz, r, eps_inv); + l_b->F_101 += m_a->M_000 * D_soft_101(dx, dy, dz, r, eps_inv); + l_b->F_011 += m_a->M_000 * D_soft_011(dx, dy, dz, r, eps_inv); +#endif } } @@ -2521,20 +2576,29 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, * @param ma The #multipole of the first #cell. * @param mb The #multipole of the second #cell. * @param theta_crit_inv The inverse of the critical opening angle. + * @param rebuild Are we using the current value of CoM or the ones from + * the last rebuild ? */ __attribute__((always_inline)) INLINE static int gravity_multipole_accept( const struct gravity_tensors *ma, const struct gravity_tensors *mb, - double theta_crit_inv) { + double theta_crit_inv, int rebuild) { - const double r_crit_a = ma->r_max * theta_crit_inv; - const double r_crit_b = mb->r_max * theta_crit_inv; + const double r_crit_a = + (rebuild ? ma->r_max_rebuild : ma->r_max) * theta_crit_inv; + const double r_crit_b = + (rebuild ? mb->r_max_rebuild : mb->r_max) * theta_crit_inv; - const double dx = ma->CoM[0] - mb->CoM[0]; - const double dy = ma->CoM[1] - mb->CoM[1]; - const double dz = ma->CoM[2] - mb->CoM[2]; + const double dx = rebuild ? ma->CoM_rebuild[0] - mb->CoM_rebuild[0] + : ma->CoM[0] - mb->CoM[0]; + const double dy = rebuild ? ma->CoM_rebuild[1] - mb->CoM_rebuild[1] + : ma->CoM[1] - mb->CoM[1]; + const double dz = rebuild ? ma->CoM_rebuild[2] - mb->CoM_rebuild[2] + : ma->CoM[2] - mb->CoM[2]; const double r2 = dx * dx + dy * dy + dz * dz; + // MATTHIEU: Make this mass-dependent ? + /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ return (r2 > (r_crit_a + r_crit_b) * (r_crit_a + r_crit_b)); } diff --git a/src/partition.c b/src/partition.c index 49dbf883e0dea3f00c93ca33ec8cb0248bbfbfaa..499efab263a9031b0116f073af8cebd5fef0c2eb 100644 --- a/src/partition.c +++ b/src/partition.c @@ -524,7 +524,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, t->type != task_type_sub_self && t->type != task_type_sub_self && t->type != task_type_ghost && t->type != task_type_kick1 && t->type != task_type_kick2 && t->type != task_type_timestep && - t->type != task_type_drift && t->type != task_type_init) + t->type != task_type_drift) continue; /* Get the task weight. */ diff --git a/src/profiler.c b/src/profiler.c index ad8338eebfd130d4088f9fd9d4fcc9856c8cc731..62ba881a15a32de63dba3cce95feab79d595aa84 100644 --- a/src/profiler.c +++ b/src/profiler.c @@ -31,6 +31,46 @@ #include "hydro.h" #include "version.h" +/* Array to store the list of file names. Order must match profiler_types + * enumerator and profiler_func_names. */ +const char *profiler_file_names[profiler_length] = {"enginecollecttimesteps", + "enginedrift", + "enginerebuild", + "schedulerreweight", + "schedulerclearwaits", + "schedulerrewait", + "schedulerenqueue", + "engineprintstats", + "enginelaunch", + "spacerebuild", + "enginemaketasks", + "enginemarktasks", + "spaceregrid", + "spacepartssort", + "spacesplit", + "spacegetcellid", + "spacecountparts"}; + +/* Array to store the list of function names. Order must match profiler_types + * enumerator and profiler_file_names. */ +const char *profiler_func_names[profiler_length] = {"engine_collect_timesteps", + "engine_drift", + "engine_rebuild", + "scheduler_reweight", + "scheduler_clear_waits", + "scheduler_rewait", + "scheduler_enqueue", + "engine_print_stats", + "engine_launch", + "space_rebuild", + "engine_maketasks", + "engine_marktasks", + "space_regrid", + "space_parts_sort", + "space_split", + "space_get_cell_id", + "space_count_parts"}; + /** * @brief Resets all timers. * @@ -38,24 +78,8 @@ * function timers. */ void profiler_reset_timers(struct profiler *profiler) { - - profiler->collect_timesteps_time = 0; - profiler->drift_time = 0; - profiler->rebuild_time = 0; - profiler->reweight_time = 0; - profiler->clear_waits_time = 0; - profiler->re_wait_time = 0; - profiler->enqueue_time = 0; - profiler->stats_time = 0; - profiler->launch_time = 0; - profiler->space_rebuild_time = 0; - profiler->engine_maketasks_time = 0; - profiler->engine_marktasks_time = 0; - profiler->space_regrid_time = 0; - profiler->space_parts_sort_time = 0; - profiler->space_split_time = 0; - profiler->space_parts_get_cell_id_time = 0; - profiler->space_count_parts_time = 0; + /* Iterate over times array and reset values. */ + for (int i = 0; i < profiler_length; i++) profiler->times[i] = 0; } /** @@ -66,8 +90,9 @@ void profiler_reset_timers(struct profiler *profiler) { * @param functionName name of function that is being timed. * @param file (return) pointer used to open output file. */ -void profiler_write_timing_info_header(const struct engine *e, char *fileName, - char *functionName, FILE **file) { +void profiler_write_timing_info_header(const struct engine *e, + const char *fileName, + const char *functionName, FILE **file) { /* Create the file name in the format: "fileName_(no. of threads)" */ char fullFileName[200] = ""; @@ -82,13 +107,14 @@ void profiler_write_timing_info_header(const struct engine *e, char *fileName, "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic " "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f " "+/- %.2f\n# Eta: %f\n" - "# %6s %14s %14s %10s %10s %16s [%s]\n", + "# %6s %14s %14s %10s %10s %10s %16s [%s]\n", hostname(), functionName, git_revision(), compiler_name(), compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, kernel_name, e->hydro_properties->target_neighbours, e->hydro_properties->delta_neighbours, e->hydro_properties->eta_neighbours, "Step", "Time", "Time-step", - "Updates", "g-Updates", "Wall-clock time", clocks_getunit()); + "Updates", "g-Updates", "s-Updates", "Wall-clock time", + clocks_getunit()); fflush(*file); } @@ -103,44 +129,11 @@ void profiler_write_timing_info_header(const struct engine *e, char *fileName, */ void profiler_write_all_timing_info_headers(const struct engine *e, struct profiler *profiler) { - - profiler_write_timing_info_header(e, "enginecollecttimesteps", - "engine_collect_timesteps", - &profiler->file_engine_collect_timesteps); - profiler_write_timing_info_header(e, "enginedrift", "engine_drift", - &profiler->file_engine_drift); - profiler_write_timing_info_header(e, "enginerebuild", "engine_rebuild", - &profiler->file_engine_rebuild); - profiler_write_timing_info_header(e, "schedulerreweight", - "scheduler_reweight", - &profiler->file_scheduler_reweight); - profiler_write_timing_info_header(e, "schedulerclearwaits", - "scheduler_clear_waits", - &profiler->file_scheduler_clear_waits); - profiler_write_timing_info_header(e, "schedulerrewait", "scheduler_rewait", - &profiler->file_scheduler_re_wait); - profiler_write_timing_info_header(e, "schedulerenqueue", "scheduler_enqueue", - &profiler->file_scheduler_enqueue); - profiler_write_timing_info_header(e, "engineprintstats", "engine_print_stats", - &profiler->file_engine_stats); - profiler_write_timing_info_header(e, "enginelaunch", "engine_launch", - &profiler->file_engine_launch); - profiler_write_timing_info_header(e, "spacerebuild", "space_rebuild", - &profiler->file_space_rebuild); - profiler_write_timing_info_header(e, "enginemaketasks", "engine_maketasks", - &profiler->file_engine_maketasks); - profiler_write_timing_info_header(e, "enginemarktasks", "engine_marktasks", - &profiler->file_engine_marktasks); - profiler_write_timing_info_header(e, "spaceregrid", "space_regrid", - &profiler->file_space_regrid); - profiler_write_timing_info_header(e, "spacepartssort", "space_parts_sort", - &profiler->file_space_parts_sort); - profiler_write_timing_info_header(e, "spacesplit", "space_split", - &profiler->file_space_split); - profiler_write_timing_info_header(e, "spacegetcellid", "space_get_cell_id", - &profiler->file_space_parts_get_cell_id); - profiler_write_timing_info_header(e, "spacecountparts", "space_count_parts", - &profiler->file_space_count_parts); + /* Iterate over files array and write file headers. */ + for (int i = 0; i < profiler_length; i++) { + profiler_write_timing_info_header( + e, profiler_file_names[i], profiler_func_names[i], &profiler->files[i]); + } } /** @@ -153,8 +146,9 @@ void profiler_write_all_timing_info_headers(const struct engine *e, void profiler_write_timing_info(const struct engine *e, ticks time, FILE *file) { - fprintf(file, " %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time, - e->timeStep, e->updates, e->g_updates, clocks_from_ticks(time)); + fprintf(file, " %6d %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->time, + e->timeStep, e->updates, e->g_updates, e->s_updates, + clocks_from_ticks(time)); fflush(file); } @@ -169,39 +163,10 @@ void profiler_write_timing_info(const struct engine *e, ticks time, void profiler_write_all_timing_info(const struct engine *e, struct profiler *profiler) { - profiler_write_timing_info(e, profiler->drift_time, - profiler->file_engine_drift); - profiler_write_timing_info(e, profiler->rebuild_time, - profiler->file_engine_rebuild); - profiler_write_timing_info(e, profiler->reweight_time, - profiler->file_scheduler_reweight); - profiler_write_timing_info(e, profiler->clear_waits_time, - profiler->file_scheduler_clear_waits); - profiler_write_timing_info(e, profiler->re_wait_time, - profiler->file_scheduler_re_wait); - profiler_write_timing_info(e, profiler->enqueue_time, - profiler->file_scheduler_enqueue); - profiler_write_timing_info(e, profiler->stats_time, - profiler->file_engine_stats); - profiler_write_timing_info(e, profiler->launch_time, - profiler->file_engine_launch); - profiler_write_timing_info(e, profiler->space_rebuild_time, - profiler->file_space_rebuild); - profiler_write_timing_info(e, profiler->engine_maketasks_time, - profiler->file_engine_maketasks); - profiler_write_timing_info(e, profiler->engine_marktasks_time, - profiler->file_engine_marktasks); - profiler_write_timing_info(e, profiler->space_regrid_time, - profiler->file_space_regrid); - profiler_write_timing_info(e, profiler->space_parts_sort_time, - profiler->file_space_parts_sort); - profiler_write_timing_info(e, profiler->space_split_time, - profiler->file_space_split); - profiler_write_timing_info(e, profiler->space_parts_get_cell_id_time, - profiler->file_space_parts_get_cell_id); - profiler_write_timing_info(e, profiler->space_count_parts_time, - profiler->file_space_count_parts); - + /* Iterate over times array and print timing info to files. */ + for (int i = 0; i < profiler_length; i++) { + profiler_write_timing_info(e, profiler->times[i], profiler->files[i]); + } /* Reset timers. */ profiler_reset_timers(profiler); } @@ -215,20 +180,6 @@ void profiler_write_all_timing_info(const struct engine *e, */ void profiler_close_files(struct profiler *profiler) { - fclose(profiler->file_engine_drift); - fclose(profiler->file_engine_rebuild); - fclose(profiler->file_scheduler_reweight); - fclose(profiler->file_scheduler_clear_waits); - fclose(profiler->file_scheduler_re_wait); - fclose(profiler->file_scheduler_enqueue); - fclose(profiler->file_engine_stats); - fclose(profiler->file_engine_launch); - fclose(profiler->file_space_rebuild); - fclose(profiler->file_engine_maketasks); - fclose(profiler->file_engine_marktasks); - fclose(profiler->file_space_regrid); - fclose(profiler->file_space_parts_sort); - fclose(profiler->file_space_split); - fclose(profiler->file_space_parts_get_cell_id); - fclose(profiler->file_space_count_parts); + /* Iterate over files array and close files. */ + for (int i = 0; i < profiler_length; i++) fclose(profiler->files[i]); } diff --git a/src/profiler.h b/src/profiler.h index b00bc986ece8b78282b11ce317a6746ecba5a50f..911d4747912d64dd46d2cf59369670ff26a92012 100644 --- a/src/profiler.h +++ b/src/profiler.h @@ -25,46 +25,33 @@ /* Local includes */ #include "engine.h" +/* Enumerator to be used as an index into the timers and files array. To add an + * extra timer extend this list, before the profiler_length value.*/ +enum profiler_types { + profiler_engine_collect_timesteps = 0, + profiler_engine_drift, + profiler_engine_rebuild, + profiler_scheduler_reweight, + profiler_scheduler_clear_waits, + profiler_scheduler_re_wait, + profiler_scheduler_enqueue, + profiler_engine_stats, + profiler_engine_launch, + profiler_space_rebuild, + profiler_engine_maketasks, + profiler_engine_marktasks, + profiler_space_regrid, + profiler_space_parts_sort, + profiler_space_split, + profiler_space_parts_get_cell_id, + profiler_space_count_parts, + profiler_length +}; + /* Profiler that holds file pointers and time taken in functions. */ struct profiler { - - /* File pointers for timing info. */ - FILE *file_engine_collect_timesteps; - FILE *file_engine_drift; - FILE *file_engine_rebuild; - FILE *file_scheduler_reweight; - FILE *file_scheduler_clear_waits; - FILE *file_scheduler_re_wait; - FILE *file_scheduler_enqueue; - FILE *file_engine_stats; - FILE *file_engine_launch; - FILE *file_space_rebuild; - FILE *file_engine_maketasks; - FILE *file_engine_marktasks; - FILE *file_space_regrid; - FILE *file_space_parts_sort; - FILE *file_space_split; - FILE *file_space_parts_get_cell_id; - FILE *file_space_count_parts; - - /* Time taken in functions. */ - ticks collect_timesteps_time; - ticks drift_time; - ticks rebuild_time; - ticks reweight_time; - ticks clear_waits_time; - ticks re_wait_time; - ticks enqueue_time; - ticks stats_time; - ticks launch_time; - ticks space_rebuild_time; - ticks engine_maketasks_time; - ticks engine_marktasks_time; - ticks space_regrid_time; - ticks space_parts_sort_time; - ticks space_split_time; - ticks space_parts_get_cell_id_time; - ticks space_count_parts_time; + FILE *files[profiler_length]; + ticks times[profiler_length]; }; /* Function prototypes. */ diff --git a/src/runner.c b/src/runner.c index 99b399b044f5afdc1729e58ca9d36c7d016ccd70..e21199a32fee2d17f5150f71cd6e03463085fa69 100644 --- a/src/runner.c +++ b/src/runner.c @@ -289,6 +289,13 @@ void runner_do_sort_ascending(struct entry *sort, int N) { } } +void runner_check_sorts(struct cell *c, int flags) { + if (flags & ~c->sorted) error("Inconsistent sort flags (downward)!"); + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_check_sorts(c->progeny[k], c->sorted); +} + /** * @brief Sort the particles in the given cell along all cardinal directions. * @@ -303,16 +310,37 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { struct entry *finger; struct entry *fingers[8]; struct part *parts = c->parts; + struct xpart *xparts = c->xparts; struct entry *sort; - int j, k, count = c->count; - int i, ind, off[8], inds[8], temp_i, missing; + const int count = c->count; float buff[8]; - double px[3]; - TIMER_TIC + TIMER_TIC; + + /* Check that the particles have been moved to the current time */ + if (!cell_is_drifted(c, r->e)) error("Sorting un-drifted cell"); - /* Clean-up the flags, i.e. filter out what's already been sorted. */ - flags &= ~c->sorted; +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts(c, c->sorted); + + /* Make sure the sort flags are consistent (upard). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->sorted & ~c->sorted) error("Inconsistent sort flags (upward)."); + } +#endif + + /* Clean-up the flags, i.e. filter out what's already been sorted, but + only if the sorts are recent. */ + if (c->ti_sort == r->e->ti_current) { + /* Ignore dimensions that have been sorted in this timestep. */ + // flags &= ~c->sorted; + } else { + /* Clean old (stale) sorts. */ + flags |= c->sorted; + c->sorted = 0; + } if (flags == 0) return; /* start by allocating the entry arrays. */ @@ -329,27 +357,35 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { if (c->split) { /* Fill in the gaps within the progeny. */ - for (k = 0; k < 8; k++) { - if (c->progeny[k] == NULL) continue; - missing = flags & ~c->progeny[k]->sorted; - if (missing) runner_do_sort(r, c->progeny[k], missing, 0); + float dx_max_sort = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + if (flags & ~c->progeny[k]->sorted || + c->progeny[k]->dx_max_sort > c->dmin * space_maxreldx) + runner_do_sort(r, c->progeny[k], flags, 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->dx_max_sort); + } } + c->dx_max_sort = dx_max_sort; /* Loop over the 13 different sort arrays. */ - for (j = 0; j < 13; j++) { + for (int j = 0; j < 13; j++) { /* Has this sort array been flagged? */ if (!(flags & (1 << j))) continue; /* Init the particle index offsets. */ - for (off[0] = 0, k = 1; k < 8; k++) + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) if (c->progeny[k - 1] != NULL) off[k] = off[k - 1] + c->progeny[k - 1]->count; else off[k] = off[k - 1]; /* Init the entries and indices. */ - for (k = 0; k < 8; k++) { + int inds[8]; + for (int k = 0; k < 8; k++) { inds[k] = k; if (c->progeny[k] != NULL && c->progeny[k]->count > 0) { fingers[k] = &c->progeny[k]->sort[j * (c->progeny[k]->count + 1)]; @@ -360,17 +396,17 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { } /* Sort the buffer. */ - for (i = 0; i < 7; i++) - for (k = i + 1; k < 8; k++) + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) if (buff[inds[k]] < buff[inds[i]]) { - temp_i = inds[i]; + int temp_i = inds[i]; inds[i] = inds[k]; inds[k] = temp_i; } /* For each entry in the new sort list. */ finger = &sort[j * (count + 1)]; - for (ind = 0; ind < count; ind++) { + for (int ind = 0; ind < count; ind++) { /* Copy the minimum into the new sort array. */ finger[ind].d = buff[inds[0]]; @@ -381,8 +417,8 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { buff[inds[0]] = fingers[inds[0]]->d; /* Find the smallest entry. */ - for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { - temp_i = inds[k - 1]; + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; inds[k - 1] = inds[k]; inds[k] = temp_i; } @@ -403,12 +439,19 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { /* Otherwise, just sort. */ else { + /* Reset the sort distance if we are in a local cell */ + if (xparts != NULL) { + for (int k = 0; k < count; k++) { + xparts[k].x_diff_sort[0] = 0.0f; + xparts[k].x_diff_sort[1] = 0.0f; + xparts[k].x_diff_sort[2] = 0.0f; + } + } + /* Fill the sort array. */ - for (k = 0; k < count; k++) { - px[0] = parts[k].x[0]; - px[1] = parts[k].x[1]; - px[2] = parts[k].x[2]; - for (j = 0; j < 13; j++) + for (int k = 0; k < count; k++) { + const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; + for (int j = 0; j < 13; j++) if (flags & (1 << j)) { sort[j * (count + 1) + k].i = k; sort[j * (count + 1) + k].d = px[0] * runner_shift[j][0] + @@ -418,26 +461,47 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { } /* Add the sentinel and sort. */ - for (j = 0; j < 13; j++) + for (int j = 0; j < 13; j++) if (flags & (1 << j)) { sort[j * (count + 1) + count].d = FLT_MAX; sort[j * (count + 1) + count].i = 0; runner_do_sort_ascending(&sort[j * (count + 1)], count); c->sorted |= (1 << j); } + + /* Finally, clear the dx_max_sort field of this cell. */ + c->dx_max_sort = 0.f; + + /* If this was not just an update, invalidate the sorts above this one. */ + if (c->ti_sort < r->e->ti_current) + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) + finger->sorted = 0; } + /* Update the sort timer. */ + c->ti_sort = r->e->ti_current; + #ifdef SWIFT_DEBUG_CHECKS /* Verify the sorting. */ - for (j = 0; j < 13; j++) { + for (int j = 0; j < 13; j++) { if (!(flags & (1 << j))) continue; finger = &sort[j * (count + 1)]; - for (k = 1; k < count; k++) { + for (int k = 1; k < count; k++) { if (finger[k].d < finger[k - 1].d) error("Sorting failed, ascending array."); if (finger[k].i >= count) error("Sorting failed, indices borked."); } } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->sorted & ~c->sorted) error("Inconsistent sort flags."); + } #endif if (clock) TIMER_TOC(timer_dosort); @@ -480,63 +544,6 @@ void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_init_grav); } -/** - * @brief Initialize the particles before the density calculation. - * - * @param r The runner thread. - * @param c The cell. - * @param timer 1 if the time is to be recorded. - */ -void runner_do_init(struct runner *r, struct cell *c, int timer) { - - struct part *restrict parts = c->parts; - struct gpart *restrict gparts = c->gparts; - const int count = c->count; - const int gcount = c->gcount; - const struct engine *e = r->e; - const struct space *s = e->s; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_init(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *restrict p = &parts[i]; - - if (part_is_active(p, e)) { - - /* Get ready for a density calculation */ - hydro_init_part(p, &s->hs); - } - } - - /* Loop over the gparts in this cell. */ - for (int i = 0; i < gcount; i++) { - - /* Get a direct pointer on the part. */ - struct gpart *restrict gp = &gparts[i]; - - if (gpart_is_active(gp, e)) { - - /* Get ready for a density calculation */ - gravity_init_gpart(gp); - } - } - } - - if (timer) TIMER_TOC(timer_init); -} - /** * @brief Intermediate task after the gradient loop that does final operations * on the gradient quantities and optionally slope limits the gradients @@ -757,9 +764,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS if (count) { - message("Smoothing length failed to converge on %i particles.", count); - - error("Aborting...."); + error("Smoothing length failed to converge on %i particles.", count); } #else if (count) @@ -784,11 +789,8 @@ static void runner_do_unskip(struct cell *c, struct engine *e) { /* Ignore empty cells. */ if (c->count == 0 && c->gcount == 0) return; - /* Unskip any active tasks. */ - if (cell_is_active(c, e)) { - const int forcerebuild = cell_unskip_tasks(c, &e->sched); - if (forcerebuild) atomic_inc(&e->forcerebuild); - } + /* Skip inactive cells. */ + if (!cell_is_active(c, e)) return; /* Recurse */ if (c->split) { @@ -799,6 +801,10 @@ static void runner_do_unskip(struct cell *c, struct engine *e) { } } } + + /* Unskip any active tasks. */ + const int forcerebuild = cell_unskip_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); } /** @@ -1356,9 +1362,8 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { if (part_is_active(p, e)) { - /* First, finish the force loop */ + /* Finish the force loop */ hydro_end_force(p); - if (p->gpart != NULL) gravity_end_force(p->gpart, const_G); } } @@ -1368,25 +1373,44 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { /* Get a handle on the gpart. */ struct gpart *restrict gp = &gparts[k]; - if (gp->type == swift_type_dark_matter) { + if (gpart_is_active(gp, e)) { - if (gpart_is_active(gp, e)) gravity_end_force(gp, const_G); - } + /* Finish the force calculation */ + gravity_end_force(gp, const_G); + +#ifdef SWIFT_NO_GRAVITY_BELOW_ID + /* Cancel gravity forces of these particles */ + if ((gp->type == swift_type_dark_matter && + gp->id_or_neg_offset < SWIFT_NO_GRAVITY_BELOW_ID) || + (gp->type == swift_type_gas && + parts[-gp->id_or_neg_offset].id < SWIFT_NO_GRAVITY_BELOW_ID) || + (gp->type == swift_type_star && + sparts[-gp->id_or_neg_offset].id < SWIFT_NO_GRAVITY_BELOW_ID)) { + + /* Don't move ! */ + gp->a_grav[0] = 0.f; + gp->a_grav[1] = 0.f; + gp->a_grav[2] = 0.f; + } +#endif #ifdef SWIFT_DEBUG_CHECKS - if (e->policy & engine_policy_self_gravity && gpart_is_active(gp, e)) { + if (e->policy & engine_policy_self_gravity) { - /* Check that this gpart has interacted with all the other particles - * (via direct or multipoles) in the box */ - gp->num_interacted++; - if (gp->num_interacted != (long long)e->s->nr_gparts) - error( - "g-particle (id=%lld, type=%d) did not interact gravitationally " - "with all other gparts gp->num_interacted=%lld, total_gparts=%zd", - gp->id_or_neg_offset, gp->type, gp->num_interacted, - e->s->nr_gparts); - } + /* Check that this gpart has interacted with all the other + * particles (via direct or multipoles) in the box */ + gp->num_interacted++; + if (gp->num_interacted != (long long)e->s->nr_gparts) + error( + "g-particle (id=%lld, type=%d) did not interact " + "gravitationally " + "with all other gparts gp->num_interacted=%lld, " + "total_gparts=%zd", + gp->id_or_neg_offset, gp->type, gp->num_interacted, + e->s->nr_gparts); + } #endif + } } /* Loop over the star particles in this cell. */ @@ -1396,9 +1420,8 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { struct spart *restrict sp = &sparts[k]; if (spart_is_active(sp, e)) { - /* First, finish the force loop */ + /* Finish the force loop */ star_end_force(sp); - gravity_end_force(sp->gpart, const_G); } } } @@ -1429,6 +1452,9 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int timer) { timebin_t time_bin_max = 0; float h_max = 0.f; + /* Clear this cell's sorted mask. */ + c->sorted = 0; + /* If this cell is a leaf, collect the particle data. */ if (!c->split) { @@ -1686,7 +1712,7 @@ void *runner_main(void *data) { if (!cell_is_active(ci, e) && t->type != task_type_sort && t->type != task_type_send && t->type != task_type_recv && - t->type != task_type_kick1) + t->type != task_type_kick1 && t->type != task_type_drift) error( "Task (type='%s/%s') should have been skipped ti_current=%lld " "c->ti_end_min=%lld", @@ -1806,9 +1832,6 @@ void *runner_main(void *data) { case task_type_init_grav: runner_do_init_grav(r, ci, 1); break; - case task_type_init: - runner_do_init(r, ci, 1); - break; case task_type_ghost: runner_do_ghost(r, ci, 1); break; diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 37147d32a901ed97ede1825ee232c41aa4f41fc6..b91a949e0e0910f61bc0b8d5fd8a283dcb24835d 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -50,8 +50,11 @@ #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) -#define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive, f) -#define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION) +#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f) +#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION) + +#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f) +#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION) #define _DOSELF_NAIVE(f) PASTE(runner_doself_naive, f) #define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION) @@ -110,21 +113,153 @@ #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) -#include "runner_doiact_nosort.h" - /** - * @brief Compute the interactions between a cell pair. + * @brief Compute the interactions between a cell pair (non-symmetric). * * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. */ -void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { +void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { const struct engine *e = r->e; +#ifndef SWIFT_DEBUG_CHECKS error("Don't use in actual runs ! Slow code !"); +#endif + +#ifdef WITH_VECTORIZATION + int icount = 0; + float r2q[VEC_SIZE] __attribute__((aligned(16))); + float hiq[VEC_SIZE] __attribute__((aligned(16))); + float hjq[VEC_SIZE] __attribute__((aligned(16))); + float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); + struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; +#endif + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + +#ifndef WITH_VECTORIZATION + + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = dx[0]; + dxq[3 * icount + 1] = dx[1]; + dxq[3 * icount + 2] = dx[2]; + hiq[icount] = hi; + hjq[icount] = pj->h; + piq[icount] = pi; + pjq[icount] = pj; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); + icount = 0; + } + +#endif + } + if (r2 < pj->h * pj->h * kernel_gamma2) { + +#ifndef WITH_VECTORIZATION + + for (int k = 0; k < 3; k++) dx[k] = -dx[k]; + IACT_NONSYM(r2, dx, pj->h, hi, pj, pi); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = -dx[0]; + dxq[3 * icount + 1] = -dx[1]; + dxq[3 * icount + 2] = -dx[2]; + hiq[icount] = pj->h; + hjq[icount] = hi; + piq[icount] = pj; + pjq[icount] = pi; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); + icount = 0; + } + +#endif + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + +#ifdef WITH_VECTORIZATION + /* Pick up any leftovers. */ + if (icount > 0) + for (int k = 0; k < icount; k++) + IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); +#endif + + TIMER_TOC(TIMER_DOPAIR); +} + +void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + +#ifndef SWIFT_DEBUG_CHECKS +// error("Don't use in actual runs ! Slow code !"); +#endif #ifdef WITH_OLD_VECTORIZATION int icount = 0; @@ -225,7 +360,9 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; +#ifndef SWIFT_DEBUG_CHECKS error("Don't use in actual runs ! Slow code !"); +#endif #ifdef WITH_OLD_VECTORIZATION int icount = 0; @@ -325,9 +462,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, int count, struct cell *restrict cj) { - struct engine *e = r->e; - - error("Don't use in actual runs ! Slow code !"); + const struct engine *e = r->e; #ifdef WITH_OLD_VECTORIZATION int icount = 0; @@ -362,6 +497,12 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; +#ifdef SWIFT_DEBUG_CHECKS + if (!part_is_active(pi, e)) + error("Trying to correct smoothing length of inactive particle !"); + +#endif + /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j; pjd++) { @@ -376,6 +517,14 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2) { @@ -436,13 +585,6 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct engine *e = r->e; -#ifdef WITH_MPI - if (ci->nodeID != cj->nodeID) { - DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj); - return; - } -#endif - #ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -478,11 +620,15 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, sid = sortlistID[sid]; /* Have the cells been sorted? */ - if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort > space_maxreldx * cj->dmin) { + DOPAIR_SUBSET_NAIVE(r, ci, parts_i, ind, count, cj); + return; + } /* Pick-out the sorted lists. */ const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; - const float dxj = cj->dx_max; + const float dxj = cj->dx_max_sort; /* Parts are on the left? */ if (!flipped) { @@ -728,13 +874,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; -#ifdef WITH_MPI - if (ci->nodeID != cj->nodeID) { - DOPAIR1_NOSORT(r, ci, cj); - return; - } -#endif - #ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -749,16 +888,18 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + error("Interacting undrifted cells."); /* Get the sort ID. */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) - error("Trying to interact unsorted cells."); + if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) + runner_do_sort(r, cj, (1 << sid), 1); /* Get the cutoff shift. */ double rshift = 0.0; @@ -768,6 +909,29 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)]; const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the dx_max_sort values in the cell are indeed an upper + bound on particle movement. */ + for (int pid = 0; pid < ci->count; pid++) { + const struct part *p = &ci->parts[sort_i[pid].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > + 1.0e-6 * max(fabsf(d), ci->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } + for (int pjd = 0; pjd < cj->count; pjd++) { + const struct part *p = &cj->parts[sort_j[pjd].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > + 1.0e-6 * max(fabsf(d), cj->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } +#endif /* SWIFT_DEBUG_CHECKS */ + /* Get some other useful values. */ const double hi_max = ci->h_max * kernel_gamma - rshift; const double hj_max = cj->h_max * kernel_gamma; @@ -777,7 +941,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { struct part *restrict parts_j = cj->parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const float dx_max = (ci->dx_max + cj->dx_max); + const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); if (cell_is_active(ci, e)) { @@ -948,13 +1112,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; -#ifdef WITH_MPI - if (ci->nodeID != cj->nodeID) { - DOPAIR2_NOSORT(r, ci, cj); - return; - } -#endif - #ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); @@ -975,16 +1132,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e)) error("Cell ci not drifted"); - if (!cell_is_drifted(cj, e)) error("Cell cj not drifted"); + if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + error("Interacting undrifted cells."); /* Get the shift ID. */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) - error("Trying to interact unsorted cells."); + if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) + runner_do_sort(r, cj, (1 << sid), 1); /* Get the cutoff shift. */ double rshift = 0.0; @@ -1003,7 +1162,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct part *restrict parts_j = cj->parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const double dx_max = (ci->dx_max + cj->dx_max); + const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); /* Collect the number of parts left and right below dt. */ int countdt_i = 0, countdt_j = 0; @@ -1400,7 +1559,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); + if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); struct part *restrict parts = c->parts; const int count = c->count; @@ -1876,7 +2035,8 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Recurse? */ if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + + cj->dx_max_sort < h / 2) { /* Different types of flags. */ @@ -2080,9 +2240,17 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Otherwise, compute the pair directly. */ else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort > ci->dmin * space_maxreldx) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort > cj->dmin * space_maxreldx) + runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. */ #if (DOPAIR1 == runner_dopair1_density) && defined(WITH_VECTORIZATION) && \ @@ -2110,10 +2278,6 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { /* Should we even bother? */ if (ci->count == 0 || !cell_is_active(ci, r->e)) return; -#ifdef SWIFT_DEBUG_CHECKS - cell_is_drifted(ci, r->e); -#endif - /* Recurse? */ if (ci->split) { @@ -2129,6 +2293,10 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { /* Otherwise, compute self-interaction. */ else { + + /* Drift the cell to the current timestep if needed. */ + if (!cell_is_drifted(ci, r->e)) cell_drift_particles(ci, r->e); + #if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \ defined(GADGET2_SPH) runner_doself1_density_vec(r, ci); @@ -2174,7 +2342,8 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Recurse? */ if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + + cj->dx_max_sort < h / 2) { /* Different types of flags. */ @@ -2378,9 +2547,17 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Otherwise, compute the pair directly. */ else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort > ci->dmin * space_maxreldx) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort > cj->dmin * space_maxreldx) + runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. */ DOPAIR2(r, ci, cj); @@ -2436,15 +2613,6 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, struct cell *sub = NULL; for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) { - // if ( parts[ ind[ 0 ] ].x[0] >= ci->progeny[k]->loc[0] && - // parts[ ind[ 0 ] ].x[0] <= ci->progeny[k]->loc[0] + - // ci->progeny[k]->width[0] && - // parts[ ind[ 0 ] ].x[1] >= ci->progeny[k]->loc[1] && - // parts[ ind[ 0 ] ].x[1] <= ci->progeny[k]->loc[1] + - // ci->progeny[k]->width[1] && - // parts[ ind[ 0 ] ].x[2] >= ci->progeny[k]->loc[2] && - // parts[ ind[ 0 ] ].x[2] <= ci->progeny[k]->loc[2] + - // ci->progeny[k]->width[2] ) { if (&parts[ind[0]] >= &ci->progeny[k]->parts[0] && &parts[ind[0]] < &ci->progeny[k]->parts[ci->progeny[k]->count]) { sub = ci->progeny[k]; @@ -2480,7 +2648,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, /* Recurse? */ if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + + cj->dx_max_sort < h / 2) { /* Get the type of pair if not specified explicitly. */ @@ -3010,11 +3179,9 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); new_sid = sortlistID[new_sid]; - /* Do any of the cells need to be sorted first? */ - if (!(cj->sorted & (1 << new_sid))) - runner_do_sort(r, cj, (1 << new_sid), 1); + /* Do any of the cells need to be drifted first? */ + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); - /* Compute the interactions. */ DOPAIR_SUBSET(r, ci, parts, ind, count, cj); } diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 065e99dba8ee08a6a4fb91245a9d53ffdc7caccc..13a55344d773e7fba000d680eae9866dffdd88e1 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -43,6 +43,10 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { TIMER_TIC; +#ifdef SWIFT_DEBUG_CHECKS + if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted."); +#endif + if (c->split) { /* Node case */ /* Add the field-tensor to all the 8 progenitors */ @@ -53,6 +57,11 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { /* Do we have a progenitor with any active g-particles ? */ if (cp != NULL && cell_is_active(cp, e)) { +#ifdef SWIFT_DEBUG_CHECKS + if (cp->ti_old_multipole != e->ti_current) + error("cp->multipole not drifted."); +#endif + /* Shift the field tensor */ gravity_L2L(&temp, &c->multipole->pot, cp->multipole->CoM, c->multipole->CoM, 0 * periodic); @@ -74,8 +83,16 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { struct gpart *gp = &gparts[i]; /* Update if active */ - if (gpart_is_active(gp, e)) + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gp->ti_drift != e->ti_current) + error("gpart not drifted to current time"); +#endif + gravity_L2P(&c->multipole->pot, c->multipole->CoM, gp); + } } } @@ -102,14 +119,17 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, TIMER_TIC; + /* Anything to do here? */ + if (!cell_is_active(ci, e)) return; + #ifdef SWIFT_DEBUG_CHECKS if (ci == cj) error("Interacting a cell with itself using M2L"); if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set."); -#endif - /* Anything to do here? */ - if (!cell_is_active(ci, e)) return; + if (ci->ti_old_multipole != e->ti_current) + error("ci->multipole not drifted."); +#endif /* Do we need to drift the multipole ? */ if (cj->ti_old_multipole != e->ti_current) cell_drift_multipole(cj, e); @@ -161,6 +181,10 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + /* Let's start by drifting things */ + if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + #if ICHECK > 0 for (int pid = 0; pid < gcount_i; pid++) { @@ -188,60 +212,80 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { /* MATTHIEU: Should we use local DP accumulators ? */ /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_i; pid++) { + if (cell_is_active(ci, e)) { + for (int pid = 0; pid < gcount_i; pid++) { - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpi = &gparts_i[pid]; + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gpi = &gparts_i[pid]; - if (!gpart_is_active(gpi, e)) continue; + if (!gpart_is_active(gpi, e)) continue; - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_j; pjd++) { + /* Loop over every particle in the other cell. */ + for (int pjd = 0; pjd < gcount_j; pjd++) { - /* Get a hold of the jth part in cj. */ - const struct gpart *restrict gpj = &gparts_j[pjd]; + /* Get a hold of the jth part in cj. */ + const struct gpart *restrict gpj = &gparts_j[pjd]; - /* Compute the pairwise distance. */ - const float dx[3] = {gpi->x[0] - gpj->x[0], // x - gpi->x[1] - gpj->x[1], // y - gpi->x[2] - gpj->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* Compute the pairwise distance. */ + const float dx[3] = {gpi->x[0] - gpj->x[0], // x + gpi->x[1] - gpj->x[1], // y + gpi->x[2] - gpj->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Interact ! */ - runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gpi->ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (gpj->ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + + /* Interact ! */ + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj); #ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; + gpi->num_interacted++; #endif + } } } /* Loop over all particles in cj... */ - for (int pjd = 0; pjd < gcount_j; pjd++) { + if (cell_is_active(cj, e)) { + for (int pjd = 0; pjd < gcount_j; pjd++) { - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpj = &gparts_j[pjd]; + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gpj = &gparts_j[pjd]; - if (!gpart_is_active(gpj, e)) continue; + if (!gpart_is_active(gpj, e)) continue; - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_i; pid++) { + /* Loop over every particle in the other cell. */ + for (int pid = 0; pid < gcount_i; pid++) { - /* Get a hold of the ith part in ci. */ - const struct gpart *restrict gpi = &gparts_i[pid]; + /* Get a hold of the ith part in ci. */ + const struct gpart *restrict gpi = &gparts_i[pid]; - /* Compute the pairwise distance. */ - const float dx[3] = {gpj->x[0] - gpi->x[0], // x - gpj->x[1] - gpi->x[1], // y - gpj->x[2] - gpi->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* Compute the pairwise distance. */ + const float dx[3] = {gpj->x[0] - gpi->x[0], // x + gpj->x[1] - gpi->x[1], // y + gpj->x[2] - gpi->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Interact ! */ - runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gpi->ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (gpj->ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + + /* Interact ! */ + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi); #ifdef SWIFT_DEBUG_CHECKS - gpj->num_interacted++; + gpj->num_interacted++; #endif + } } } @@ -273,6 +317,9 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { /* Anything to do here? */ if (!cell_is_active(c, e)) return; + /* Do we need to start by drifting things ? */ + if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); + #if ICHECK > 0 for (int pid = 0; pid < gcount; pid++) { @@ -306,6 +353,14 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { gpi->x[2] - gpj->x[2]}; // z const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gpi->ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (gpj->ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + /* Interact ! */ if (gpart_is_active(gpi, e) && gpart_is_active(gpj, e)) { @@ -403,7 +458,8 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, TIMER_TIC; /* Can we use M-M interactions ? */ - if (gravity_multipole_accept(ci->multipole, cj->multipole, theta_crit_inv)) { + if (gravity_multipole_accept(ci->multipole, cj->multipole, theta_crit_inv, + 0)) { /* MATTHIEU: make a symmetric M-M interaction function ! */ runner_dopair_grav_mm(r, ci, cj); runner_dopair_grav_mm(r, cj, ci); @@ -527,11 +583,6 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, } else { -#ifdef SWIFT_DEBUG_CHECKS - if (!cell_are_neighbours(ci, cj)) - error("Non-neighbouring cells in pair task !"); -#endif - runner_dopair_grav(r, ci, cj, 1); } } @@ -588,11 +639,20 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { if (ci == cj || cj->gcount == 0) continue; /* Check the multipole acceptance criterion */ - if (gravity_multipole_accept(ci->multipole, cj->multipole, - theta_crit_inv)) { + if (gravity_multipole_accept(ci->multipole, cj->multipole, theta_crit_inv, + 0)) { + /* Go for a (non-symmetric) M-M calculation */ runner_dopair_grav_mm(r, ci, cj); } + /* Is the criterion violated now but was OK at the last rebuild ? */ + else if (gravity_multipole_accept(ci->multipole, cj->multipole, + theta_crit_inv, 1)) { + + /* Alright, we have to take charge of that pair in a different way. */ + // MATTHIEU: We should actually open the tree-node here and recurse. + runner_dopair_grav_mm(r, ci, cj); + } } if (timer) TIMER_TOC(timer_dograv_long_range); diff --git a/src/scheduler.c b/src/scheduler.c index 62fce53762e0b3c84396dfb9c54190199b41753b..33fa1fe76c2c84b77a351846f3e0a14dc5b94404 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -65,6 +65,11 @@ void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; } */ void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb) { +#ifdef SWIFT_DEBUG_CHECKS + if (ta == NULL) error("Unlocking task is NULL."); + if (tb == NULL) error("Unlocked task is NULL."); +#endif + /* Get an index at which to store this unlock. */ const int ind = atomic_inc(&s->nr_unlocks); @@ -136,8 +141,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) { + ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID)) { t->type = task_type_none; t->subtype = task_subtype_none; t->cj = NULL; @@ -169,6 +173,19 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* convert to a self-subtask. */ t->type = task_type_sub_self; + /* Make sure we have a drift task (MATTHIEU temp. fix for gravity) */ + if (t->subtype == task_subtype_grav || + t->subtype == task_subtype_external_grav) { + lock_lock(&ci->lock); + if (ci->drift == NULL) + ci->drift = scheduler_addtask( + s, task_type_drift, task_subtype_none, 0, 0, ci, NULL, 0); + lock_unlock_blind(&ci->lock); + } + + /* Depend on local sorts on this cell. */ + if (ci->sorts != NULL) scheduler_addunlock(s, ci->sorts, t); + /* Otherwise, make tasks explicitly. */ } else { @@ -202,6 +219,15 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { } } + /* Otherwise, make sure the self task has a drift task. */ + else { + lock_lock(&ci->lock); + if (ci->drift == NULL) + ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, + 0, 0, ci, NULL, 0); + lock_unlock_blind(&ci->lock); + } + /* Pair interaction? */ } else if (t->type == task_type_pair && t->subtype != task_subtype_grav) { @@ -235,6 +261,10 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Make this task a sub task. */ t->type = task_type_sub_pair; + /* Depend on the sort tasks of both cells. */ + if (ci->sorts != NULL) scheduler_addunlock(s, ci->sorts, t); + if (cj->sorts != NULL) scheduler_addunlock(s, cj->sorts, t); + /* Otherwise, split it. */ } else { @@ -612,8 +642,11 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Otherwise, if not spilt, stitch-up the sorting. */ } else { - /* Create the sort for ci. */ + /* Create the drift and sort for ci. */ lock_lock(&ci->lock); + if (ci->drift == NULL && ci->nodeID == engine_rank) + ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, + 0, 0, ci, NULL, 0); if (ci->sorts == NULL) ci->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 1 << sid, 0, ci, NULL, 0); @@ -624,6 +657,9 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Create the sort for cj. */ lock_lock(&cj->lock); + if (cj->drift == NULL && cj->nodeID == engine_rank) + cj->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, + 0, 0, cj, NULL, 0); if (cj->sorts == NULL) cj->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, 1 << sid, 0, cj, NULL, 0); @@ -854,13 +890,13 @@ void scheduler_ranktasks(struct scheduler *s) { } /* Main loop. */ - for (int j = 0, rank = 0; left < nr_tasks; rank++) { + for (int j = 0, rank = 0; j < nr_tasks; rank++) { /* Did we get anything? */ if (j == left) error("Unsatisfiable task dependencies detected."); - const int left_old = left; /* Unlock the next layer of tasks. */ + const int left_old = left; for (; j < left_old; j++) { struct task *t = &tasks[tid[j]]; t->rank = rank; @@ -876,14 +912,14 @@ void scheduler_ranktasks(struct scheduler *s) { } } - /* Move back to the old left (like Sanders). */ + /* Move back to the old left (like Sanders!). */ j = left_old; } #ifdef SWIFT_DEBUG_CHECKS /* Verify that the tasks were ranked correctly. */ for (int k = 1; k < s->nr_tasks; k++) - if (tasks[tid[k - 1]].rank > tasks[tid[k - 1]].rank) + if (tasks[tid[k - 1]].rank > tasks[tid[k]].rank) error("Task ranking failed."); #endif } @@ -1002,9 +1038,6 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_timestep: cost = wscale * t->ci->count; break; - case task_type_init: - cost = wscale * t->ci->count; - break; default: cost = 0; break; @@ -1121,7 +1154,7 @@ void scheduler_start(struct scheduler *s) { } else if (cj == NULL) { /* self */ if (ci->ti_end_min == ti_current && t->skip && - t->type != task_type_sort && t->type) + t->type != task_type_sort && t->type != task_type_drift && t->type) error( "Task (type='%s/%s') should not have been skipped " "ti_current=%lld " @@ -1214,7 +1247,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_kick2: case task_type_drift: case task_type_timestep: - case task_type_init: qid = t->ci->super->owner; break; case task_type_pair: diff --git a/src/space.c b/src/space.c index 0ea1e4ab3299c29d3b47a1af718dc9cdcb47b285..0c67fe27ac1c7b2cae3672e7f0b1ce82be5fb804 100644 --- a/src/space.c +++ b/src/space.c @@ -206,11 +206,11 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->force = NULL; c->grav = NULL; c->dx_max = 0.0f; + c->dx_max_sort = 0.0f; c->sorted = 0; c->count = 0; c->gcount = 0; c->scount = 0; - c->init = NULL; c->init_grav = NULL; c->extra_ghost = NULL; c->ghost = NULL; @@ -2027,6 +2027,7 @@ void space_split_recursive(struct space *s, struct cell *c, cp->split = 0; cp->h_max = 0.0; cp->dx_max = 0.f; + cp->dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; cp->super = NULL; @@ -2091,8 +2092,7 @@ void space_split_recursive(struct space *s, struct cell *c, const struct multipole *m = &cp->multipole->m_pole; /* Contribution to multipole */ - gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM, - s->periodic); + gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM); gravity_multipole_add(&c->multipole->m_pole, &temp); /* Upper limit of max CoM<->gpart distance */ @@ -2116,6 +2116,10 @@ void space_split_recursive(struct space *s, struct cell *c, /* Take minimum of both limits */ c->multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + c->multipole->r_max_rebuild = c->multipole->r_max; + c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; + c->multipole->CoM_rebuild[1] = c->multipole->CoM[1]; + c->multipole->CoM_rebuild[2] = c->multipole->CoM[2]; } /* Deal with gravity */ } @@ -2194,6 +2198,10 @@ void space_split_recursive(struct space *s, struct cell *c, c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; c->multipole->r_max = 0.; } + c->multipole->r_max_rebuild = c->multipole->r_max; + c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; + c->multipole->CoM_rebuild[1] = c->multipole->CoM[1]; + c->multipole->CoM_rebuild[2] = c->multipole->CoM[2]; } } diff --git a/src/space.h b/src/space.h index 8674c39e110694ec303584627840a7ee47644552..c5f588563e5a9fb4b6cb73ac1446514f8149794f 100644 --- a/src/space.h +++ b/src/space.h @@ -45,7 +45,7 @@ #define space_maxcount_default 10000 #define space_max_top_level_cells_default 12 #define space_stretch 1.10f -#define space_maxreldx 0.25f +#define space_maxreldx 0.1f /* Maximum allowed depth of cell splits. */ #define space_cell_maxdepth 52 diff --git a/src/task.c b/src/task.c index 40cb41ffe057ac78dc3b8b073e29eedb052ec2de..e8c35e49a57595a6415c60ce7071ae1c0e3f09b7 100644 --- a/src/task.c +++ b/src/task.c @@ -47,15 +47,27 @@ #include "lock.h" /* Task type names. */ -const char *taskID_names[task_type_count] = { - "none", "sort", "self", - "pair", "sub_self", "sub_pair", - "init", "init_grav", "ghost", - "extra_ghost", "drift", "kick1", - "kick2", "timestep", "send", - "recv", "grav_top_level", "grav_long_range", - "grav_mm", "grav_down", "cooling", - "sourceterms"}; +const char *taskID_names[task_type_count] = {"none", + "sort", + "self", + "pair", + "sub_self", + "sub_pair", + "init_grav", + "ghost", + "extra_ghost", + "drift", + "kick1", + "kick2", + "timestep", + "send", + "recv", + "grav_top_level", + "grav_long_range", + "grav_mm", + "grav_down", + "cooling", + "sourceterms"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { @@ -152,7 +164,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( } break; - case task_type_init: case task_type_kick1: case task_type_kick2: case task_type_timestep: @@ -272,11 +283,6 @@ void task_unlock(struct task *t) { /* Act based on task type. */ switch (type) { - case task_type_init_grav: - cell_munlocktree(ci); - break; - - case task_type_init: case task_type_kick1: case task_type_kick2: case task_type_timestep: @@ -363,12 +369,6 @@ int task_lock(struct task *t) { #endif break; - case task_type_init_grav: - if (ci->mhold) return 0; - if (cell_mlocktree(ci) != 0) return 0; - break; - - case task_type_init: case task_type_kick1: case task_type_kick2: case task_type_timestep: diff --git a/src/task.h b/src/task.h index 02dbcbfe258b3db718b0e217fd2b65e56cc03f34..d454241494a209bedd239f1b0997e770be8048b9 100644 --- a/src/task.h +++ b/src/task.h @@ -34,6 +34,8 @@ /** * @brief The different task types. + * + * Be sure to update the taskID_names array in tasks.c if you modify this list! */ enum task_types { task_type_none = 0, @@ -42,7 +44,6 @@ enum task_types { task_type_pair, task_type_sub_self, task_type_sub_pair, - task_type_init, task_type_init_grav, task_type_ghost, task_type_extra_ghost, @@ -167,8 +168,9 @@ struct task { #endif #ifdef SWIFT_DEBUG_CHECKS - int ti_run; -#endif + /* When was this task last run? */ + integertime_t ti_run; +#endif /* SWIFT_DEBUG_CHECKS */ } SWIFT_STRUCT_ALIGN; diff --git a/tests/Makefile.am b/tests/Makefile.am index 8a75182323859cb5662b0f7d0e83bd0f25f78b4e..10343c05db2d6d1133ccdc847c6774c169414d83 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -23,7 +23,7 @@ AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS # List of programs and scripts to run in the test suite TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \ testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh \ - testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT \ + testParser.sh testSPHStep test125cells.sh testFFT \ testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \ testMatrixInversion testThreadpool testDump testLogger \ testVoronoi1D testVoronoi2D testVoronoi3D @@ -31,7 +31,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testSPHStep testPair test27cells test125cells testParser \ - testKernel testKernelGrav testFFT testInteractions testMaths \ + testKernel testFFT testInteractions testMaths \ testSymmetry testThreadpool benchmarkInteractions \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ testRiemannHLLC testMatrixInversion testDump testLogger \ @@ -62,8 +62,6 @@ testParser_SOURCES = testParser.c testKernel_SOURCES = testKernel.c -testKernelGrav_SOURCES = testKernelGrav.c - testFFT_SOURCES = testFFT.c testInteractions_SOURCES = testInteractions.c diff --git a/tests/test125cells.c b/tests/test125cells.c index 21fc3f5407f90d9b75485d08bf1077e0a20e88b2..168b4838eab5b27f359ab927a7bae2240919e82f 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -188,8 +188,9 @@ void get_solution(const struct cell *main_cell, struct solution_part *solution, } } -void reset_particles(struct cell *c, enum velocity_field vel, - enum pressure_field press, float size, float density) { +void reset_particles(struct cell *c, struct hydro_space *hs, + enum velocity_field vel, enum pressure_field press, + float size, float density) { for (int i = 0; i < c->count; ++i) { @@ -198,6 +199,8 @@ void reset_particles(struct cell *c, enum velocity_field vel, set_velocity(p, vel, size); set_energy_state(p, press, size, density); + hydro_init_part(p, hs); + #if defined(GIZMO_SPH) || defined(SHADOWFAX_SPH) float volume = p->conserved.mass / density; #if defined(GIZMO_SPH) @@ -313,6 +316,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->count = count; cell->gcount = 0; cell->dx_max = 0.; + cell->dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -323,6 +327,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->ti_old = 8; cell->ti_end_min = 8; cell->ti_end_max = 8; + cell->ti_sort = 0; // shuffle_particles(cell->parts, cell->count); @@ -519,7 +524,11 @@ int main(int argc, char *argv[]) { /* Build the infrastructure */ struct space space; - space.periodic = 0; + space.periodic = 1; + space.dim[0] = 3.; + space.dim[1] = 3.; + space.dim[2] = 3.; + hydro_space_init(&space.hs, &space); struct phys_const prog_const; prog_const.const_newton_G = 1.f; @@ -582,18 +591,13 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); - /* Start with a gentle kick */ - // runner_do_kick1(&runner, main_cell, 0); - - /* And a gentle drift */ - // runner_do_drift_particles(&runner, main_cell, 0); + /* Initialise the particles */ + for (int j = 0; j < 125; ++j) + runner_do_drift_particles(&runner, cells[j], 0); /* First, sort stuff */ for (int j = 0; j < 125; ++j) runner_do_sort(&runner, cells[j], 0x1FFF, 0); - /* Initialise the particles */ - for (int j = 0; j < 125; ++j) runner_do_init(&runner, cells[j], 0); - /* Do the density calculation */ #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) @@ -657,8 +661,6 @@ int main(int argc, char *argv[]) { /* Finally, give a gentle kick */ runner_do_end_force(&runner, main_cell, 0); - // runner_do_kick2(&runner, main_cell, 0); - const ticks toc = getticks(); time += toc - tic; @@ -674,20 +676,21 @@ int main(int argc, char *argv[]) { message("SWIFT calculation took : %15lli ticks.", time / runs); for (int j = 0; j < 125; ++j) - reset_particles(cells[j], vel, press, size, rho); + reset_particles(cells[j], &space.hs, vel, press, size, rho); /* NOW BRUTE-FORCE CALCULATION */ const ticks tic = getticks(); - /* Kick the central cell */ - // runner_do_kick1(&runner, main_cell, 0); +/* Kick the central cell */ +// runner_do_kick1(&runner, main_cell, 0); - /* And drift it */ - runner_do_drift_particles(&runner, main_cell, 0); +/* And drift it */ +// runner_do_drift_particles(&runner, main_cell, 0); - /* Initialise the particles */ - for (int j = 0; j < 125; ++j) runner_do_init(&runner, cells[j], 0); +/* Initialise the particles */ +// for (int j = 0; j < 125; ++j) runner_do_drift_particles(&runner, cells[j], +// 0); /* Do the density calculation */ #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) diff --git a/tests/test27cells.c b/tests/test27cells.c index 9e79c097462203465c03ea056569c7f448125d7e..30767fd5038e5df3f1f8b1a717a3e54c728c351c 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -159,6 +159,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->h_max = h; cell->count = count; cell->dx_max = 0.; + cell->dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -169,6 +170,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->ti_old = 8; cell->ti_end_min = 8; cell->ti_end_max = 8; + cell->ti_sort = 8; shuffle_particles(cell->parts, cell->count); @@ -407,13 +409,20 @@ int main(int argc, char *argv[]) { /* Build the infrastructure */ struct space space; - space.periodic = 0; + space.periodic = 1; + space.dim[0] = 3.; + space.dim[1] = 3.; + space.dim[2] = 3.; + + struct hydro_props hp; + hp.h_max = FLT_MAX; struct engine engine; engine.s = &space; engine.time = 0.1f; engine.ti_current = 8; engine.max_active_bin = num_time_bins; + engine.hydro_properties = &hp; struct runner runner; runner.e = &engine; @@ -429,6 +438,8 @@ int main(int argc, char *argv[]) { cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho, &partId, perturbation, vel); + runner_do_drift_particles(&runner, cells[i * 9 + j * 3 + k], 0); + runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0); } } diff --git a/tests/testVoronoi2D.c b/tests/testVoronoi2D.c index 7a35961952079494b0d1567db57abd29fa1df35b..509d3ab69976fa8618db389ebd87eedb9ea34409 100644 --- a/tests/testVoronoi2D.c +++ b/tests/testVoronoi2D.c @@ -107,8 +107,10 @@ int main() { Atot = 0.0f; /* print the cells to the stdout */ for (i = 0; i < TESTVORONOI2D_NUMCELL; ++i) { +#ifdef VORONOI_VERBOSE printf("Cell %i:\n", i); voronoi_print_cell(&cells[i]); +#endif voronoi_cell_finalize(&cells[i]); Atot += cells[i].volume; } @@ -185,8 +187,10 @@ int main() { Atot = 0.0f; /* print the cells to the stdout */ for (i = 0; i < 100; ++i) { +#ifdef VORONOI_VERBOSE printf("Cell %i:\n", i); voronoi_print_cell(&cells[i]); +#endif voronoi_cell_finalize(&cells[i]); Atot += cells[i].volume; } diff --git a/theory/Multipoles/potential.py b/theory/Multipoles/potential.py new file mode 100644 index 0000000000000000000000000000000000000000..4cb4b74b49a23997b6cfa139d11d044c6db8b81d --- /dev/null +++ b/theory/Multipoles/potential.py @@ -0,0 +1,139 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## +import matplotlib +matplotlib.use("Agg") +from pylab import * +from scipy import integrate +import distinct_colours as colours +from scipy.optimize import curve_fit +from scipy.optimize import fsolve +from matplotlib.font_manager import FontProperties +import numpy +import math + +params = {'axes.labelsize': 9, +'axes.titlesize': 10, +'font.size': 12, +'legend.fontsize': 10, +'xtick.labelsize': 9, +'ytick.labelsize': 9, +'text.usetex': True, +'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.115, +'figure.subplot.right' : 0.99 , +'figure.subplot.bottom' : 0.08 , +'figure.subplot.top' : 0.99 , +'figure.subplot.wspace' : 0. , +'figure.subplot.hspace' : 0. , +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + +# Parameters +epsilon = 2. +epsilon_plummer = epsilon +r_min = 0. +r_max = 4 +r_max_plot = 2.6 + +# Radius +r = linspace(r_min, r_max, 401) +r[0] += 1e-9 +u = r / epsilon + +# Newtonian solution +phi_newton = 1. / r +F_newton = 1. / r**2 + +# Softened potential +phi = np.zeros(np.size(r)) +W = np.zeros(np.size(r)) +F = np.zeros(np.size(r)) +phi_plummer = np.zeros(np.size(r)) +F_plummer = np.zeros(np.size(r)) +for i in range(np.size(r)): + if r[i] > epsilon: + phi[i] = 1. / r[i] + W[i] = 0. + F[i] = 1. / r[i]**2 + else: + phi[i] = (-1./epsilon) * (3.*u[i]**7 - 15.*u[i]**6 + 28.*u[i]**5 - 21.*u[i]**4 + 7.*u[i]**2 - 3.) + W[i] = (21. / 2.*math.pi) * (4.*u[i]**5 - 15.*u[i]**4 + 20.*u[i]**3 - 10.*u[i]**2 + 1.) / epsilon**6 + F[i] = (1./epsilon**2) * (21.*u[i]**6 - 90*u[i]**5 + 140.*u[i]**4 - 84.*u[i]**3 + 14*u[i]) + + phi_plummer[i] = (1./epsilon_plummer) * (1 + (u[i])**2)**(-1./2.) + F_plummer[i] = (1./epsilon_plummer**3) * r[i] / (1 + (u[i])**2)**(3./2.) + +figure() + +# Potential +subplot(311) +plot(r, phi_newton, 'g--', lw=0.8, label="${\\rm Newtonian}$") +plot(r, phi_plummer, 'b-.', lw=0.8, label="${\\rm Plummer}$") +plot(r, phi, 'r-', lw=1, label="${\\rm SWIFT}$") +plot([epsilon, epsilon], [0, 10], 'k:', alpha=0.5, lw=0.5) +legend(loc="upper right", frameon=False, handletextpad=0.1, handlelength=3.2, fontsize=8) +ylim(0, 2.1) +ylabel("$\\phi(r)$", labelpad=0) + + +xlim(0,r_max_plot) +xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""]) + +# Force +subplot(312) +plot(r, F_newton, 'g--', lw=0.8) +plot(r, F_plummer, 'b-.', lw=0.8) +plot(r, F, 'r-', lw=1) +plot([epsilon, epsilon], [0, 10], 'k:', alpha=0.5, lw=0.5) + +xlim(0,r_max_plot) +xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""]) + +ylim(0, 0.95) +ylabel("$|\\mathbf{\\nabla}\\phi(r)|$", labelpad=0) + +# Density +subplot(313) +plot(r, W, 'r-', lw=1) +plot([epsilon, epsilon], [0, 10], 'k:', alpha=0.5, lw=0.5) +xlim(0,r_max_plot) +xticks([0., 0.5, 1., 1.5, 2., 2.5], ["$0$", "$0.5$", "$1$", "$1.5$", "$2$", "$2.5$"]) +xlabel("$r$", labelpad=-1) + +ylim(0., 0.58) +ylabel("$\\rho(r)$", labelpad=0) + +savefig("potential.pdf") + + + + +#Construct potential +# phi = np.zeros(np.size(r)) +# for i in range(np.size(r)): +# if r[i] > 2*epsilon: +# phi[i] = 1./ r[i] +# elif r[i] > epsilon: +# phi[i] = -(1./epsilon) * ((32./3.)*u[i]**2 - (48./3.)*u[i]**3 + (38.4/4.)*u[i]**4 - (32./15.)*u[i]**5 + (2./30.)*u[i]**(-1) - (9/5.)) +# else: +# phi[i] = -(1./epsilon) * ((32./6.)*u[i]**2 - (38.4/4.)*u[i]**4 + (32./5.)*u[i]**4 - (7./5.))