diff --git a/configure.ac b/configure.ac index 74fede99f4fbf578af4e703cedaa42f2c278b037..84548743a97423946c38b99e4811afff74bac45a 100644 --- a/configure.ac +++ b/configure.ac @@ -842,10 +842,10 @@ esac # Gravity multipole order AC_ARG_WITH([multipole-order], [AS_HELP_STRING([--with-multipole-order=<order>], - [order of the multipole and gravitational field expansion @<:@ default: 4@:>@] + [order of the multipole and gravitational field expansion @<:@ default: 5@:>@] )], [with_multipole_order="$withval"], - [with_multipole_order="4"] + [with_multipole_order="5"] ) AC_DEFINE_UNQUOTED([SELF_GRAVITY_MULTIPOLE_ORDER], [$with_multipole_order], [Multipole order]) diff --git a/examples/EAGLE_6/eagle_6.yml b/examples/EAGLE_6/eagle_6.yml index f55ecc856953d4cb60a86e3461625318a1757693..346d2c0627ce2fdc1147d0d34fd4faab25b76559 100644 --- a/examples/EAGLE_6/eagle_6.yml +++ b/examples/EAGLE_6/eagle_6.yml @@ -30,7 +30,7 @@ Statistics: Gravity: eta: 0.025 # Constant dimensionless multiplier for time integration. theta: 0.7 # Opening angle (Multipole acceptance criterion) - epsilon: 0.0001 # Softening length (in internal units). + epsilon: 0.001 # Softening length (in internal units). # Parameters for the hydrodynamics scheme SPH: diff --git a/src/cell.c b/src/cell.c index 4502f5d265dc68540e16ed0e51e681cf5733f842..4b344d475482549c1168a32b5740b86d3a8cfad4 100644 --- a/src/cell.c +++ b/src/cell.c @@ -1290,45 +1290,18 @@ void cell_clean(struct cell *c) { if (c->progeny[k]) cell_clean(c->progeny[k]); } -/** - * @brief Checks whether a given cell needs drifting or not. - * - * @param c the #cell. - * @param e The #engine (holding current time information). - * - * @return 1 If the cell needs drifting, 0 otherwise. 
- */ -int cell_is_drift_needed(struct cell *c, const struct engine *e) { - - /* Do we have at least one active particle in the cell ?*/ - if (cell_is_active(c, e)) return 1; - - /* Loop over the pair tasks that involve this cell */ - for (struct link *l = c->density; l != NULL; l = l->next) { - - if (l->t->type != task_type_pair && l->t->type != task_type_sub_pair) - continue; - - /* Is the other cell in the pair active ? */ - if ((l->t->ci == c && cell_is_active(l->t->cj, e)) || - (l->t->cj == c && cell_is_active(l->t->ci, e))) - return 1; - } - - /* No neighbouring cell has active particles. Drift not necessary */ - return 0; -} - /** * @brief Clear the drift flags on the given cell. */ void cell_clear_drift_flags(struct cell *c, void *data) { c->do_drift = 0; c->do_sub_drift = 0; + c->do_grav_drift = 0; + c->do_grav_sub_drift = 0; } /** - * @brief Activate the drifts on the given cell. + * @brief Activate the #part drifts on the given cell. */ void cell_activate_drift_part(struct cell *c, struct scheduler *s) { @@ -1355,9 +1328,36 @@ void cell_activate_drift_part(struct cell *c, struct scheduler *s) { } /** - * @brief Activate the sorts up a cell hierarchy. + * @brief Activate the #gpart drifts on the given cell. */ +void cell_activate_drift_gpart(struct cell *c, struct scheduler *s) { + + /* If this cell is already marked for drift, quit early. */ + if (c->do_grav_drift) return; + /* Mark this cell for drifting. */ + c->do_grav_drift = 1; + + /* Set the do_grav_sub_drifts all the way up and activate the super drift + if this has not yet been done. */ + if (c == c->super) { + scheduler_activate(s, c->drift_gpart); + } else { + for (struct cell *parent = c->parent; + parent != NULL && !parent->do_grav_sub_drift; + parent = parent->parent) { + parent->do_grav_sub_drift = 1; + if (parent == c->super) { + scheduler_activate(s, parent->drift_gpart); + break; + } + } + } +} + +/** + * @brief Activate the sorts up a cell hierarchy. 
+ */ void cell_activate_sorts_up(struct cell *c, struct scheduler *s) { if (c == c->super) { scheduler_activate(s, c->sorts); @@ -1401,7 +1401,13 @@ void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s) { } /** - * @brief Traverse a sub-cell task and activate the sort tasks along the way. + * @brief Traverse a sub-cell task and activate the hydro drift tasks that are + * required + * by a hydro task + * + * @param ci The first #cell we recurse in. + * @param cj The second #cell we recurse in. + * @param s The task #scheduler. */ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, struct scheduler *s) { @@ -1668,6 +1674,172 @@ void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, } } +/** + * @brief Traverse a sub-cell task and activate the gravity drift tasks that are + * required + * by a self gravity task. + * + * @param ci The first #cell we recurse in. + * @param cj The second #cell we recurse in. + * @param s The task #scheduler. + */ +void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s) { + /* Some constants */ + const struct space *sp = s->space; + const struct engine *e = sp->e; + const int periodic = sp->periodic; + const double dim[3] = {sp->dim[0], sp->dim[1], sp->dim[2]}; + const double theta_crit2 = e->gravity_properties->theta_crit2; + + /* Self interaction? */ + if (cj == NULL) { + + /* Do anything? */ + if (!cell_is_active(ci, e)) return; + + /* Recurse? 
*/ + if (ci->split) { + + /* Loop over all progenies and pairs of progenies */ + for (int j = 0; j < 8; j++) { + if (ci->progeny[j] != NULL) { + cell_activate_subcell_grav_tasks(ci->progeny[j], NULL, s); + for (int k = j + 1; k < 8; k++) + if (ci->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci->progeny[j], ci->progeny[k], + s); + } + } + } else { + + /* We have reached the bottom of the tree: activate gpart drift */ + cell_activate_drift_gpart(ci, s); + } + } + + /* Pair interaction */ + else { + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + /* Recover the multipole information */ + struct gravity_tensors *const multi_i = ci->multipole; + struct gravity_tensors *const multi_j = cj->multipole; + const double ri_max = multi_i->r_max; + const double rj_max = multi_j->r_max; + + /* Get the distance between the CoMs */ + double dx = multi_i->CoM[0] - multi_j->CoM[0]; + double dy = multi_i->CoM[1] - multi_j->CoM[1]; + double dz = multi_i->CoM[2] - multi_j->CoM[2]; + + /* Apply BC */ + if (periodic) { + dx = nearest(dx, dim[0]); + dy = nearest(dy, dim[1]); + dz = nearest(dz, dim[2]); + } + const double r2 = dx * dx + dy * dy + dz * dz; + + /* Can we use multipoles ? */ + if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) { + + /* Ok, no need to drift anything */ + return; + } + /* Otherwise, activate the gpart drifts if we are at the bottom. */ + else if (!ci->split && !cj->split) { + + /* Activate the drifts if the cells are local. 
*/ + if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + if (ci->nodeID == engine_rank) cell_activate_drift_gpart(ci, s); + if (cj->nodeID == engine_rank) cell_activate_drift_gpart(cj, s); + } + } + /* Ok, we can still recurse */ + else { + + if (ri_max > rj_max) { + if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci->progeny[k], cj, s); + } + + } else if (cj->split) { + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci, cj->progeny[k], s); + } + + } else { + error("Fundamental error in the logic"); + } + } else if (rj_max >= ri_max) { + if (cj->split) { + + /* Loop over cj's children */ + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci, cj->progeny[k], s); + } + + } else if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + cell_activate_subcell_grav_tasks(ci->progeny[k], cj, s); + } + + } else { + error("Fundamental error in the logic"); + } + } + } + } +} + +/** + * @brief Traverse a sub-cell task and activate the gravity drift tasks that are + * required + * by an external gravity task. + * + * @param ci The #cell we recurse in. + * @param s The task #scheduler. + */ +void cell_activate_subcell_external_grav_tasks(struct cell *ci, + struct scheduler *s) { + + /* Some constants */ + const struct space *sp = s->space; + const struct engine *e = sp->e; + + /* Do anything? */ + if (!cell_is_active(ci, e)) return; + + /* Recurse? 
*/ + if (ci->split) { + + /* Loop over all progenies (no need for pairs for external gravity) */ + for (int j = 0; j < 8; j++) { + if (ci->progeny[j] != NULL) { + cell_activate_subcell_external_grav_tasks(ci->progeny[j], s); + } + } + } else { + + /* We have reached the bottom of the tree: activate gpart drift */ + cell_activate_drift_gpart(ci, s); + } +} + /** * @brief Un-skips all the tasks associated with a given cell and checks * if the space needs to be rebuilt. @@ -1693,8 +1865,13 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { (cj != NULL && cell_is_active(cj, e) && cj->nodeID == engine_rank)) { scheduler_activate(s, t); - /* Set the correct sorting flags */ - if (t->type == task_type_pair) { + /* Activate hydro drift */ + if (t->type == task_type_self) { + if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); + } + + /* Set the correct sorting flags and activate hydro drifts */ + else if (t->type == task_type_pair) { /* Store some values. */ atomic_or(&ci->requires_sorts, 1 << t->flags); atomic_or(&cj->requires_sorts, 1 << t->flags); @@ -1843,6 +2020,29 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { } } + /* Un-skip the gravity tasks involved with this cell. */ + for (struct link *l = c->grav; l != NULL; l = l->next) { + struct task *t = l->t; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + + /* Only activate tasks that involve a local active cell. 
*/ + if ((cell_is_active(ci, e) && ci->nodeID == engine_rank) || + (cj != NULL && cell_is_active(cj, e) && cj->nodeID == engine_rank)) { + scheduler_activate(s, t); + + /* Set the drifting flags */ + if (t->type == task_type_self && + t->subtype == task_subtype_external_grav) { + cell_activate_subcell_external_grav_tasks(t->ci, s); + } else if (t->type == task_type_self && t->subtype == task_subtype_grav) { + cell_activate_subcell_grav_tasks(t->ci, NULL, s); + } else if (t->type == task_type_pair) { + cell_activate_subcell_grav_tasks(t->ci, t->cj, s); + } + } + } + /* Unskip all the other task types. */ if (c->nodeID == engine_rank && cell_is_active(c, e)) { @@ -1850,15 +2050,12 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); for (struct link *l = c->force; l != NULL; l = l->next) scheduler_activate(s, l->t); - for (struct link *l = c->grav; l != NULL; l = l->next) - scheduler_activate(s, l->t); if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); if (c->ghost_in != NULL) scheduler_activate(s, c->ghost_in); if (c->ghost_out != NULL) scheduler_activate(s, c->ghost_out); if (c->ghost != NULL) scheduler_activate(s, c->ghost); if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); - if (c->drift_gpart != NULL) scheduler_activate(s, c->drift_gpart); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); if (c->kick2 != NULL) scheduler_activate(s, c->kick2); if (c->timestep != NULL) scheduler_activate(s, c->timestep); @@ -1931,7 +2128,7 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { /* Check that we are actually going to move forward. */ if (ti_current < ti_old_part) error("Attempt to drift to the past"); -#endif // SWIFT_DEBUG_CHECKS +#endif /* Are we not in a leaf ? */ if (c->split && (force || c->do_sub_drift)) { @@ -2016,8 +2213,9 @@ void cell_drift_part(struct cell *c, const struct engine *e, int force) { * * @param c The #cell. 
* @param e The #engine (to get ti_current). + * @param force Drift the particles irrespective of the #cell flags. */ -void cell_drift_gpart(struct cell *c, const struct engine *e) { +void cell_drift_gpart(struct cell *c, const struct engine *e, int force) { const double timeBase = e->timeBase; const integertime_t ti_old_gpart = c->ti_old_gpart; @@ -2029,11 +2227,19 @@ void cell_drift_gpart(struct cell *c, const struct engine *e) { const double dt = (ti_current - ti_old_gpart) * timeBase; float dx_max = 0.f, dx2_max = 0.f; + /* Drift irrespective of cell flags? */ + force |= c->do_grav_drift; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we only drift local cells. */ + if (c->nodeID != engine_rank) error("Drifting a foreign cell is nope."); + /* Check that we are actually going to move forward. */ if (ti_current < ti_old_gpart) error("Attempt to drift to the past"); +#endif /* Are we not in a leaf ? */ - if (c->split) { + if (c->split && (force || c->do_grav_sub_drift)) { /* Loop over the progeny and collect their data. 
*/ for (int k = 0; k < 8; k++) @@ -2041,13 +2247,19 @@ void cell_drift_gpart(struct cell *c, const struct engine *e) { struct cell *cp = c->progeny[k]; /* Recurse */ - cell_drift_gpart(cp, e); + cell_drift_gpart(cp, e, force); /* Update */ dx_max = max(dx_max, cp->dx_max_gpart); } - } else if (ti_current > ti_old_gpart) { + /* Store the values */ + c->dx_max_gpart = dx_max; + + /* Update the time of the last drift */ + c->ti_old_gpart = ti_current; + + } else if (!c->split && force && ti_current > ti_old_gpart) { /* Loop over all the g-particles in the cell */ const size_t nr_gparts = c->gcount; @@ -2087,16 +2299,16 @@ void cell_drift_gpart(struct cell *c, const struct engine *e) { /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); - } else { + /* Store the values */ + c->dx_max_gpart = dx_max; - dx_max = c->dx_max_gpart; + /* Update the time of the last drift */ + c->ti_old_gpart = ti_current; } - /* Store the values */ - c->dx_max_gpart = dx_max; - - /* Update the time of the last drift */ - c->ti_old_gpart = ti_current; + /* Clear the drift flags. */ + c->do_grav_drift = 0; + c->do_grav_sub_drift = 0; } /** @@ -2118,7 +2330,8 @@ void cell_drift_all_multipoles(struct cell *c, const struct engine *e) { if (ti_current < ti_old_multipole) error("Attempt to drift to the past"); /* Drift the multipole */ - if (ti_current > ti_old_multipole) gravity_drift(c->multipole, dt); + if (ti_current > ti_old_multipole) + gravity_drift(c->multipole, dt, c->dx_max_gpart); /* Are we not in a leaf ? */ if (c->split) { @@ -2153,7 +2366,8 @@ void cell_drift_multipole(struct cell *c, const struct engine *e) { /* Check that we are actually going to move forward. 
*/ if (ti_current < ti_old_multipole) error("Attempt to drift to the past"); - if (ti_current > ti_old_multipole) gravity_drift(c->multipole, dt); + if (ti_current > ti_old_multipole) + gravity_drift(c->multipole, dt, c->dx_max_gpart); /* Update the time of the last drift */ c->ti_old_multipole = ti_current; diff --git a/src/cell.h b/src/cell.h index e97400623dbb7a66aee981d21883fe4d8f73406a..9c6bfa3431bba5f84bbdd16c4e6a1842f924523e 100644 --- a/src/cell.h +++ b/src/cell.h @@ -152,9 +152,13 @@ struct cell { /*! The multipole initialistation task */ struct task *init_grav; - /*! The ghost tasks */ + /*! Dependency implicit task for the ghost (in->ghost->out)*/ struct task *ghost_in; + + /*! Dependency implicit task for the ghost (in->ghost->out)*/ struct task *ghost_out; + + /*! The ghost task itself */ struct task *ghost; /*! The extra ghost task for complex hydro schemes */ @@ -311,6 +315,21 @@ struct cell { /*! Is the #spart data of this cell being used in a sub-cell? */ int shold; + /*! Values of dx_max before the drifts, used for sub-cell tasks. */ + float dx_max_old; + + /*! Values of h_max before the drifts, used for sub-cell tasks. */ + float h_max_old; + + /*! Values of dx_max_sort before the drifts, used for sub-cell tasks. */ + float dx_max_sort_old; + + /*! Bit mask of sort directions that will be needed in the next timestep. */ + unsigned int requires_sorts; + + /*! Bit mask of sorts that need to be computed for this cell. */ + unsigned int do_sort; + /*! Number of tasks that are associated with this cell. */ short int nr_tasks; @@ -323,22 +342,17 @@ struct cell { /*! The maximal depth of this cell and its progenies */ char maxdepth; - /*! Values of dx_max and h_max before the drifts, used for sub-cell tasks. */ - float dx_max_old; - float h_max_old; - float dx_max_sort_old; - - /* Bit mask of sort directions that will be needed in the next timestep. */ - unsigned int requires_sorts; - - /*! Does this cell need to be drifted? */ + /*! 
Does this cell need to be drifted (hydro)? */ char do_drift; - /*! Do any of this cell's sub-cells need to be drifted? */ + /*! Do any of this cell's sub-cells need to be drifted (hydro)? */ char do_sub_drift; - /*! Bit mask of sorts that need to be computed for this cell. */ - unsigned int do_sort; + /*! Does this cell need to be drifted (gravity)? */ + char do_grav_drift; + + /*! Do any of this cell's sub-cells need to be drifted (gravity)? */ + char do_grav_sub_drift; /*! Do any of this cell's sub-cells need to be sorted? */ char do_sub_sort; @@ -390,18 +404,22 @@ void cell_check_part_drift_point(struct cell *c, void *data); void cell_check_gpart_drift_point(struct cell *c, void *data); void cell_check_multipole_drift_point(struct cell *c, void *data); void cell_reset_task_counters(struct cell *c); -int cell_is_drift_needed(struct cell *c, const struct engine *e); int cell_unskip_tasks(struct cell *c, struct scheduler *s); void cell_set_super(struct cell *c, struct cell *super); void cell_drift_part(struct cell *c, const struct engine *e, int force); -void cell_drift_gpart(struct cell *c, const struct engine *e); +void cell_drift_gpart(struct cell *c, const struct engine *e, int force); void cell_drift_multipole(struct cell *c, const struct engine *e); void cell_drift_all_multipoles(struct cell *c, const struct engine *e); void cell_check_timesteps(struct cell *c); void cell_store_pre_drift_values(struct cell *c); void cell_activate_subcell_tasks(struct cell *ci, struct cell *cj, struct scheduler *s); +void cell_activate_subcell_grav_tasks(struct cell *ci, struct cell *cj, + struct scheduler *s); +void cell_activate_subcell_external_grav_tasks(struct cell *ci, + struct scheduler *s); void cell_activate_drift_part(struct cell *c, struct scheduler *s); +void cell_activate_drift_gpart(struct cell *c, struct scheduler *s); void cell_activate_sorts(struct cell *c, int sid, struct scheduler *s); void cell_clear_drift_flags(struct cell *c, void *data); void 
cell_set_super_mapper(void *map_data, int num_elements, void *extra_data); diff --git a/src/engine.c b/src/engine.c index 21ea5130e869072113661b2ef237fb66dc8c7977..93c430d611cf573c643b4cf94325fb97333381a7 100644 --- a/src/engine.c +++ b/src/engine.c @@ -156,6 +156,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { const int periodic = e->s->periodic; const int is_with_hydro = (e->policy & engine_policy_hydro); const int is_self_gravity = (e->policy & engine_policy_self_gravity); + const int is_external_gravity = (e->policy & engine_policy_external_gravity); const int is_with_cooling = (e->policy & engine_policy_cooling); const int is_with_sourceterms = (e->policy & engine_policy_sourceterms); @@ -171,11 +172,15 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { /* Local tasks only... */ if (c->nodeID == e->nodeID) { - /* Add the drift task. */ + /* Add the drift tasks corresponding to the policy. */ if (is_with_hydro) { c->drift_part = scheduler_addtask(s, task_type_drift_part, task_subtype_none, 0, 0, c, NULL); } + if (is_self_gravity || is_external_gravity) { + c->drift_gpart = scheduler_addtask(s, task_type_drift_gpart, + task_subtype_none, 0, 0, c, NULL); + } /* Add the two half kicks */ c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, @@ -191,6 +196,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->kick2, c->timestep); scheduler_addunlock(s, c->timestep, c->kick1); + /* Add the self-gravity tasks */ if (is_self_gravity) { /* Initialisation of the multipoles */ @@ -211,8 +217,10 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { scheduler_addunlock(s, c->grav_down, c->kick2); } - /* Generate the ghost tasks. */ + /* Add the hydrodynamics tasks */ if (is_with_hydro) { + + /* Generate the ghost tasks. 
*/ c->ghost_in = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, /* implicit = */ 1, c, NULL); @@ -1720,7 +1728,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; const int cdim_ghost[3] = {s->cdim[0] / 4 + 1, s->cdim[1] / 4 + 1, s->cdim[2] / 4 + 1}; - const double theta_crit_inv = e->gravity_properties->theta_crit_inv; + const double theta_crit2 = e->gravity_properties->theta_crit2; struct cell *cells = s->cells_top; const int n_ghosts = cdim_ghost[0] * cdim_ghost[1] * cdim_ghost[2] * 2; @@ -1776,7 +1784,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, if (cj->nodeID != nodeID) continue; // MATTHIEU /* Recover the multipole information */ - struct gravity_tensors *const multi_j = cj->multipole; + const struct gravity_tensors *const multi_j = cj->multipole; /* Get the distance between the CoMs */ double dx = CoM_i[0] - multi_j->CoM[0]; @@ -1792,8 +1800,8 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, const double r2 = dx * dx + dy * dy + dz * dz; /* Are the cells too close for a MM interaction ? 
*/ - if (!gravity_multipole_accept_rebuild(multi_i, multi_j, - theta_crit_inv, r2)) { + if (!gravity_M2L_accept(multi_i->r_max_rebuild, + multi_j->r_max_rebuild, theta_crit2, r2)) { /* Ok, we need to add a direct pair calculation */ scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, @@ -1839,11 +1847,9 @@ void engine_make_self_gravity_tasks(struct engine *e) { /* Make the ghosts implicit and add the dependencies */ for (int n = 0; n < n_ghosts / 2; ++n) { ghosts[2 * n + 0] = scheduler_addtask( - sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL); + sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL); ghosts[2 * n + 1] = scheduler_addtask( - sched, task_type_grav_ghost, task_subtype_none, 0, 0, NULL, NULL); - ghosts[2 * n + 0]->implicit = 1; - ghosts[2 * n + 1]->implicit = 1; + sched, task_type_grav_ghost, task_subtype_none, 0, 1, NULL, NULL); scheduler_addunlock(sched, ghosts[2 * n + 0], s->grav_top_level); scheduler_addunlock(sched, s->grav_top_level, ghosts[2 * n + 1]); } @@ -2063,6 +2069,7 @@ static inline void engine_make_self_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> gravity --> grav_down --> kick */ + scheduler_addunlock(sched, c->super->drift_gpart, gravity); scheduler_addunlock(sched, c->super->init_grav, gravity); scheduler_addunlock(sched, gravity, c->super->grav_down); } @@ -2648,16 +2655,32 @@ void engine_marktasks_mapper(void *map_data, int num_elements, struct task *t = &tasks[ind]; /* Single-cell task? */ - if (t->type == task_type_self || t->type == task_type_ghost || - t->type == task_type_extra_ghost || t->type == task_type_cooling || - t->type == task_type_sourceterms || t->type == task_type_sub_self) { + if (t->type == task_type_self || t->type == task_type_sub_self) { + + /* Local pointer. */ + struct cell *ci = t->ci; + + if (ci->nodeID != engine_rank) error("Non-local self task found"); /* Set this task's skip. 
*/ - if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + if (cell_is_active(ci, e)) scheduler_activate(s, t); + /* Activate the hydro drift */ + if (t->type == task_type_self && t->subtype == task_subtype_density) { + cell_activate_drift_part(ci, s); + } + /* Activate the gravity drift */ + else if (t->type == task_type_self && t->subtype == task_subtype_grav) { + cell_activate_subcell_grav_tasks(t->ci, NULL, s); + } /* Store current values of dx_max and h_max. */ - if (t->type == task_type_sub_self && t->subtype == task_subtype_density) { - cell_activate_subcell_tasks(t->ci, NULL, s); + else if (t->type == task_type_sub_self && + t->subtype == task_subtype_density) { + cell_activate_subcell_tasks(ci, NULL, s); + + } else if (t->type == task_type_sub_self && + t->subtype == task_subtype_grav) { + error("Invalid task sub-type encountered"); } } @@ -2668,34 +2691,42 @@ void engine_marktasks_mapper(void *map_data, int num_elements, struct cell *ci = t->ci; struct cell *cj = t->cj; - /* If this task does not involve any active cells, skip it. */ - if (!cell_is_active(t->ci, e) && !cell_is_active(t->cj, e)) continue; - /* Only activate tasks that involve a local active cell. */ if ((cell_is_active(ci, e) && ci->nodeID == engine_rank) || - (cj != NULL && cell_is_active(cj, e) && cj->nodeID == engine_rank)) { + (cell_is_active(cj, e) && cj->nodeID == engine_rank)) { scheduler_activate(s, t); /* Set the correct sorting flags */ if (t->type == task_type_pair && t->subtype == task_subtype_density) { + /* Store some values. */ atomic_or(&ci->requires_sorts, 1 << t->flags); atomic_or(&cj->requires_sorts, 1 << t->flags); ci->dx_max_sort_old = ci->dx_max_sort; cj->dx_max_sort_old = cj->dx_max_sort; - /* Activate the drift tasks. */ + /* Activate the hydro drift tasks. */ if (ci->nodeID == engine_rank) cell_activate_drift_part(ci, s); if (cj->nodeID == engine_rank) cell_activate_drift_part(cj, s); /* Check the sorts and activate them if needed. 
*/ cell_activate_sorts(ci, t->flags, s); cell_activate_sorts(cj, t->flags, s); + + } else if (t->type == task_type_pair && + t->subtype == task_subtype_grav) { + /* Activate the gravity drift */ + cell_activate_subcell_grav_tasks(t->ci, t->cj, s); } + /* Store current values of dx_max and h_max. */ else if (t->type == task_type_sub_pair && t->subtype == task_subtype_density) { cell_activate_subcell_tasks(t->ci, t->cj, s); + + } else if (t->type == task_type_sub_pair && + t->subtype == task_subtype_grav) { + error("Invalid task sub-type encountered"); } } @@ -2828,19 +2859,24 @@ void engine_marktasks_mapper(void *map_data, int num_elements, } } - /* Kick/Drift/init ? */ - if (t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_drift_gpart || t->type == task_type_init_grav) { + /* Kick/init ? */ + else if (t->type == task_type_kick1 || t->type == task_type_kick2 || + t->type == task_type_init_grav) { + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + } + + /* Hydro ghost tasks ? */ + else if (t->type == task_type_ghost || t->type == task_type_extra_ghost) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } - /* Gravity ? */ + /* Gravity stuff ? */ else if (t->type == task_type_grav_down || t->type == task_type_grav_long_range) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } - /* Periodic gravity ? */ + /* Periodic gravity stuff (Note this is not linked to a cell) ? 
*/ else if (t->type == task_type_grav_top_level || t->type == task_type_grav_ghost) { scheduler_activate(s, t); @@ -2853,6 +2889,11 @@ void engine_marktasks_mapper(void *map_data, int num_elements, t->ci->s_updated = 0; if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } + + /* Subgrid tasks */ + else if (t->type == task_type_cooling || t->type == task_type_sourceterms) { + if (cell_is_active(t->ci, e)) scheduler_activate(s, t); + } } } @@ -3393,8 +3434,9 @@ void engine_skip_drift(struct engine *e) { struct task *t = &tasks[i]; - /* Skip everything that updates the particles */ - if (t->type == task_type_drift_part) t->skip = 1; + /* Skip everything that moves the particles */ + if (t->type == task_type_drift_part || t->type == task_type_drift_gpart) + t->skip = 1; } /* Run through the cells and clear some flags. */ @@ -3832,7 +3874,7 @@ void engine_do_drift_all_mapper(void *map_data, int num_elements, cell_drift_part(c, e, 1); /* Drift all the g-particles */ - cell_drift_gpart(c, e); + cell_drift_gpart(c, e, 1); /* Drift the multipoles */ if (e->policy & engine_policy_self_gravity) diff --git a/src/gravity.c b/src/gravity.c index f58bc1b7456bc5dfc95b4c976ebda8e1999ff3e0..05f4f3724414287e5aeaa6e932ff4df7810914d9 100644 --- a/src/gravity.c +++ b/src/gravity.c @@ -307,7 +307,10 @@ int gravity_exact_force_file_exits(const struct engine *e) { /* File name */ char file_name[100]; - sprintf(file_name, "gravity_checks_exact_step%d.dat", e->step); + if (e->s->periodic) + sprintf(file_name, "gravity_checks_exact_periodic_step%d.dat", e->step); + else + sprintf(file_name, "gravity_checks_exact_step%d.dat", e->step); /* Does the file exist ? 
*/ if (access(file_name, R_OK | W_OK) == 0) { @@ -552,14 +555,20 @@ void gravity_exact_force_check(struct space *s, const struct engine *e, if (!gravity_exact_force_file_exits(e)) { char file_name_exact[100]; - sprintf(file_name_exact, "gravity_checks_exact_step%d.dat", e->step); + if (s->periodic) + sprintf(file_name_exact, "gravity_checks_exact_periodic_step%d.dat", + e->step); + else + sprintf(file_name_exact, "gravity_checks_exact_step%d.dat", e->step); FILE *file_exact = fopen(file_name_exact, "w"); fprintf(file_exact, "# Gravity accuracy test - EXACT FORCES\n"); fprintf(file_exact, "# G= %16.8e\n", e->physical_constants->const_newton_G); fprintf(file_exact, "# N= %d\n", SWIFT_GRAVITY_FORCE_CHECKS); fprintf(file_exact, "# epsilon=%16.8e\n", e->gravity_properties->epsilon); - fprintf(file_exact, "# theta=%16.8e\n", e->gravity_properties->theta_crit); + fprintf(file_exact, "# periodic= %d\n", s->periodic); + fprintf(file_exact, "# Git Branch: %s\n", git_branch()); + fprintf(file_exact, "# Git Revision: %s\n", git_revision()); fprintf(file_exact, "# %16s %16s %16s %16s %16s %16s %16s\n", "id", "pos[0]", "pos[1]", "pos[2]", "a_exact[0]", "a_exact[1]", "a_exact[2]"); diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index d4a95540de17631ad445075d672d03a1236e34e3..811d6fc8f902530840bcce4cf378c72ce25d0f4f 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -21,232 +21,142 @@ #define SWIFT_DEFAULT_GRAVITY_IACT_H /* Includes. */ -#include "const.h" #include "kernel_gravity.h" #include "kernel_long_gravity.h" #include "multipole.h" -#include "vector.h" /** - * @brief Gravity forces between particles truncated by the long-range kernel + * @brief Computes the intensity of the force at a point generated by a + * point-mass. + * + * The returned quantity needs to be multiplied by the distance vector to obtain + * the force vector. + * + * @param r2 Square of the distance to the point-mass. 
+ * @param h2 Square of the softening length. + * @param h_inv Inverse of the softening length. + * @param h_inv3 Cube of the inverse of the softening length. + * @param mass Mass of the point-mass. + * @param f_ij (return) The force intensity. */ -__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_truncated( - float r2, const float *dx, struct gpart *gpi, struct gpart *gpj, - float rlr_inv) { - - /* Apply the gravitational acceleration. */ - const float r = sqrtf(r2); - const float ir = 1.f / r; - const float mi = gpi->mass; - const float mj = gpj->mass; - const float hi = gpi->epsilon; - const float hj = gpj->epsilon; - const float u_lr = r * rlr_inv; - float f_lr, fi, fj, W; - -#ifdef SWIFT_DEBUG_CHECKS - if (r == 0.f) error("Interacting particles with 0 distance"); -#endif - - /* Get long-range correction */ - kernel_long_grav_eval(u_lr, &f_lr); +__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_full( + float r2, float h2, float h_inv, float h_inv3, float mass, float *f_ij) { - if (r >= hi) { - - /* Get Newtonian gravity */ - fi = mj * ir * ir * ir * f_lr; - - } else { - - const float hi_inv = 1.f / hi; - const float hi_inv3 = hi_inv * hi_inv * hi_inv; - const float ui = r * hi_inv; - - kernel_grav_eval(ui, &W); - - /* Get softened gravity */ - fi = mj * hi_inv3 * W * f_lr; - } + /* Get the inverse distance */ + const float r_inv = 1.f / sqrtf(r2); - if (r >= hj) { + /* Should we soften ? 
*/ + if (r2 >= h2) { /* Get Newtonian gravity */ - fj = mi * ir * ir * ir * f_lr; + *f_ij = mass * r_inv * r_inv * r_inv; } else { - const float hj_inv = 1.f / hj; - const float hj_inv3 = hj_inv * hj_inv * hj_inv; - const float uj = r * hj_inv; + const float r = r2 * r_inv; + const float ui = r * h_inv; + float W_ij; - kernel_grav_eval(uj, &W); + kernel_grav_eval(ui, &W_ij); /* Get softened gravity */ - fj = mi * hj_inv3 * W * f_lr; + *f_ij = mass * h_inv3 * W_ij; } - - const float fidx[3] = {fi * dx[0], fi * dx[1], fi * dx[2]}; - gpi->a_grav[0] -= fidx[0]; - gpi->a_grav[1] -= fidx[1]; - gpi->a_grav[2] -= fidx[2]; - - const float fjdx[3] = {fj * dx[0], fj * dx[1], fj * dx[2]}; - gpj->a_grav[0] += fjdx[0]; - gpj->a_grav[1] += fjdx[1]; - gpj->a_grav[2] += fjdx[2]; } /** - * @brief Gravity forces between particles + * @brief Computes the intensity of the force at a point generated by a + * point-mass truncated for long-distance periodicity. + * + * The returned quantity needs to be multiplied by the distance vector to obtain + * the force vector. + * + * @param r2 Square of the distance to the point-mass. + * @param h2 Square of the softening length. + * @param h_inv Inverse of the softening length. + * @param h_inv3 Cube of the inverse of the softening length. + * @param mass Mass of the point-mass. + * @param rlr_inv Inverse of the mesh smoothing scale. + * @param f_ij (return) The force intensity. */ -__attribute__((always_inline)) INLINE static void runner_iact_grav_pp( - float r2, const float *dx, struct gpart *gpi, struct gpart *gpj) { - - /* Apply the gravitational acceleration. 
*/ - const float r = sqrtf(r2); - const float ir = 1.f / r; - const float mi = gpi->mass; - const float mj = gpj->mass; - const float hi = gpi->epsilon; - const float hj = gpj->epsilon; - float fi, fj, W; - -#ifdef SWIFT_DEBUG_CHECKS - if (r == 0.f) error("Interacting particles with 0 distance"); -#endif - - if (r >= hi) { - - /* Get Newtonian gravity */ - fi = mj * ir * ir * ir; - - } else { - - const float hi_inv = 1.f / hi; - const float hi_inv3 = hi_inv * hi_inv * hi_inv; - const float ui = r * hi_inv; - - kernel_grav_eval(ui, &W); +__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_truncated( + float r2, float h2, float h_inv, float h_inv3, float mass, float rlr_inv, + float *f_ij) { - /* Get softened gravity */ - fi = mj * hi_inv3 * W; - } + /* Get the inverse distance */ + const float r_inv = 1.f / sqrtf(r2); + const float r = r2 * r_inv; - if (r >= hj) { + /* Should we soften ? */ + if (r2 >= h2) { /* Get Newtonian gravity */ - fj = mi * ir * ir * ir; + *f_ij = mass * r_inv * r_inv * r_inv; } else { - const float hj_inv = 1.f / hj; - const float hj_inv3 = hj_inv * hj_inv * hj_inv; - const float uj = r * hj_inv; + const float r = r2 * r_inv; + const float ui = r * h_inv; + float W_ij; - kernel_grav_eval(uj, &W); + kernel_grav_eval(ui, &W_ij); /* Get softened gravity */ - fj = mi * hj_inv3 * W; + *f_ij = mass * h_inv3 * W_ij; } - const float fidx[3] = {fi * dx[0], fi * dx[1], fi * dx[2]}; - gpi->a_grav[0] -= fidx[0]; - gpi->a_grav[1] -= fidx[1]; - gpi->a_grav[2] -= fidx[2]; - - const float fjdx[3] = {fj * dx[0], fj * dx[1], fj * dx[2]}; - gpj->a_grav[0] += fjdx[0]; - gpj->a_grav[1] += fjdx[1]; - gpj->a_grav[2] += fjdx[2]; -} - -/** - * @brief Gravity forces between particles truncated by the long-range kernel - * (non-symmetric version) - */ -__attribute__((always_inline)) INLINE static void -runner_iact_grav_pp_truncated_nonsym(float r2, const float *dx, - struct gpart *gpi, const struct gpart *gpj, - float rlr_inv) { - - /* Apply the 
gravitational acceleration. */ - const float r = sqrtf(r2); - const float ir = 1.f / r; - const float mj = gpj->mass; - const float hi = gpi->epsilon; - const float u_lr = r * rlr_inv; - float f_lr, f, W; - -#ifdef SWIFT_DEBUG_CHECKS - if (r == 0.f) error("Interacting particles with 0 distance"); -#endif - /* Get long-range correction */ - kernel_long_grav_eval(u_lr, &f_lr); - - if (r >= hi) { - - /* Get Newtonian gravity */ - f = mj * ir * ir * ir * f_lr; - - } else { - - const float hi_inv = 1.f / hi; - const float hi_inv3 = hi_inv * hi_inv * hi_inv; - const float ui = r * hi_inv; - - kernel_grav_eval(ui, &W); - - /* Get softened gravity */ - f = mj * hi_inv3 * W * f_lr; - } - - const float fdx[3] = {f * dx[0], f * dx[1], f * dx[2]}; - - gpi->a_grav[0] -= fdx[0]; - gpi->a_grav[1] -= fdx[1]; - gpi->a_grav[2] -= fdx[2]; + const float u_lr = r * rlr_inv; + float corr_lr; + kernel_long_grav_eval(u_lr, &corr_lr); + *f_ij *= corr_lr; } /** - * @brief Gravity forces between particles (non-symmetric version) + * @brief Computes the force at a point generated by a multipole. + * + * This uses the quadrupole terms only and defaults to the monopole if + * the code is compiled with low-order gravity only. + * + * @param r_x x-component of the distance vector to the multipole. + * @param r_y y-component of the distance vector to the multipole. + * @param r_z z-component of the distance vector to the multipole. + * @param r2 Square of the distance vector to the multipole. + * @param h The softening length. + * @param h_inv Inverse of the softening length. + * @param m The multipole. + * @param f_x (return) The x-component of the acceleration. + * @param f_y (return) The y-component of the acceleration. + * @param f_z (return) The z-component of the acceleration. */ -__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_nonsym( - float r2, const float *dx, struct gpart *gpi, const struct gpart *gpj) { - - /* Apply the gravitational acceleration. 
*/ - const float r = sqrtf(r2); - const float ir = 1.f / r; - const float mj = gpj->mass; - const float hi = gpi->epsilon; - float f, W; - -#ifdef SWIFT_DEBUG_CHECKS - if (r == 0.f) error("Interacting particles with 0 distance"); -#endif - - if (r >= hi) { - - /* Get Newtonian gravity */ - f = mj * ir * ir * ir; +__attribute__((always_inline)) INLINE static void runner_iact_grav_pm( + float r_x, float r_y, float r_z, float r2, float h, float h_inv, + const struct multipole *m, float *f_x, float *f_y, float *f_z) { - } else { +#if SELF_GRAVITY_MULTIPOLE_ORDER < 3 + runner_iact_grav_pp_full(r2, h * h, h_inv, h_inv3, m->M_000, f_ij); +#else - const float hi_inv = 1.f / hi; - const float hi_inv3 = hi_inv * hi_inv * hi_inv; - const float ui = r * hi_inv; + /* Get the inverse distance */ + const float r_inv = 1.f / sqrtf(r2); - kernel_grav_eval(ui, &W); + struct potential_derivatives_M2P pot; + compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, &pot); - /* Get softened gravity */ - f = mj * hi_inv3 * W; - } + /* 1st order terms (monopole) */ + *f_x = m->M_000 * pot.D_100; + *f_y = m->M_000 * pot.D_010; + *f_z = m->M_000 * pot.D_001; - const float fdx[3] = {f * dx[0], f * dx[1], f * dx[2]}; + /* 3rd order terms (quadrupole) */ + *f_x += m->M_200 * pot.D_300 + m->M_020 * pot.D_120 + m->M_002 * pot.D_102; + *f_y += m->M_200 * pot.D_210 + m->M_020 * pot.D_030 + m->M_002 * pot.D_012; + *f_z += m->M_200 * pot.D_201 + m->M_020 * pot.D_021 + m->M_002 * pot.D_003; + *f_x += m->M_110 * pot.D_210 + m->M_101 * pot.D_201 + m->M_011 * pot.D_111; + *f_y += m->M_110 * pot.D_120 + m->M_101 * pot.D_111 + m->M_011 * pot.D_021; + *f_z += m->M_110 * pot.D_111 + m->M_101 * pot.D_102 + m->M_011 * pot.D_012; - gpi->a_grav[0] -= fdx[0]; - gpi->a_grav[1] -= fdx[1]; - gpi->a_grav[2] -= fdx[2]; +#endif } #endif /* SWIFT_DEFAULT_GRAVITY_IACT_H */ diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 
fd87be64315c2746bba566916a132d13dfac07ef..fdc89605765b460b355b3958e34287991be5ff1b 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -59,6 +59,12 @@ struct gravity_cache { /*! #gpart z acceleration. */ float *restrict a_z SWIFT_CACHE_ALIGN; + /*! Is this #gpart active ? */ + int *restrict active SWIFT_CACHE_ALIGN; + + /*! Can this #gpart use a M2P interaction ? */ + int *restrict use_mpole SWIFT_CACHE_ALIGN; + /*! Cache size */ int count; }; @@ -79,6 +85,8 @@ static INLINE void gravity_cache_clean(struct gravity_cache *c) { free(c->a_x); free(c->a_y); free(c->a_z); + free(c->active); + free(c->use_mpole); } c->count = 0; } @@ -97,24 +105,26 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { /* Size of the gravity cache */ const int padded_count = count - (count % VEC_SIZE) + VEC_SIZE; - const size_t sizeBytes = padded_count * sizeof(float); + const size_t sizeBytesF = padded_count * sizeof(float); + const size_t sizeBytesI = padded_count * sizeof(int); /* Delete old stuff if any */ gravity_cache_clean(c); - int error = 0; - error += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->z, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += - posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytes); - error += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytes); - - if (error != 0) - error("Couldn't allocate gravity cache, size: %d", padded_count); + int e = 0; + e += posix_memalign((void **)&c->x, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->y, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->z, 
SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->epsilon, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->m, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->a_x, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->a_y, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->a_z, SWIFT_CACHE_ALIGNMENT, sizeBytesF); + e += posix_memalign((void **)&c->active, SWIFT_CACHE_ALIGNMENT, sizeBytesI); + e += + posix_memalign((void **)&c->use_mpole, SWIFT_CACHE_ALIGNMENT, sizeBytesI); + + if (e != 0) error("Couldn't allocate gravity cache, size: %d", padded_count); c->count = padded_count; } @@ -122,29 +132,36 @@ static INLINE void gravity_cache_init(struct gravity_cache *c, int count) { /** * @brief Fills a #gravity_cache structure with some #gpart and shift them. * + * Also checks whether the #gpart can use a M2P interaction instead of the + * more expensive P2P. + * + * @param max_active_bin The largest active bin in the current time-step. * @param c The #gravity_cache to fill. * @param gparts The #gpart array to read from. * @param gcount The number of particles to read. * @param gcount_padded The number of particle to read padded to the next * multiple of the vector length. * @param shift A shift to apply to all the particles. - * @param cell The cell the #gpart are in. + * @param CoM The position of the multipole. + * @param r_max2 The square of the multipole radius. + * @param theta_crit2 The square of the opening angle. + * @param cell The cell we play with (to get reasonable padding positions). 
*/ -__attribute__((always_inline)) INLINE void gravity_cache_populate( - struct gravity_cache *c, const struct gpart *restrict gparts, int gcount, - int gcount_padded, const double shift[3], const struct cell *cell) { +__attribute__((always_inline)) INLINE static void gravity_cache_populate( + timebin_t max_active_bin, struct gravity_cache *c, + const struct gpart *restrict gparts, int gcount, int gcount_padded, + const double shift[3], const float CoM[3], float r_max2, float theta_crit2, + const struct cell *cell) { /* Make the compiler understand we are in happy vectorization land */ - float *restrict x = c->x; - float *restrict y = c->y; - float *restrict z = c->z; - float *restrict m = c->m; - float *restrict epsilon = c->epsilon; - swift_align_information(x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT); - swift_align_information(m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, use_mpole, c->use_mpole, + SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Fill the input caches */ @@ -154,68 +171,91 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( z[i] = (float)(gparts[i].x[2] - shift[2]); epsilon[i] = gparts[i].epsilon; m[i] = gparts[i].mass; + active[i] = (int)(gparts[i].time_bin <= max_active_bin); + + /* Check whether we can use the multipole instead of P-P */ + const float dx = x[i] - CoM[0]; + const float dy = y[i] - CoM[1]; + const float dz = 
z[i] - CoM[2]; + const float r2 = dx * dx + dy * dy + dz * dz; + use_mpole[i] = gravity_M2P_accept(r_max2, theta_crit2, r2); } #ifdef SWIFT_DEBUG_CHECKS if (gcount_padded < gcount) error("Padded counter smaller than counter"); #endif + /* Particles used for padding should get impossible positions + * that have a reasonable magnitude. We use the cell width for this */ + const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1], + -2. * cell->width[2]}; + /* Pad the caches */ for (int i = gcount; i < gcount_padded; ++i) { - x[i] = -3.f * cell->width[0]; - y[i] = -3.f * cell->width[0]; - z[i] = -3.f * cell->width[0]; + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; epsilon[i] = 0.f; m[i] = 0.f; + active[i] = 0; + use_mpole[i] = 0; } } /** - * @brief Fills a #gravity_cache structure with some #gpart. + * @brief Fills a #gravity_cache structure with some #gpart and shift them. * + * @param max_active_bin The largest active bin in the current time-step. * @param c The #gravity_cache to fill. * @param gparts The #gpart array to read from. * @param gcount The number of particles to read. * @param gcount_padded The number of particle to read padded to the next * multiple of the vector length. + * @param shift A shift to apply to all the particles. + * @param cell The cell we play with (to get reasonable padding positions). 
*/ -__attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( - struct gravity_cache *c, const struct gpart *restrict gparts, int gcount, - int gcount_padded) { +__attribute__((always_inline)) INLINE static void +gravity_cache_populate_no_mpole(timebin_t max_active_bin, + struct gravity_cache *c, + const struct gpart *restrict gparts, int gcount, + int gcount_padded, const double shift[3], + const struct cell *cell) { /* Make the compiler understand we are in happy vectorization land */ - float *restrict x = c->x; - float *restrict y = c->y; - float *restrict z = c->z; - float *restrict m = c->m; - float *restrict epsilon = c->epsilon; - swift_align_information(x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT); - swift_align_information(m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Fill the input caches */ for (int i = 0; i < gcount; ++i) { - x[i] = (float)(gparts[i].x[0]); - y[i] = (float)(gparts[i].x[1]); - z[i] = (float)(gparts[i].x[2]); + x[i] = (float)(gparts[i].x[0] - shift[0]); + y[i] = (float)(gparts[i].x[1] - shift[1]); + z[i] = (float)(gparts[i].x[2] - shift[2]); epsilon[i] = gparts[i].epsilon; m[i] = gparts[i].mass; + active[i] = (int)(gparts[i].time_bin <= max_active_bin); } #ifdef SWIFT_DEBUG_CHECKS if (gcount_padded < gcount) error("Padded counter smaller than counter"); #endif + /* Particles used for padding should get impossible positions + * 
that have a reasonable magnitude. We use the cell width for this */ + const float pos_padded[3] = {-2. * cell->width[0], -2. * cell->width[1], + -2. * cell->width[2]}; /* Pad the caches */ for (int i = gcount; i < gcount_padded; ++i) { - x[i] = 0.f; - y[i] = 0.f; - z[i] = 0.f; + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; epsilon[i] = 0.f; m[i] = 0.f; + active[i] = 0; } } @@ -230,18 +270,18 @@ __attribute__((always_inline)) INLINE void gravity_cache_write_back( const struct gravity_cache *c, struct gpart *restrict gparts, int gcount) { /* Make the compiler understand we are in happy vectorization land */ - float *restrict a_x = c->a_x; - float *restrict a_y = c->a_y; - float *restrict a_z = c->a_z; - swift_align_information(a_x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(a_y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(a_z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_x, c->a_x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_y, c->a_y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_z, c->a_z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); /* Write stuff back to the particles */ for (int i = 0; i < gcount; ++i) { - gparts[i].a_grav[0] += a_x[i]; - gparts[i].a_grav[1] += a_y[i]; - gparts[i].a_grav[2] += a_z[i]; + if (active[i]) { + gparts[i].a_grav[0] += a_x[i]; + gparts[i].a_grav[1] += a_y[i]; + gparts[i].a_grav[2] += a_z[i]; + } } } diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h index 8c8379f74f5fc67d3671f0154b2aeacbc35ea9f1..cf8aa54338b2e87e8bf5f2cc453ad7417eea5804 100644 --- a/src/gravity_derivatives.h +++ b/src/gravity_derivatives.h @@ -32,1056 +32,358 @@ /* Local headers. */ #include "inline.h" - -/*************************/ -/* 0th order derivatives */ -/*************************/ - -/** - * @brief \f$ \phi(r_x, r_y, r_z) \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). 
- * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_000(double r_x, - double r_y, - double r_z, - double r_inv) { - - return r_inv; -} - -/*************************/ -/* 1st order derivatives */ -/*************************/ - -/** - * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_100(double r_x, - double r_y, - double r_z, - double r_inv) { - - return -r_x * r_inv * r_inv * r_inv; -} - -/** - * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_010(double r_x, - double r_y, - double r_z, - double r_inv) { - - return -r_y * r_inv * r_inv * r_inv; -} - -/** - * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_001(double r_x, - double r_y, - double r_z, - double r_inv) { - - return -r_z * r_inv * r_inv * r_inv; -} - -/*************************/ -/* 2nd order derivatives */ -/*************************/ - -/** - * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x^2} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_200(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv3 = r_inv * r_inv2; - const double r_inv5 = r_inv3 * r_inv2; - return 3. * r_x * r_x * r_inv5 - r_inv3; -} - -/** - * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y^2} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_020(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv3 = r_inv * r_inv2; - const double r_inv5 = r_inv3 * r_inv2; - return 3. * r_y * r_y * r_inv5 - r_inv3; -} - -/** - * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_z^2} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_002(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv3 = r_inv * r_inv2; - const double r_inv5 = r_inv3 * r_inv2; - return 3. * r_z * r_z * r_inv5 - r_inv3; -} - -/** - * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_110(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - return 3. * r_x * r_y * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_101(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - return 3. * r_x * r_z * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
- * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_011(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - return 3. * r_y * r_z * r_inv5; -} - -/*************************/ -/* 3rd order derivatives */ -/*************************/ - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^3} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_300(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_x * r_x * r_x * r_inv7 + 9. * r_x * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^3} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_030(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_y * r_y * r_y * r_inv7 + 9. * r_y * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_z^3} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). 
- * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_003(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_z * r_z * r_z * r_inv7 + 9. * r_z * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_y} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_210(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_x * r_x * r_y * r_inv7 + 3. * r_y * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_z} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_201(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_x * r_x * r_z * r_inv7 + 3. * r_z * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y^2} - * \f$. 
- * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_120(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_x * r_y * r_y * r_inv7 + 3. * r_x * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^2\partial r_z} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_021(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_z * r_y * r_y * r_inv7 + 3. * r_z * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z^2} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_102(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_x * r_z * r_z * r_inv7 + 3. 
* r_x * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z^2} - * \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_012(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv2 = r_inv * r_inv; - const double r_inv5 = r_inv2 * r_inv2 * r_inv; - const double r_inv7 = r_inv5 * r_inv2; - return -15. * r_y * r_z * r_z * r_inv7 + 3. * r_y * r_inv5; -} - -/** - * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_z\partial - * r_y\partial r_z} \f$. - * - * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). - * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). - * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). - * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) - */ -__attribute__((always_inline)) INLINE static double D_111(double r_x, - double r_y, - double r_z, - double r_inv) { - const double r_inv3 = r_inv * r_inv * r_inv; - const double r_inv7 = r_inv3 * r_inv3 * r_inv; - return -15. * r_x * r_y * r_z * r_inv7; -} - -/*********************************/ -/* 4th order gravity derivatives */ -/*********************************/ - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_z^4 }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_004(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_z * r_z * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 * - (r_z * r_z) + - 3. 
* r_inv * r_inv * r_inv * r_inv * r_inv * 3.0; - /* 5 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_y^1 \partial_z^3 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_013(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_z * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_y * r_z); - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_y^2 \partial_z^2 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_022(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_y * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_z * r_z) + - 3. * r_inv * r_inv * r_inv * r_inv * r_inv; - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_y^3 \partial_z^1 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_031(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_y * r_y * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_y * r_z); - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_y^4 }\phi(x, y, z} \f$. 
- * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_040(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_y * r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 * - (r_y * r_y) + - 3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0; - /* 5 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_z^3 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_103(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_z * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x * r_z); - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^1 \partial_z^2 - * }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_112(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_y * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_x * r_y); - /* 13 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^2 \partial_z^1 - * }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_121(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_y * r_y * r_z) - - 15. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_x * r_z); - /* 13 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^3 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_130(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_y * r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x * r_y); - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_z^2 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_202(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_x * r_x) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_z * r_z) + - 3. * r_inv * r_inv * r_inv * r_inv * r_inv; - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^1 \partial_z^1 - * }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_211(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_y * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_y * r_z); - /* 13 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^2 }\phi(x, y, - * z} \f$. 
- * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_220(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_x * r_x) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - (r_y * r_y) + - 3. * r_inv * r_inv * r_inv * r_inv * r_inv; - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_z^1 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_301(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_x * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x * r_z); - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_y^1 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_310(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_x * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x * r_y); - /* 11 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^4}{ \partial_x^4 }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_400(double r_x, - double r_y, - double r_z, - double r_inv) { - return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_x * r_x) - - 15. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 * - (r_x * r_x) + - 3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0; - /* 5 zero-valued terms not written out */ -} - -/*********************************/ -/* 5th order gravity derivatives */ -/*********************************/ - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_z^5 }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_005(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_z * r_z * r_z * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 10.0 * (r_z * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 * - (r_z); - /* 26 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_y^1 \partial_z^4 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_014(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_y * r_z * r_z * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 6.0 * (r_y * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_y); - /* 42 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_y^2 \partial_z^3 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_023(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_y * r_y * r_z * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_y * r_y * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_z * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_z); - /* 44 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_y^3 \partial_z^2 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_032(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_y * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_y * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_y); - /* 44 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_y^4 \partial_z^1 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_041(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 6.0 * (r_y * r_y * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_z); - /* 42 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_y^5 }\phi(x, y, z} \f$. 
- * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_050(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 10.0 * (r_y * r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 * - (r_y); - /* 26 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_z^4 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_104(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_z * r_z * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 6.0 * (r_x * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x); - /* 42 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^1 \partial_z^3 - * }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_113(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_y * r_z * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_x * r_y * r_z); - /* 48 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^2 \partial_z^2 - * }\phi(x, y, z} \f$. 
- * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_122(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_y * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_x); - /* 48 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^3 \partial_z^1 - * }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_131(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_x * r_y * r_z); - /* 48 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^4 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_140(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 6.0 * (r_x * r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x); - /* 42 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_z^3 }\phi(x, y, - * z} \f$. 
- * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_203(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_z * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_x * r_x * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_z * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_z); - /* 44 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^1 \partial_z^2 - * }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_212(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_y); - /* 48 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^2 \partial_z^1 - * }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_221(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_z) + - 105. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_y * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_z); - /* 48 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^3 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_230(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_x * r_x * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_y * r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_y); - /* 44 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_z^2 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_302(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_z * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_x) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_x * r_z * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x); - /* 44 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^1 \partial_z^1 - * }\phi(x, y, z} \f$. 
- * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_311(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_x * r_y * r_z); - /* 48 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^2 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_320(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * (r_x * r_x * r_x) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 3.0 * (r_x * r_y * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_x); - /* 44 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_z^1 }\phi(x, y, - * z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_401(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_z) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 6.0 * (r_x * r_x * r_z) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_z); - /* 42 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_y^1 }\phi(x, y, - * z} \f$. 
- * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_410(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_y) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 6.0 * (r_x * r_x * r_y) - - 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * - (r_y); - /* 42 zero-valued terms not written out */ -} - -/** - * @brief Compute \f$ \frac{\partial^5}{ \partial_x^5 }\phi(x, y, z} \f$. - * - * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) - */ -__attribute__((always_inline)) INLINE static double D_500(double r_x, - double r_y, - double r_z, - double r_inv) { - return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_x) + - 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * - r_inv * 10.0 * (r_x * r_x * r_x) - - 15. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 * - (r_x); - /* 26 zero-valued terms not written out */ +#include "kernel_gravity.h" + +/** + * @brief Structure containing all the derivatives of the potential field + * required for the M2L kernel + * + * D_ijk is the term differentiated i, j and k times along r_x, r_y and r_z. + * Terms of order n are only declared when SELF_GRAVITY_MULTIPOLE_ORDER >= n. + */ +struct potential_derivatives_M2L { + + /* 0th order terms */ + float D_000; + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + /* 1st order terms */ + float D_100, D_010, D_001; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + + /* 2nd order terms */ + float D_200, D_020, D_002; + float D_110, D_101, D_011; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + + /* 3rd order terms */ + float D_300, D_030, D_003; + float D_210, D_201; + float D_120, D_021; + float D_102, D_012; + float D_111; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + + /* 4th order terms */ + float D_400, D_040, D_004; + float D_310, D_301; + float D_130, D_031; + float D_103, D_013; + float D_220, D_202, D_022; + float D_211, D_121, D_112; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + + /* 5th order terms */ + float D_005, D_014, D_023; + float D_032, D_041, D_050; + float D_104, D_113, D_122; + float D_131, D_140, D_203; + float D_212, D_221, D_230; + float D_302, D_311, D_320; + float D_401, D_410, D_500; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for order >5" +#endif +}; + +/** + * @brief Structure containing all the derivatives of the potential field + * required for the M2P kernel + * + * Only the 1st and 3rd order terms are stored here; the member list does not + * depend on SELF_GRAVITY_MULTIPOLE_ORDER. + */ +struct potential_derivatives_M2P { + + /* 1st order terms */ + float D_100, D_010, D_001; + + /* 3rd order terms */ + float D_300, D_030, D_003; + float D_210, D_201; + float D_120, D_021; + float D_102, D_012; + float D_111; +}; + +/** + * @brief Compute all the relevant derivatives of the softened and truncated + * gravitational potential for the M2L kernel.
+ * + * For r2 > eps * eps the un-softened coefficients (powers of r_inv) are used; + * otherwise they are built from the D_soft_* functions evaluated at + * u = r / eps. The coefficients are then combined with powers of r_x, r_y + * and r_z to fill every member of pot that is compiled in. + * + * @param r_x x-component of distance vector + * @param r_y y-component of distance vector + * @param r_z z-component of distance vector + * @param r2 Square norm of distance vector + * @param r_inv Inverse norm of distance vector + * @param eps Softening length. + * @param eps_inv Inverse of softening length. + * @param pot (return) The structure containing all the derivatives. + */ +__attribute__((always_inline)) INLINE static void +compute_potential_derivatives_M2L(float r_x, float r_y, float r_z, float r2, + float r_inv, float eps, float eps_inv, + struct potential_derivatives_M2L *pot) { + + float Dt_1; +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + float Dt_3; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + float Dt_5; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + float Dt_7; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + float Dt_9; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + float Dt_11; +#endif + + /* Un-softened case */ + if (r2 > eps * eps) { + + Dt_1 = r_inv; +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + const float r_inv2 = r_inv * r_inv; + Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */ +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */ +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */ +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + Dt_9 = -7.f * Dt_7 * r_inv2; /* 105 / r^9 */ +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + Dt_11 = -9.f * Dt_9 * r_inv2; /* -945 / r^11 */ +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for order >5" +#endif + + } else { /* Softened case: r2 <= eps * eps */ + const float r = r2 * r_inv; + const float u = r * eps_inv; + const float u_inv = r_inv * eps; + + Dt_1 = eps_inv * D_soft_1(u, u_inv); +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + const float eps_inv2 = eps_inv * eps_inv; + const float eps_inv3 = eps_inv * eps_inv2; + Dt_3 = -eps_inv3 * D_soft_3(u, u_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + const float eps_inv5 = eps_inv3 * eps_inv2; +
Dt_5 = eps_inv5 * D_soft_5(u, u_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + const float eps_inv7 = eps_inv5 * eps_inv2; + Dt_7 = -eps_inv7 * D_soft_7(u, u_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + const float eps_inv9 = eps_inv7 * eps_inv2; + Dt_9 = eps_inv9 * D_soft_9(u, u_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + const float eps_inv11 = eps_inv9 * eps_inv2; + Dt_11 = -eps_inv11 * D_soft_11(u, u_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for order >5" +#endif + } + +/* Now assemble the full terms from the Dt_n coefficients */ + +/* Compute some powers of r_x, r_y and r_z */ +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + const float r_x2 = r_x * r_x; + const float r_y2 = r_y * r_y; + const float r_z2 = r_z * r_z; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + const float r_x3 = r_x2 * r_x; + const float r_y3 = r_y2 * r_y; + const float r_z3 = r_z2 * r_z; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + const float r_x4 = r_x3 * r_x; + const float r_y4 = r_y3 * r_y; + const float r_z4 = r_z3 * r_z; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + const float r_x5 = r_x4 * r_x; + const float r_y5 = r_y4 * r_y; + const float r_z5 = r_z4 * r_z; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for order >5" +#endif + + /* Get the 0th order term */ + pot->D_000 = Dt_1; + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + /* 1st order derivatives */ + pot->D_100 = r_x * Dt_3; + pot->D_010 = r_y * Dt_3; + pot->D_001 = r_z * Dt_3; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + /* 2nd order derivatives */ + pot->D_200 = r_x2 * Dt_5 + Dt_3; + pot->D_020 = r_y2 * Dt_5 + Dt_3; + pot->D_002 = r_z2 * Dt_5 + Dt_3; + pot->D_110 = r_x * r_y * Dt_5; + pot->D_101 = r_x * r_z * Dt_5; + pot->D_011 = r_y * r_z * Dt_5; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + /* 3rd order derivatives */ + pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5; + pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5; + pot->D_003 = r_z3 * Dt_7 + 3.f * r_z
* Dt_5; + pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5; + pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5; + pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5; + pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5; + pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5; + pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5; + pot->D_111 = r_x * r_y * r_z * Dt_7; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + /* 4th order derivatives */ + pot->D_400 = r_x4 * Dt_9 + 6.f * r_x2 * Dt_7 + 3.f * Dt_5; + pot->D_040 = r_y4 * Dt_9 + 6.f * r_y2 * Dt_7 + 3.f * Dt_5; + pot->D_004 = r_z4 * Dt_9 + 6.f * r_z2 * Dt_7 + 3.f * Dt_5; + pot->D_310 = r_x3 * r_y * Dt_9 + 3.f * r_x * r_y * Dt_7; + pot->D_301 = r_x3 * r_z * Dt_9 + 3.f * r_x * r_z * Dt_7; + pot->D_130 = r_y3 * r_x * Dt_9 + 3.f * r_y * r_x * Dt_7; + pot->D_031 = r_y3 * r_z * Dt_9 + 3.f * r_y * r_z * Dt_7; + pot->D_103 = r_z3 * r_x * Dt_9 + 3.f * r_z * r_x * Dt_7; + pot->D_013 = r_z3 * r_y * Dt_9 + 3.f * r_z * r_y * Dt_7; + pot->D_220 = r_x2 * r_y2 * Dt_9 + r_x2 * Dt_7 + r_y2 * Dt_7 + Dt_5; + pot->D_202 = r_x2 * r_z2 * Dt_9 + r_x2 * Dt_7 + r_z2 * Dt_7 + Dt_5; + pot->D_022 = r_y2 * r_z2 * Dt_9 + r_y2 * Dt_7 + r_z2 * Dt_7 + Dt_5; + pot->D_211 = r_x2 * r_y * r_z * Dt_9 + r_y * r_z * Dt_7; + pot->D_121 = r_y2 * r_x * r_z * Dt_9 + r_x * r_z * Dt_7; + pot->D_112 = r_z2 * r_x * r_y * Dt_9 + r_x * r_y * Dt_7; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + /* 5th order derivatives */ + pot->D_500 = r_x5 * Dt_11 + 10.f * r_x3 * Dt_9 + 15.f * r_x * Dt_7; + pot->D_050 = r_y5 * Dt_11 + 10.f * r_y3 * Dt_9 + 15.f * r_y * Dt_7; + pot->D_005 = r_z5 * Dt_11 + 10.f * r_z3 * Dt_9 + 15.f * r_z * Dt_7; + pot->D_410 = r_x4 * r_y * Dt_11 + 6.f * r_x2 * r_y * Dt_9 + 3.f * r_y * Dt_7; + pot->D_401 = r_x4 * r_z * Dt_11 + 6.f * r_x2 * r_z * Dt_9 + 3.f * r_z * Dt_7; + pot->D_140 = r_y4 * r_x * Dt_11 + 6.f * r_y2 * r_x * Dt_9 + 3.f * r_x * Dt_7; + pot->D_041 = r_y4 * r_z * Dt_11 + 6.f * r_y2 * r_z * Dt_9 + 3.f * r_z * Dt_7; + pot->D_104 = r_z4 * r_x * Dt_11 + 6.f * r_z2 * r_x * Dt_9 +
3.f * r_x * Dt_7; + pot->D_014 = r_z4 * r_y * Dt_11 + 6.f * r_z2 * r_y * Dt_9 + 3.f * r_y * Dt_7; + pot->D_320 = r_x3 * r_y2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_y2 * Dt_9 + + 3.f * r_x * Dt_7; + pot->D_302 = r_x3 * r_z2 * Dt_11 + r_x3 * Dt_9 + 3.f * r_x * r_z2 * Dt_9 + + 3.f * r_x * Dt_7; + pot->D_230 = r_y3 * r_x2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_x2 * Dt_9 + + 3.f * r_y * Dt_7; + pot->D_032 = r_y3 * r_z2 * Dt_11 + r_y3 * Dt_9 + 3.f * r_y * r_z2 * Dt_9 + + 3.f * r_y * Dt_7; + pot->D_203 = r_z3 * r_x2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_x2 * Dt_9 + + 3.f * r_z * Dt_7; + pot->D_023 = r_z3 * r_y2 * Dt_11 + r_z3 * Dt_9 + 3.f * r_z * r_y2 * Dt_9 + + 3.f * r_z * Dt_7; + pot->D_311 = r_x3 * r_y * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; + pot->D_131 = r_y3 * r_x * r_z * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; + pot->D_113 = r_z3 * r_x * r_y * Dt_11 + 3.f * r_x * r_y * r_z * Dt_9; + pot->D_122 = r_x * r_y2 * r_z2 * Dt_11 + r_x * r_y2 * Dt_9 + + r_x * r_z2 * Dt_9 + r_x * Dt_7; + pot->D_212 = r_y * r_x2 * r_z2 * Dt_11 + r_y * r_x2 * Dt_9 + + r_y * r_z2 * Dt_9 + r_y * Dt_7; + pot->D_221 = r_z * r_x2 * r_y2 * Dt_11 + r_z * r_x2 * Dt_9 + + r_z * r_y2 * Dt_9 + r_z * Dt_7; +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 5 +#error "Missing implementation for orders >5" +#endif +} + +/** + * @brief Compute all the relevant derivatives of the softened and truncated + * gravitational potential for the M2P kernel. + * + * Only the 1st and 3rd order members of pot are written. The same + * r2 > eps * eps test as in the M2L variant selects between the un-softened + * coefficients and the D_soft_* based ones. + * + * @param r_x x-component of distance vector + * @param r_y y-component of distance vector + * @param r_z z-component of distance vector + * @param r2 Square norm of distance vector + * @param r_inv Inverse norm of distance vector + * @param eps Softening length. + * @param eps_inv Inverse of softening length. + * @param pot (return) The structure containing all the derivatives.
+ */ +__attribute__((always_inline)) INLINE static void +compute_potential_derivatives_M2P(float r_x, float r_y, float r_z, float r2, + float r_inv, float eps, float eps_inv, + struct potential_derivatives_M2P *pot) { + + float Dt_1; + float Dt_3; + float Dt_5; + float Dt_7; + + /* Un-softened case */ + if (r2 > eps * eps) { + + const float r_inv2 = r_inv * r_inv; + + Dt_1 = r_inv; + Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */ + Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */ + Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */ + + } else { /* Softened case: r2 <= eps * eps */ + + const float r = r2 * r_inv; + const float u = r * eps_inv; + const float u_inv = r_inv * eps; + const float eps_inv2 = eps_inv * eps_inv; + const float eps_inv3 = eps_inv * eps_inv2; + const float eps_inv5 = eps_inv3 * eps_inv2; + const float eps_inv7 = eps_inv5 * eps_inv2; + + Dt_1 = eps_inv * D_soft_1(u, u_inv); /* NOTE(review): Dt_1 is not read again after this assignment */ + Dt_3 = -eps_inv3 * D_soft_3(u, u_inv); + Dt_5 = eps_inv5 * D_soft_5(u, u_inv); + Dt_7 = -eps_inv7 * D_soft_7(u, u_inv); + } + + /* Compute some powers of r_x, r_y and r_z */ + const float r_x2 = r_x * r_x; + const float r_y2 = r_y * r_y; + const float r_z2 = r_z * r_z; + const float r_x3 = r_x2 * r_x; + const float r_y3 = r_y2 * r_y; + const float r_z3 = r_z2 * r_z; + + /* 1st order derivatives */ + pot->D_100 = r_x * Dt_3; + pot->D_010 = r_y * Dt_3; + pot->D_001 = r_z * Dt_3; + + /* 3rd order derivatives */ + pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5; + pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5; + pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5; + pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5; + pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5; + pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5; + pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5; + pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5; + pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5; + pot->D_111 = r_x * r_y * r_z * Dt_7; } #endif /* SWIFT_GRAVITY_DERIVATIVE_H */ diff --git a/src/gravity_properties.c b/src/gravity_properties.c index
18cf044434f7840a5a76f483540bb924a2365e26..27a5de0a4102cae4ca787c10c60cf3bbc3a983ee 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -52,12 +52,15 @@ void gravity_props_init(struct gravity_props *p, /* Opening angle */ p->theta_crit = parser_get_param_double(params, "Gravity:theta"); + if (p->theta_crit >= 1.) error("Theta too large. FMM won't converge."); + p->theta_crit2 = p->theta_crit * p->theta_crit; p->theta_crit_inv = 1. / p->theta_crit; /* Softening lengths */ p->epsilon = 3. * parser_get_param_double(params, "Gravity:epsilon"); p->epsilon2 = p->epsilon * p->epsilon; - p->epsilon_inv = 1. / p->epsilon; + p->epsilon_inv = 1.f / p->epsilon; + p->epsilon_inv3 = p->epsilon_inv * p->epsilon_inv * p->epsilon_inv; } void gravity_props_print(const struct gravity_props *p) { diff --git a/src/gravity_properties.h b/src/gravity_properties.h index 2a5e4cb1e07ea591e2e3821704ec55abe7980360..f7b9950052b302a003e5d128191c9dbe68fe875f 100644 --- a/src/gravity_properties.h +++ b/src/gravity_properties.h @@ -51,17 +51,23 @@ struct gravity_props { /*! Tree opening angle (Multipole acceptance criterion) */ double theta_crit; + /*! Square of opening angle */ + double theta_crit2; + /*! Inverse of opening angle */ double theta_crit_inv; /*! Softening length */ - double epsilon; + float epsilon; /*! Square of softening length */ - double epsilon2; + float epsilon2; /*! Inverse of softening length */ - double epsilon_inv; + float epsilon_inv; + + /*! 
Cube of the inverse of softening length */ + float epsilon_inv3; }; void gravity_props_print(const struct gravity_props *p); diff --git a/src/gravity_softened_derivatives.h b/src/gravity_softened_derivatives.h index 3f92476dab5940765b112708a867d940d4d5e6e9..6ef9a0b455a572d8ea6254f9f91941978e7729ac 100644 --- a/src/gravity_softened_derivatives.h +++ b/src/gravity_softened_derivatives.h @@ -34,6 +34,8 @@ #include "inline.h" #include "kernel_gravity.h" +#if 0 + /*************************/ /* 0th order derivatives */ /*************************/ @@ -440,4 +442,6 @@ __attribute__((always_inline)) INLINE static double D_soft_111( return -r_x * r_y * r_z * eps_inv7 * D_soft_3(u); } +#endif + #endif /* SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H */ diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index 5a9e839b63422a3f18c80caf9d891dd6f8be5da6..799bda85b0c69dd2757f47fb0225006adb6d1432 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -71,46 +71,74 @@ __attribute__((always_inline)) INLINE static void kernel_grav_eval_double( /* Derivatives of softening kernel used for FMM */ /************************************************/ -__attribute__((always_inline)) INLINE static double D_soft_0(double u) { +__attribute__((always_inline)) INLINE static float D_soft_1(float u, + float u_inv) { /* phi(u) = -3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3 */ - double phi = -3. * u + 15.; - phi = phi * u - 28.; - phi = phi * u + 21.; + float phi = -3.f * u + 15.f; + phi = phi * u - 28.f; + phi = phi * u + 21.f; phi = phi * u; - phi = phi * u - 7.; + phi = phi * u - 7.f; phi = phi * u; - phi = phi * u + 3.; + phi = phi * u + 3.f; return phi; } -__attribute__((always_inline)) INLINE static double D_soft_1(double u) { +__attribute__((always_inline)) INLINE static float D_soft_3(float u, + float u_inv) { /* phi'(u)/u = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */ - double phi = 21. 
* u - 90.; - phi = phi * u + 140.; - phi = phi * u - 84.; + float phi = 21.f * u - 90.f; + phi = phi * u + 140.f; + phi = phi * u - 84.f; phi = phi * u; - phi = phi * u + 14.; + phi = phi * u + 14.f; return phi; } -__attribute__((always_inline)) INLINE static double D_soft_2(double u) { +__attribute__((always_inline)) INLINE static float D_soft_5(float u, + float u_inv) { /* (phi'(u)/u)'/u = -105u^3 + 360u^2 - 420u + 168 */ - double phi = -105. * u + 360.; - phi = phi * u - 420.; - phi = phi * u + 168.; + float phi = -105.f * u + 360.f; + phi = phi * u - 420.f; + phi = phi * u + 168.f; return phi; } -__attribute__((always_inline)) INLINE static double D_soft_3(double u) { +__attribute__((always_inline)) INLINE static float D_soft_7(float u, + float u_inv) { - /* ((phi'(u)/u)'/u)'/u = 315u - 720 + 420/u */ - return 315. * u - 720. + 420. / u; + /* ((phi'(u)/u)'/u)'/u = 315u - 720 + 420u^-1 */ + return 315.f * u - 720.f + 420.f * u_inv; +} + +__attribute__((always_inline)) INLINE static float D_soft_9(float u, + float u_inv) { + + /* (((phi'(u)/u)'/u)'/u)'/u = -315u^-1 + 420u^-3 */ + float phi = 420.f * u_inv; + phi = phi * u_inv - 315.f; + phi = phi * u_inv; + + return phi; +} + +__attribute__((always_inline)) INLINE static float D_soft_11(float u, + float u_inv) { + + /* ((((phi'(u)/u)'/u)'/u)'/u)'/u = 315u^-3 - 1260u^-5 */ + float phi = -1260.f * u_inv; + phi = phi * u_inv + 315.f; + phi = phi * u_inv; + phi = phi * u_inv; + phi = phi * u_inv; + + return phi; } #endif /* SWIFT_KERNEL_GRAVITY_H */ diff --git a/src/multipole.h b/src/multipole.h index 004757924cccb6bc2f450c19f1ccd600f50e1990..e408e5b6e0b38f724648e3a9bbade30b76e09db0 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -45,48 +45,48 @@ struct grav_tensor { /* 0th order terms */ - double F_000; + float F_000; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 /* 1st order terms */ - double F_100, F_010, F_001; + float F_100, F_010, F_001; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 /* 2nd order terms */ - double 
F_200, F_020, F_002; - double F_110, F_101, F_011; + float F_200, F_020, F_002; + float F_110, F_101, F_011; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 /* 3rd order terms */ - double F_300, F_030, F_003; - double F_210, F_201; - double F_120, F_021; - double F_102, F_012; - double F_111; + float F_300, F_030, F_003; + float F_210, F_201; + float F_120, F_021; + float F_102, F_012; + float F_111; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 /* 4th order terms */ - double F_400, F_040, F_004; - double F_310, F_301; - double F_130, F_031; - double F_103, F_013; - double F_220, F_202, F_022; - double F_211, F_121, F_112; + float F_400, F_040, F_004; + float F_310, F_301; + float F_130, F_031; + float F_103, F_013; + float F_220, F_202, F_022; + float F_211, F_121, F_112; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 /* 5th order terms */ - double F_005, F_014, F_023; - double F_032, F_041, F_050; - double F_104, F_113, F_122; - double F_131, F_140, F_203; - double F_212, F_221, F_230; - double F_302, F_311, F_320; - double F_401, F_410, F_500; + float F_005, F_014, F_023; + float F_032, F_041, F_050; + float F_104, F_113, F_122; + float F_131, F_140, F_203; + float F_212, F_221, F_230; + float F_302, F_311, F_320; + float F_401, F_410, F_500; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 #error "Missing implementation for order >5" @@ -96,7 +96,13 @@ struct grav_tensor { /* Total number of gpart this field tensor interacted with */ long long num_interacted; + /* Last time this tensor was zeroed */ + integertime_t ti_init; + #endif + + /* Has this tensor received any contribution? */ + char interacted; }; struct multipole { @@ -173,6 +179,12 @@ struct gravity_tensors { /*! The actual content */ struct { + /*! Multipole mass */ + struct multipole m_pole; + + /*! Field tensor for the potential */ + struct grav_tensor pot; + /*! Centre of mass of the matter dsitribution */ double CoM[3]; @@ -184,12 +196,6 @@ struct gravity_tensors { /*! 
Upper limit of the CoM<->gpart distance at the last rebuild */ double r_max_rebuild; - - /*! Multipole mass */ - struct multipole m_pole; - - /*! Field tensor for the potential */ - struct grav_tensor pot; }; }; } SWIFT_STRUCT_ALIGN; @@ -210,8 +216,11 @@ INLINE static void gravity_reset(struct gravity_tensors *m) { * * @param m The #multipole. * @param dt The drift time-step. + * @param x_diff The maximal distance moved by any particle since the last + * rebuild. */ -INLINE static void gravity_drift(struct gravity_tensors *m, double dt) { +INLINE static void gravity_drift(struct gravity_tensors *m, double dt, + float x_diff) { const double dx = m->m_pole.vel[0] * dt; const double dy = m->m_pole.vel[1] * dt; @@ -223,22 +232,27 @@ INLINE static void gravity_drift(struct gravity_tensors *m, double dt) { m->CoM[2] += dz; /* Conservative change in maximal radius containing all gpart */ - /* MATTHIEU: Use gpart->x_diff here ? */ - m->r_max += sqrt(dx * dx + dy * dy + dz * dz); + m->r_max = m->r_max_rebuild + 2. * x_diff; } /** * @brief Zeroes all the fields of a field tensor * * @param l The field tensor. + * @param ti_current The current (integer) time (for debugging only). */ -INLINE static void gravity_field_tensors_init(struct grav_tensor *l) { +INLINE static void gravity_field_tensors_init(struct grav_tensor *l, + integertime_t ti_current) { bzero(l, sizeof(struct grav_tensor)); + +#ifdef SWIFT_DEBUG_CHECKS + l->ti_init = ti_current; +#endif } /** - * @brief Adds field tensrs to other ones (i.e. does la += lb). + * @brief Adds a field tensor to another one (i.e. does la += lb). * * @param la The gravity tensors to add to. * @param lb The gravity tensors to add. 
@@ -250,6 +264,8 @@ INLINE static void gravity_field_tensors_add(struct grav_tensor *la, la->num_interacted += lb->num_interacted; #endif + la->interacted = 1; + /* Add 0th order terms */ la->F_000 += lb->F_000; @@ -338,6 +354,7 @@ INLINE static void gravity_field_tensors_add(struct grav_tensor *la, INLINE static void gravity_field_tensors_print(const struct grav_tensor *l) { printf("-------------------------\n"); + printf("Interacted: %d\n", l->interacted); printf("F_000= %12.5e\n", l->F_000); #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 printf("-------------------------\n"); @@ -1507,12 +1524,13 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, const double dim[3]) { /* Recover some constants */ - const double eps2 = props->epsilon2; + const float eps = props->epsilon; + const float eps_inv = props->epsilon_inv; /* Compute distance vector */ - double dx = pos_b[0] - pos_a[0]; - double dy = pos_b[1] - pos_a[1]; - double dz = pos_b[2] - pos_a[2]; + float dx = (float)(pos_b[0] - pos_a[0]); + float dy = (float)(pos_b[1] - pos_a[1]); + float dz = (float)(pos_b[2] - pos_a[2]); /* Apply BC */ if (periodic) { @@ -1522,652 +1540,350 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, } /* Compute distance */ - const double r2 = dx * dx + dy * dy + dz * dz; - const double r_inv = 1. / sqrt(r2); + const float r2 = dx * dx + dy * dy + dz * dz; + const float r_inv = 1. 
/ sqrtf(r2); + + /* Compute all derivatives */ + struct potential_derivatives_M2L pot; + compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, &pot); #ifdef SWIFT_DEBUG_CHECKS /* Count interactions */ l_b->num_interacted += m_a->num_gpart; #endif - /* Un-softened case */ - if (r2 > eps2) { + /* Record that this tensor has received contributions */ + l_b->interacted = 1; - /* 0th order term */ - l_b->F_000 += m_a->M_000 * D_000(dx, dy, dz, r_inv); + /* 0th order term */ + l_b->F_000 += m_a->M_000 * pot.D_000; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - /* 1st order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_100 * D_100(dx, dy, dz, r_inv) + - m_a->M_010 * D_010(dx, dy, dz, r_inv) + - m_a->M_001 * D_001(dx, dy, dz, r_inv); + /* 1st order multipole term (addition to rank 0)*/ + l_b->F_000 += + m_a->M_100 * pot.D_100 + m_a->M_010 * pot.D_010 + m_a->M_001 * pot.D_001; - /* 1st order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_000 * D_100(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_000 * D_010(dx, dy, dz, r_inv); - l_b->F_001 += m_a->M_000 * D_001(dx, dy, dz, r_inv); + /* 1st order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_000 * pot.D_100; + l_b->F_010 += m_a->M_000 * pot.D_010; + l_b->F_001 += m_a->M_000 * pot.D_001; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - /* 2nd order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_200 * D_200(dx, dy, dz, r_inv) + - m_a->M_020 * D_020(dx, dy, dz, r_inv) + - m_a->M_002 * D_002(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_110 * D_110(dx, dy, dz, r_inv) + - m_a->M_101 * D_101(dx, dy, dz, r_inv) + - m_a->M_011 * D_011(dx, dy, dz, r_inv); - - /* 2nd order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_100 * D_200(dx, dy, dz, r_inv) + - m_a->M_010 * D_110(dx, dy, dz, r_inv) + - m_a->M_001 * D_101(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_100 * D_110(dx, dy, dz, r_inv) + - m_a->M_010 * D_020(dx, dy, dz, r_inv) + - m_a->M_001 * D_011(dx, dy, dz, 
r_inv); - l_b->F_001 += m_a->M_100 * D_101(dx, dy, dz, r_inv) + - m_a->M_010 * D_011(dx, dy, dz, r_inv) + - m_a->M_001 * D_002(dx, dy, dz, r_inv); - - /* 2nd order multipole term (addition to rank 2)*/ - l_b->F_200 += m_a->M_000 * D_200(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_000 * D_020(dx, dy, dz, r_inv); - l_b->F_002 += m_a->M_000 * D_002(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_000 * D_110(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_000 * D_101(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_000 * D_011(dx, dy, dz, r_inv); + /* 2nd order multipole term (addition to rank 0)*/ + l_b->F_000 += + m_a->M_200 * pot.D_200 + m_a->M_020 * pot.D_020 + m_a->M_002 * pot.D_002; + l_b->F_000 += + m_a->M_110 * pot.D_110 + m_a->M_101 * pot.D_101 + m_a->M_011 * pot.D_011; + + /* 2nd order multipole term (addition to rank 1)*/ + l_b->F_100 += + m_a->M_100 * pot.D_200 + m_a->M_010 * pot.D_110 + m_a->M_001 * pot.D_101; + l_b->F_010 += + m_a->M_100 * pot.D_110 + m_a->M_010 * pot.D_020 + m_a->M_001 * pot.D_011; + l_b->F_001 += + m_a->M_100 * pot.D_101 + m_a->M_010 * pot.D_011 + m_a->M_001 * pot.D_002; + + /* 2nd order multipole term (addition to rank 2)*/ + l_b->F_200 += m_a->M_000 * pot.D_200; + l_b->F_020 += m_a->M_000 * pot.D_020; + l_b->F_002 += m_a->M_000 * pot.D_002; + l_b->F_110 += m_a->M_000 * pot.D_110; + l_b->F_101 += m_a->M_000 * pot.D_101; + l_b->F_011 += m_a->M_000 * pot.D_011; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - /* 3rd order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_300 * D_300(dx, dy, dz, r_inv) + - m_a->M_030 * D_030(dx, dy, dz, r_inv) + - m_a->M_003 * D_003(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_210 * D_210(dx, dy, dz, r_inv) + - m_a->M_201 * D_201(dx, dy, dz, r_inv) + - m_a->M_120 * D_120(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_021 * D_021(dx, dy, dz, r_inv) + - m_a->M_102 * D_102(dx, dy, dz, r_inv) + - m_a->M_012 * D_012(dx, dy, dz, r_inv); - l_b->F_000 += m_a->M_111 * D_111(dx, dy, dz, r_inv); - - /* 3rd order multipole term 
(addition to rank 1)*/ - l_b->F_100 += m_a->M_200 * D_300(dx, dy, dz, r_inv) + - m_a->M_020 * D_120(dx, dy, dz, r_inv) + - m_a->M_002 * D_102(dx, dy, dz, r_inv); - l_b->F_100 += m_a->M_110 * D_210(dx, dy, dz, r_inv) + - m_a->M_101 * D_201(dx, dy, dz, r_inv) + - m_a->M_011 * D_111(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_200 * D_210(dx, dy, dz, r_inv) + - m_a->M_020 * D_030(dx, dy, dz, r_inv) + - m_a->M_002 * D_012(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_110 * D_120(dx, dy, dz, r_inv) + - m_a->M_101 * D_111(dx, dy, dz, r_inv) + - m_a->M_011 * D_021(dx, dy, dz, r_inv); - l_b->F_001 += m_a->M_200 * D_201(dx, dy, dz, r_inv) + - m_a->M_020 * D_021(dx, dy, dz, r_inv) + - m_a->M_002 * D_003(dx, dy, dz, r_inv); - l_b->F_001 += m_a->M_110 * D_111(dx, dy, dz, r_inv) + - m_a->M_101 * D_102(dx, dy, dz, r_inv) + - m_a->M_011 * D_012(dx, dy, dz, r_inv); - - /* 3rd order multipole term (addition to rank 2)*/ - l_b->F_200 += m_a->M_100 * D_300(dx, dy, dz, r_inv) + - m_a->M_010 * D_210(dx, dy, dz, r_inv) + - m_a->M_001 * D_201(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_100 * D_120(dx, dy, dz, r_inv) + - m_a->M_010 * D_030(dx, dy, dz, r_inv) + - m_a->M_001 * D_021(dx, dy, dz, r_inv); - l_b->F_002 += m_a->M_100 * D_102(dx, dy, dz, r_inv) + - m_a->M_010 * D_012(dx, dy, dz, r_inv) + - m_a->M_001 * D_003(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_100 * D_210(dx, dy, dz, r_inv) + - m_a->M_010 * D_120(dx, dy, dz, r_inv) + - m_a->M_001 * D_111(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_100 * D_201(dx, dy, dz, r_inv) + - m_a->M_010 * D_111(dx, dy, dz, r_inv) + - m_a->M_001 * D_102(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_100 * D_111(dx, dy, dz, r_inv) + - m_a->M_010 * D_021(dx, dy, dz, r_inv) + - m_a->M_001 * D_012(dx, dy, dz, r_inv); - - /* 3rd order multipole term (addition to rank 3)*/ - l_b->F_300 += m_a->M_000 * D_300(dx, dy, dz, r_inv); - l_b->F_030 += m_a->M_000 * D_030(dx, dy, dz, r_inv); - l_b->F_003 += m_a->M_000 * D_003(dx, dy, dz, r_inv); - l_b->F_210 += m_a->M_000 * 
D_210(dx, dy, dz, r_inv); - l_b->F_201 += m_a->M_000 * D_201(dx, dy, dz, r_inv); - l_b->F_120 += m_a->M_000 * D_120(dx, dy, dz, r_inv); - l_b->F_021 += m_a->M_000 * D_021(dx, dy, dz, r_inv); - l_b->F_102 += m_a->M_000 * D_102(dx, dy, dz, r_inv); - l_b->F_012 += m_a->M_000 * D_012(dx, dy, dz, r_inv); - l_b->F_111 += m_a->M_000 * D_111(dx, dy, dz, r_inv); + /* 3rd order multipole term (addition to rank 0)*/ + l_b->F_000 += + m_a->M_300 * pot.D_300 + m_a->M_030 * pot.D_030 + m_a->M_003 * pot.D_003; + l_b->F_000 += + m_a->M_210 * pot.D_210 + m_a->M_201 * pot.D_201 + m_a->M_120 * pot.D_120; + l_b->F_000 += + m_a->M_021 * pot.D_021 + m_a->M_102 * pot.D_102 + m_a->M_012 * pot.D_012; + l_b->F_000 += m_a->M_111 * pot.D_111; + + /* 3rd order multipole term (addition to rank 1)*/ + l_b->F_100 += + m_a->M_200 * pot.D_300 + m_a->M_020 * pot.D_120 + m_a->M_002 * pot.D_102; + l_b->F_100 += + m_a->M_110 * pot.D_210 + m_a->M_101 * pot.D_201 + m_a->M_011 * pot.D_111; + l_b->F_010 += + m_a->M_200 * pot.D_210 + m_a->M_020 * pot.D_030 + m_a->M_002 * pot.D_012; + l_b->F_010 += + m_a->M_110 * pot.D_120 + m_a->M_101 * pot.D_111 + m_a->M_011 * pot.D_021; + l_b->F_001 += + m_a->M_200 * pot.D_201 + m_a->M_020 * pot.D_021 + m_a->M_002 * pot.D_003; + l_b->F_001 += + m_a->M_110 * pot.D_111 + m_a->M_101 * pot.D_102 + m_a->M_011 * pot.D_012; + + /* 3rd order multipole term (addition to rank 2)*/ + l_b->F_200 += + m_a->M_100 * pot.D_300 + m_a->M_010 * pot.D_210 + m_a->M_001 * pot.D_201; + l_b->F_020 += + m_a->M_100 * pot.D_120 + m_a->M_010 * pot.D_030 + m_a->M_001 * pot.D_021; + l_b->F_002 += + m_a->M_100 * pot.D_102 + m_a->M_010 * pot.D_012 + m_a->M_001 * pot.D_003; + l_b->F_110 += + m_a->M_100 * pot.D_210 + m_a->M_010 * pot.D_120 + m_a->M_001 * pot.D_111; + l_b->F_101 += + m_a->M_100 * pot.D_201 + m_a->M_010 * pot.D_111 + m_a->M_001 * pot.D_102; + l_b->F_011 += + m_a->M_100 * pot.D_111 + m_a->M_010 * pot.D_021 + m_a->M_001 * pot.D_012; + + /* 3rd order multipole term (addition to rank 3)*/ + 
l_b->F_300 += m_a->M_000 * pot.D_300; + l_b->F_030 += m_a->M_000 * pot.D_030; + l_b->F_003 += m_a->M_000 * pot.D_003; + l_b->F_210 += m_a->M_000 * pot.D_210; + l_b->F_201 += m_a->M_000 * pot.D_201; + l_b->F_120 += m_a->M_000 * pot.D_120; + l_b->F_021 += m_a->M_000 * pot.D_021; + l_b->F_102 += m_a->M_000 * pot.D_102; + l_b->F_012 += m_a->M_000 * pot.D_012; + l_b->F_111 += m_a->M_000 * pot.D_111; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 3 - /* Compute 4th order field tensor terms (addition to rank 0) */ - l_b->F_000 += m_a->M_004 * D_004(dx, dy, dz, r_inv) + - m_a->M_013 * D_013(dx, dy, dz, r_inv) + - m_a->M_022 * D_022(dx, dy, dz, r_inv) + - m_a->M_031 * D_031(dx, dy, dz, r_inv) + - m_a->M_040 * D_040(dx, dy, dz, r_inv) + - m_a->M_103 * D_103(dx, dy, dz, r_inv) + - m_a->M_112 * D_112(dx, dy, dz, r_inv) + - m_a->M_121 * D_121(dx, dy, dz, r_inv) + - m_a->M_130 * D_130(dx, dy, dz, r_inv) + - m_a->M_202 * D_202(dx, dy, dz, r_inv) + - m_a->M_211 * D_211(dx, dy, dz, r_inv) + - m_a->M_220 * D_220(dx, dy, dz, r_inv) + - m_a->M_301 * D_301(dx, dy, dz, r_inv) + - m_a->M_310 * D_310(dx, dy, dz, r_inv) + - m_a->M_400 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 1) */ - l_b->F_001 += m_a->M_003 * D_004(dx, dy, dz, r_inv) + - m_a->M_012 * D_013(dx, dy, dz, r_inv) + - m_a->M_021 * D_022(dx, dy, dz, r_inv) + - m_a->M_030 * D_031(dx, dy, dz, r_inv) + - m_a->M_102 * D_103(dx, dy, dz, r_inv) + - m_a->M_111 * D_112(dx, dy, dz, r_inv) + - m_a->M_120 * D_121(dx, dy, dz, r_inv) + - m_a->M_201 * D_202(dx, dy, dz, r_inv) + - m_a->M_210 * D_211(dx, dy, dz, r_inv) + - m_a->M_300 * D_301(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_003 * D_013(dx, dy, dz, r_inv) + - m_a->M_012 * D_022(dx, dy, dz, r_inv) + - m_a->M_021 * D_031(dx, dy, dz, r_inv) + - m_a->M_030 * D_040(dx, dy, dz, r_inv) + - m_a->M_102 * D_112(dx, dy, dz, r_inv) + - m_a->M_111 * D_121(dx, dy, dz, r_inv) + - m_a->M_120 * D_130(dx, dy, dz, r_inv) + - m_a->M_201 * D_211(dx, dy, dz, r_inv) 
+ - m_a->M_210 * D_220(dx, dy, dz, r_inv) + - m_a->M_300 * D_310(dx, dy, dz, r_inv); - l_b->F_100 += m_a->M_003 * D_103(dx, dy, dz, r_inv) + - m_a->M_012 * D_112(dx, dy, dz, r_inv) + - m_a->M_021 * D_121(dx, dy, dz, r_inv) + - m_a->M_030 * D_130(dx, dy, dz, r_inv) + - m_a->M_102 * D_202(dx, dy, dz, r_inv) + - m_a->M_111 * D_211(dx, dy, dz, r_inv) + - m_a->M_120 * D_220(dx, dy, dz, r_inv) + - m_a->M_201 * D_301(dx, dy, dz, r_inv) + - m_a->M_210 * D_310(dx, dy, dz, r_inv) + - m_a->M_300 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 2) */ - l_b->F_002 += m_a->M_002 * D_004(dx, dy, dz, r_inv) + - m_a->M_011 * D_013(dx, dy, dz, r_inv) + - m_a->M_020 * D_022(dx, dy, dz, r_inv) + - m_a->M_101 * D_103(dx, dy, dz, r_inv) + - m_a->M_110 * D_112(dx, dy, dz, r_inv) + - m_a->M_200 * D_202(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_002 * D_013(dx, dy, dz, r_inv) + - m_a->M_011 * D_022(dx, dy, dz, r_inv) + - m_a->M_020 * D_031(dx, dy, dz, r_inv) + - m_a->M_101 * D_112(dx, dy, dz, r_inv) + - m_a->M_110 * D_121(dx, dy, dz, r_inv) + - m_a->M_200 * D_211(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_002 * D_022(dx, dy, dz, r_inv) + - m_a->M_011 * D_031(dx, dy, dz, r_inv) + - m_a->M_020 * D_040(dx, dy, dz, r_inv) + - m_a->M_101 * D_121(dx, dy, dz, r_inv) + - m_a->M_110 * D_130(dx, dy, dz, r_inv) + - m_a->M_200 * D_220(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_002 * D_103(dx, dy, dz, r_inv) + - m_a->M_011 * D_112(dx, dy, dz, r_inv) + - m_a->M_020 * D_121(dx, dy, dz, r_inv) + - m_a->M_101 * D_202(dx, dy, dz, r_inv) + - m_a->M_110 * D_211(dx, dy, dz, r_inv) + - m_a->M_200 * D_301(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_002 * D_112(dx, dy, dz, r_inv) + - m_a->M_011 * D_121(dx, dy, dz, r_inv) + - m_a->M_020 * D_130(dx, dy, dz, r_inv) + - m_a->M_101 * D_211(dx, dy, dz, r_inv) + - m_a->M_110 * D_220(dx, dy, dz, r_inv) + - m_a->M_200 * D_310(dx, dy, dz, r_inv); - l_b->F_200 += m_a->M_002 * D_202(dx, dy, dz, r_inv) + - m_a->M_011 * D_211(dx, dy, dz, 
r_inv) + - m_a->M_020 * D_220(dx, dy, dz, r_inv) + - m_a->M_101 * D_301(dx, dy, dz, r_inv) + - m_a->M_110 * D_310(dx, dy, dz, r_inv) + - m_a->M_200 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 3) */ - l_b->F_003 += m_a->M_001 * D_004(dx, dy, dz, r_inv) + - m_a->M_010 * D_013(dx, dy, dz, r_inv) + - m_a->M_100 * D_103(dx, dy, dz, r_inv); - l_b->F_012 += m_a->M_001 * D_013(dx, dy, dz, r_inv) + - m_a->M_010 * D_022(dx, dy, dz, r_inv) + - m_a->M_100 * D_112(dx, dy, dz, r_inv); - l_b->F_021 += m_a->M_001 * D_022(dx, dy, dz, r_inv) + - m_a->M_010 * D_031(dx, dy, dz, r_inv) + - m_a->M_100 * D_121(dx, dy, dz, r_inv); - l_b->F_030 += m_a->M_001 * D_031(dx, dy, dz, r_inv) + - m_a->M_010 * D_040(dx, dy, dz, r_inv) + - m_a->M_100 * D_130(dx, dy, dz, r_inv); - l_b->F_102 += m_a->M_001 * D_103(dx, dy, dz, r_inv) + - m_a->M_010 * D_112(dx, dy, dz, r_inv) + - m_a->M_100 * D_202(dx, dy, dz, r_inv); - l_b->F_111 += m_a->M_001 * D_112(dx, dy, dz, r_inv) + - m_a->M_010 * D_121(dx, dy, dz, r_inv) + - m_a->M_100 * D_211(dx, dy, dz, r_inv); - l_b->F_120 += m_a->M_001 * D_121(dx, dy, dz, r_inv) + - m_a->M_010 * D_130(dx, dy, dz, r_inv) + - m_a->M_100 * D_220(dx, dy, dz, r_inv); - l_b->F_201 += m_a->M_001 * D_202(dx, dy, dz, r_inv) + - m_a->M_010 * D_211(dx, dy, dz, r_inv) + - m_a->M_100 * D_301(dx, dy, dz, r_inv); - l_b->F_210 += m_a->M_001 * D_211(dx, dy, dz, r_inv) + - m_a->M_010 * D_220(dx, dy, dz, r_inv) + - m_a->M_100 * D_310(dx, dy, dz, r_inv); - l_b->F_300 += m_a->M_001 * D_301(dx, dy, dz, r_inv) + - m_a->M_010 * D_310(dx, dy, dz, r_inv) + - m_a->M_100 * D_400(dx, dy, dz, r_inv); - - /* Compute 4th order field tensor terms (addition to rank 4) */ - l_b->F_004 += m_a->M_000 * D_004(dx, dy, dz, r_inv); - l_b->F_013 += m_a->M_000 * D_013(dx, dy, dz, r_inv); - l_b->F_022 += m_a->M_000 * D_022(dx, dy, dz, r_inv); - l_b->F_031 += m_a->M_000 * D_031(dx, dy, dz, r_inv); - l_b->F_040 += m_a->M_000 * D_040(dx, dy, dz, r_inv); - l_b->F_103 += 
m_a->M_000 * D_103(dx, dy, dz, r_inv); - l_b->F_112 += m_a->M_000 * D_112(dx, dy, dz, r_inv); - l_b->F_121 += m_a->M_000 * D_121(dx, dy, dz, r_inv); - l_b->F_130 += m_a->M_000 * D_130(dx, dy, dz, r_inv); - l_b->F_202 += m_a->M_000 * D_202(dx, dy, dz, r_inv); - l_b->F_211 += m_a->M_000 * D_211(dx, dy, dz, r_inv); - l_b->F_220 += m_a->M_000 * D_220(dx, dy, dz, r_inv); - l_b->F_301 += m_a->M_000 * D_301(dx, dy, dz, r_inv); - l_b->F_310 += m_a->M_000 * D_310(dx, dy, dz, r_inv); - l_b->F_400 += m_a->M_000 * D_400(dx, dy, dz, r_inv); + /* Compute 4th order field tensor terms (addition to rank 0) */ + l_b->F_000 += + m_a->M_004 * pot.D_004 + m_a->M_013 * pot.D_013 + m_a->M_022 * pot.D_022 + + m_a->M_031 * pot.D_031 + m_a->M_040 * pot.D_040 + m_a->M_103 * pot.D_103 + + m_a->M_112 * pot.D_112 + m_a->M_121 * pot.D_121 + m_a->M_130 * pot.D_130 + + m_a->M_202 * pot.D_202 + m_a->M_211 * pot.D_211 + m_a->M_220 * pot.D_220 + + m_a->M_301 * pot.D_301 + m_a->M_310 * pot.D_310 + m_a->M_400 * pot.D_400; + + /* Compute 4th order field tensor terms (addition to rank 1) */ + l_b->F_001 += m_a->M_003 * pot.D_004 + m_a->M_012 * pot.D_013 + + m_a->M_021 * pot.D_022 + m_a->M_030 * pot.D_031 + + m_a->M_102 * pot.D_103 + m_a->M_111 * pot.D_112 + + m_a->M_120 * pot.D_121 + m_a->M_201 * pot.D_202 + + m_a->M_210 * pot.D_211 + m_a->M_300 * pot.D_301; + l_b->F_010 += m_a->M_003 * pot.D_013 + m_a->M_012 * pot.D_022 + + m_a->M_021 * pot.D_031 + m_a->M_030 * pot.D_040 + + m_a->M_102 * pot.D_112 + m_a->M_111 * pot.D_121 + + m_a->M_120 * pot.D_130 + m_a->M_201 * pot.D_211 + + m_a->M_210 * pot.D_220 + m_a->M_300 * pot.D_310; + l_b->F_100 += m_a->M_003 * pot.D_103 + m_a->M_012 * pot.D_112 + + m_a->M_021 * pot.D_121 + m_a->M_030 * pot.D_130 + + m_a->M_102 * pot.D_202 + m_a->M_111 * pot.D_211 + + m_a->M_120 * pot.D_220 + m_a->M_201 * pot.D_301 + + m_a->M_210 * pot.D_310 + m_a->M_300 * pot.D_400; + + /* Compute 4th order field tensor terms (addition to rank 2) */ + l_b->F_002 += m_a->M_002 * pot.D_004 + 
m_a->M_011 * pot.D_013 + + m_a->M_020 * pot.D_022 + m_a->M_101 * pot.D_103 + + m_a->M_110 * pot.D_112 + m_a->M_200 * pot.D_202; + l_b->F_011 += m_a->M_002 * pot.D_013 + m_a->M_011 * pot.D_022 + + m_a->M_020 * pot.D_031 + m_a->M_101 * pot.D_112 + + m_a->M_110 * pot.D_121 + m_a->M_200 * pot.D_211; + l_b->F_020 += m_a->M_002 * pot.D_022 + m_a->M_011 * pot.D_031 + + m_a->M_020 * pot.D_040 + m_a->M_101 * pot.D_121 + + m_a->M_110 * pot.D_130 + m_a->M_200 * pot.D_220; + l_b->F_101 += m_a->M_002 * pot.D_103 + m_a->M_011 * pot.D_112 + + m_a->M_020 * pot.D_121 + m_a->M_101 * pot.D_202 + + m_a->M_110 * pot.D_211 + m_a->M_200 * pot.D_301; + l_b->F_110 += m_a->M_002 * pot.D_112 + m_a->M_011 * pot.D_121 + + m_a->M_020 * pot.D_130 + m_a->M_101 * pot.D_211 + + m_a->M_110 * pot.D_220 + m_a->M_200 * pot.D_310; + l_b->F_200 += m_a->M_002 * pot.D_202 + m_a->M_011 * pot.D_211 + + m_a->M_020 * pot.D_220 + m_a->M_101 * pot.D_301 + + m_a->M_110 * pot.D_310 + m_a->M_200 * pot.D_400; + + /* Compute 4th order field tensor terms (addition to rank 3) */ + l_b->F_003 += + m_a->M_001 * pot.D_004 + m_a->M_010 * pot.D_013 + m_a->M_100 * pot.D_103; + l_b->F_012 += + m_a->M_001 * pot.D_013 + m_a->M_010 * pot.D_022 + m_a->M_100 * pot.D_112; + l_b->F_021 += + m_a->M_001 * pot.D_022 + m_a->M_010 * pot.D_031 + m_a->M_100 * pot.D_121; + l_b->F_030 += + m_a->M_001 * pot.D_031 + m_a->M_010 * pot.D_040 + m_a->M_100 * pot.D_130; + l_b->F_102 += + m_a->M_001 * pot.D_103 + m_a->M_010 * pot.D_112 + m_a->M_100 * pot.D_202; + l_b->F_111 += + m_a->M_001 * pot.D_112 + m_a->M_010 * pot.D_121 + m_a->M_100 * pot.D_211; + l_b->F_120 += + m_a->M_001 * pot.D_121 + m_a->M_010 * pot.D_130 + m_a->M_100 * pot.D_220; + l_b->F_201 += + m_a->M_001 * pot.D_202 + m_a->M_010 * pot.D_211 + m_a->M_100 * pot.D_301; + l_b->F_210 += + m_a->M_001 * pot.D_211 + m_a->M_010 * pot.D_220 + m_a->M_100 * pot.D_310; + l_b->F_300 += + m_a->M_001 * pot.D_301 + m_a->M_010 * pot.D_310 + m_a->M_100 * pot.D_400; + + /* Compute 4th order field tensor 
terms (addition to rank 4) */ + l_b->F_004 += m_a->M_000 * pot.D_004; + l_b->F_013 += m_a->M_000 * pot.D_013; + l_b->F_022 += m_a->M_000 * pot.D_022; + l_b->F_031 += m_a->M_000 * pot.D_031; + l_b->F_040 += m_a->M_000 * pot.D_040; + l_b->F_103 += m_a->M_000 * pot.D_103; + l_b->F_112 += m_a->M_000 * pot.D_112; + l_b->F_121 += m_a->M_000 * pot.D_121; + l_b->F_130 += m_a->M_000 * pot.D_130; + l_b->F_202 += m_a->M_000 * pot.D_202; + l_b->F_211 += m_a->M_000 * pot.D_211; + l_b->F_220 += m_a->M_000 * pot.D_220; + l_b->F_301 += m_a->M_000 * pot.D_301; + l_b->F_310 += m_a->M_000 * pot.D_310; + l_b->F_400 += m_a->M_000 * pot.D_400; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 4 - /* Compute 5th order field tensor terms (addition to rank 0) */ - l_b->F_000 += m_a->M_005 * D_005(dx, dy, dz, r_inv) + - m_a->M_014 * D_014(dx, dy, dz, r_inv) + - m_a->M_023 * D_023(dx, dy, dz, r_inv) + - m_a->M_032 * D_032(dx, dy, dz, r_inv) + - m_a->M_041 * D_041(dx, dy, dz, r_inv) + - m_a->M_050 * D_050(dx, dy, dz, r_inv) + - m_a->M_104 * D_104(dx, dy, dz, r_inv) + - m_a->M_113 * D_113(dx, dy, dz, r_inv) + - m_a->M_122 * D_122(dx, dy, dz, r_inv) + - m_a->M_131 * D_131(dx, dy, dz, r_inv) + - m_a->M_140 * D_140(dx, dy, dz, r_inv) + - m_a->M_203 * D_203(dx, dy, dz, r_inv) + - m_a->M_212 * D_212(dx, dy, dz, r_inv) + - m_a->M_221 * D_221(dx, dy, dz, r_inv) + - m_a->M_230 * D_230(dx, dy, dz, r_inv) + - m_a->M_302 * D_302(dx, dy, dz, r_inv) + - m_a->M_311 * D_311(dx, dy, dz, r_inv) + - m_a->M_320 * D_320(dx, dy, dz, r_inv) + - m_a->M_401 * D_401(dx, dy, dz, r_inv) + - m_a->M_410 * D_410(dx, dy, dz, r_inv) + - m_a->M_500 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 1) */ - l_b->F_001 += m_a->M_004 * D_005(dx, dy, dz, r_inv) + - m_a->M_013 * D_014(dx, dy, dz, r_inv) + - m_a->M_022 * D_023(dx, dy, dz, r_inv) + - m_a->M_031 * D_032(dx, dy, dz, r_inv) + - m_a->M_040 * D_041(dx, dy, dz, r_inv) + - m_a->M_103 * D_104(dx, dy, dz, r_inv) + - m_a->M_112 * D_113(dx, dy, 
dz, r_inv) + - m_a->M_121 * D_122(dx, dy, dz, r_inv) + - m_a->M_130 * D_131(dx, dy, dz, r_inv) + - m_a->M_202 * D_203(dx, dy, dz, r_inv) + - m_a->M_211 * D_212(dx, dy, dz, r_inv) + - m_a->M_220 * D_221(dx, dy, dz, r_inv) + - m_a->M_301 * D_302(dx, dy, dz, r_inv) + - m_a->M_310 * D_311(dx, dy, dz, r_inv) + - m_a->M_400 * D_401(dx, dy, dz, r_inv); - l_b->F_010 += m_a->M_004 * D_014(dx, dy, dz, r_inv) + - m_a->M_013 * D_023(dx, dy, dz, r_inv) + - m_a->M_022 * D_032(dx, dy, dz, r_inv) + - m_a->M_031 * D_041(dx, dy, dz, r_inv) + - m_a->M_040 * D_050(dx, dy, dz, r_inv) + - m_a->M_103 * D_113(dx, dy, dz, r_inv) + - m_a->M_112 * D_122(dx, dy, dz, r_inv) + - m_a->M_121 * D_131(dx, dy, dz, r_inv) + - m_a->M_130 * D_140(dx, dy, dz, r_inv) + - m_a->M_202 * D_212(dx, dy, dz, r_inv) + - m_a->M_211 * D_221(dx, dy, dz, r_inv) + - m_a->M_220 * D_230(dx, dy, dz, r_inv) + - m_a->M_301 * D_311(dx, dy, dz, r_inv) + - m_a->M_310 * D_320(dx, dy, dz, r_inv) + - m_a->M_400 * D_410(dx, dy, dz, r_inv); - l_b->F_100 += m_a->M_004 * D_104(dx, dy, dz, r_inv) + - m_a->M_013 * D_113(dx, dy, dz, r_inv) + - m_a->M_022 * D_122(dx, dy, dz, r_inv) + - m_a->M_031 * D_131(dx, dy, dz, r_inv) + - m_a->M_040 * D_140(dx, dy, dz, r_inv) + - m_a->M_103 * D_203(dx, dy, dz, r_inv) + - m_a->M_112 * D_212(dx, dy, dz, r_inv) + - m_a->M_121 * D_221(dx, dy, dz, r_inv) + - m_a->M_130 * D_230(dx, dy, dz, r_inv) + - m_a->M_202 * D_302(dx, dy, dz, r_inv) + - m_a->M_211 * D_311(dx, dy, dz, r_inv) + - m_a->M_220 * D_320(dx, dy, dz, r_inv) + - m_a->M_301 * D_401(dx, dy, dz, r_inv) + - m_a->M_310 * D_410(dx, dy, dz, r_inv) + - m_a->M_400 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 2) */ - l_b->F_002 += m_a->M_003 * D_005(dx, dy, dz, r_inv) + - m_a->M_012 * D_014(dx, dy, dz, r_inv) + - m_a->M_021 * D_023(dx, dy, dz, r_inv) + - m_a->M_030 * D_032(dx, dy, dz, r_inv) + - m_a->M_102 * D_104(dx, dy, dz, r_inv) + - m_a->M_111 * D_113(dx, dy, dz, r_inv) + - m_a->M_120 * D_122(dx, dy, 
dz, r_inv) + - m_a->M_201 * D_203(dx, dy, dz, r_inv) + - m_a->M_210 * D_212(dx, dy, dz, r_inv) + - m_a->M_300 * D_302(dx, dy, dz, r_inv); - l_b->F_011 += m_a->M_003 * D_014(dx, dy, dz, r_inv) + - m_a->M_012 * D_023(dx, dy, dz, r_inv) + - m_a->M_021 * D_032(dx, dy, dz, r_inv) + - m_a->M_030 * D_041(dx, dy, dz, r_inv) + - m_a->M_102 * D_113(dx, dy, dz, r_inv) + - m_a->M_111 * D_122(dx, dy, dz, r_inv) + - m_a->M_120 * D_131(dx, dy, dz, r_inv) + - m_a->M_201 * D_212(dx, dy, dz, r_inv) + - m_a->M_210 * D_221(dx, dy, dz, r_inv) + - m_a->M_300 * D_311(dx, dy, dz, r_inv); - l_b->F_020 += m_a->M_003 * D_023(dx, dy, dz, r_inv) + - m_a->M_012 * D_032(dx, dy, dz, r_inv) + - m_a->M_021 * D_041(dx, dy, dz, r_inv) + - m_a->M_030 * D_050(dx, dy, dz, r_inv) + - m_a->M_102 * D_122(dx, dy, dz, r_inv) + - m_a->M_111 * D_131(dx, dy, dz, r_inv) + - m_a->M_120 * D_140(dx, dy, dz, r_inv) + - m_a->M_201 * D_221(dx, dy, dz, r_inv) + - m_a->M_210 * D_230(dx, dy, dz, r_inv) + - m_a->M_300 * D_320(dx, dy, dz, r_inv); - l_b->F_101 += m_a->M_003 * D_104(dx, dy, dz, r_inv) + - m_a->M_012 * D_113(dx, dy, dz, r_inv) + - m_a->M_021 * D_122(dx, dy, dz, r_inv) + - m_a->M_030 * D_131(dx, dy, dz, r_inv) + - m_a->M_102 * D_203(dx, dy, dz, r_inv) + - m_a->M_111 * D_212(dx, dy, dz, r_inv) + - m_a->M_120 * D_221(dx, dy, dz, r_inv) + - m_a->M_201 * D_302(dx, dy, dz, r_inv) + - m_a->M_210 * D_311(dx, dy, dz, r_inv) + - m_a->M_300 * D_401(dx, dy, dz, r_inv); - l_b->F_110 += m_a->M_003 * D_113(dx, dy, dz, r_inv) + - m_a->M_012 * D_122(dx, dy, dz, r_inv) + - m_a->M_021 * D_131(dx, dy, dz, r_inv) + - m_a->M_030 * D_140(dx, dy, dz, r_inv) + - m_a->M_102 * D_212(dx, dy, dz, r_inv) + - m_a->M_111 * D_221(dx, dy, dz, r_inv) + - m_a->M_120 * D_230(dx, dy, dz, r_inv) + - m_a->M_201 * D_311(dx, dy, dz, r_inv) + - m_a->M_210 * D_320(dx, dy, dz, r_inv) + - m_a->M_300 * D_410(dx, dy, dz, r_inv); - l_b->F_200 += m_a->M_003 * D_203(dx, dy, dz, r_inv) + - m_a->M_012 * D_212(dx, dy, dz, r_inv) + - m_a->M_021 * D_221(dx, dy, 
dz, r_inv) + - m_a->M_030 * D_230(dx, dy, dz, r_inv) + - m_a->M_102 * D_302(dx, dy, dz, r_inv) + - m_a->M_111 * D_311(dx, dy, dz, r_inv) + - m_a->M_120 * D_320(dx, dy, dz, r_inv) + - m_a->M_201 * D_401(dx, dy, dz, r_inv) + - m_a->M_210 * D_410(dx, dy, dz, r_inv) + - m_a->M_300 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 3) */ - l_b->F_003 += m_a->M_002 * D_005(dx, dy, dz, r_inv) + - m_a->M_011 * D_014(dx, dy, dz, r_inv) + - m_a->M_020 * D_023(dx, dy, dz, r_inv) + - m_a->M_101 * D_104(dx, dy, dz, r_inv) + - m_a->M_110 * D_113(dx, dy, dz, r_inv) + - m_a->M_200 * D_203(dx, dy, dz, r_inv); - l_b->F_012 += m_a->M_002 * D_014(dx, dy, dz, r_inv) + - m_a->M_011 * D_023(dx, dy, dz, r_inv) + - m_a->M_020 * D_032(dx, dy, dz, r_inv) + - m_a->M_101 * D_113(dx, dy, dz, r_inv) + - m_a->M_110 * D_122(dx, dy, dz, r_inv) + - m_a->M_200 * D_212(dx, dy, dz, r_inv); - l_b->F_021 += m_a->M_002 * D_023(dx, dy, dz, r_inv) + - m_a->M_011 * D_032(dx, dy, dz, r_inv) + - m_a->M_020 * D_041(dx, dy, dz, r_inv) + - m_a->M_101 * D_122(dx, dy, dz, r_inv) + - m_a->M_110 * D_131(dx, dy, dz, r_inv) + - m_a->M_200 * D_221(dx, dy, dz, r_inv); - l_b->F_030 += m_a->M_002 * D_032(dx, dy, dz, r_inv) + - m_a->M_011 * D_041(dx, dy, dz, r_inv) + - m_a->M_020 * D_050(dx, dy, dz, r_inv) + - m_a->M_101 * D_131(dx, dy, dz, r_inv) + - m_a->M_110 * D_140(dx, dy, dz, r_inv) + - m_a->M_200 * D_230(dx, dy, dz, r_inv); - l_b->F_102 += m_a->M_002 * D_104(dx, dy, dz, r_inv) + - m_a->M_011 * D_113(dx, dy, dz, r_inv) + - m_a->M_020 * D_122(dx, dy, dz, r_inv) + - m_a->M_101 * D_203(dx, dy, dz, r_inv) + - m_a->M_110 * D_212(dx, dy, dz, r_inv) + - m_a->M_200 * D_302(dx, dy, dz, r_inv); - l_b->F_111 += m_a->M_002 * D_113(dx, dy, dz, r_inv) + - m_a->M_011 * D_122(dx, dy, dz, r_inv) + - m_a->M_020 * D_131(dx, dy, dz, r_inv) + - m_a->M_101 * D_212(dx, dy, dz, r_inv) + - m_a->M_110 * D_221(dx, dy, dz, r_inv) + - m_a->M_200 * D_311(dx, dy, dz, r_inv); - l_b->F_120 += m_a->M_002 * 
D_122(dx, dy, dz, r_inv) + - m_a->M_011 * D_131(dx, dy, dz, r_inv) + - m_a->M_020 * D_140(dx, dy, dz, r_inv) + - m_a->M_101 * D_221(dx, dy, dz, r_inv) + - m_a->M_110 * D_230(dx, dy, dz, r_inv) + - m_a->M_200 * D_320(dx, dy, dz, r_inv); - l_b->F_201 += m_a->M_002 * D_203(dx, dy, dz, r_inv) + - m_a->M_011 * D_212(dx, dy, dz, r_inv) + - m_a->M_020 * D_221(dx, dy, dz, r_inv) + - m_a->M_101 * D_302(dx, dy, dz, r_inv) + - m_a->M_110 * D_311(dx, dy, dz, r_inv) + - m_a->M_200 * D_401(dx, dy, dz, r_inv); - l_b->F_210 += m_a->M_002 * D_212(dx, dy, dz, r_inv) + - m_a->M_011 * D_221(dx, dy, dz, r_inv) + - m_a->M_020 * D_230(dx, dy, dz, r_inv) + - m_a->M_101 * D_311(dx, dy, dz, r_inv) + - m_a->M_110 * D_320(dx, dy, dz, r_inv) + - m_a->M_200 * D_410(dx, dy, dz, r_inv); - l_b->F_300 += m_a->M_002 * D_302(dx, dy, dz, r_inv) + - m_a->M_011 * D_311(dx, dy, dz, r_inv) + - m_a->M_020 * D_320(dx, dy, dz, r_inv) + - m_a->M_101 * D_401(dx, dy, dz, r_inv) + - m_a->M_110 * D_410(dx, dy, dz, r_inv) + - m_a->M_200 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 4) */ - l_b->F_004 += m_a->M_001 * D_005(dx, dy, dz, r_inv) + - m_a->M_010 * D_014(dx, dy, dz, r_inv) + - m_a->M_100 * D_104(dx, dy, dz, r_inv); - l_b->F_013 += m_a->M_001 * D_014(dx, dy, dz, r_inv) + - m_a->M_010 * D_023(dx, dy, dz, r_inv) + - m_a->M_100 * D_113(dx, dy, dz, r_inv); - l_b->F_022 += m_a->M_001 * D_023(dx, dy, dz, r_inv) + - m_a->M_010 * D_032(dx, dy, dz, r_inv) + - m_a->M_100 * D_122(dx, dy, dz, r_inv); - l_b->F_031 += m_a->M_001 * D_032(dx, dy, dz, r_inv) + - m_a->M_010 * D_041(dx, dy, dz, r_inv) + - m_a->M_100 * D_131(dx, dy, dz, r_inv); - l_b->F_040 += m_a->M_001 * D_041(dx, dy, dz, r_inv) + - m_a->M_010 * D_050(dx, dy, dz, r_inv) + - m_a->M_100 * D_140(dx, dy, dz, r_inv); - l_b->F_103 += m_a->M_001 * D_104(dx, dy, dz, r_inv) + - m_a->M_010 * D_113(dx, dy, dz, r_inv) + - m_a->M_100 * D_203(dx, dy, dz, r_inv); - l_b->F_112 += m_a->M_001 * D_113(dx, dy, dz, r_inv) + - 
m_a->M_010 * D_122(dx, dy, dz, r_inv) + - m_a->M_100 * D_212(dx, dy, dz, r_inv); - l_b->F_121 += m_a->M_001 * D_122(dx, dy, dz, r_inv) + - m_a->M_010 * D_131(dx, dy, dz, r_inv) + - m_a->M_100 * D_221(dx, dy, dz, r_inv); - l_b->F_130 += m_a->M_001 * D_131(dx, dy, dz, r_inv) + - m_a->M_010 * D_140(dx, dy, dz, r_inv) + - m_a->M_100 * D_230(dx, dy, dz, r_inv); - l_b->F_202 += m_a->M_001 * D_203(dx, dy, dz, r_inv) + - m_a->M_010 * D_212(dx, dy, dz, r_inv) + - m_a->M_100 * D_302(dx, dy, dz, r_inv); - l_b->F_211 += m_a->M_001 * D_212(dx, dy, dz, r_inv) + - m_a->M_010 * D_221(dx, dy, dz, r_inv) + - m_a->M_100 * D_311(dx, dy, dz, r_inv); - l_b->F_220 += m_a->M_001 * D_221(dx, dy, dz, r_inv) + - m_a->M_010 * D_230(dx, dy, dz, r_inv) + - m_a->M_100 * D_320(dx, dy, dz, r_inv); - l_b->F_301 += m_a->M_001 * D_302(dx, dy, dz, r_inv) + - m_a->M_010 * D_311(dx, dy, dz, r_inv) + - m_a->M_100 * D_401(dx, dy, dz, r_inv); - l_b->F_310 += m_a->M_001 * D_311(dx, dy, dz, r_inv) + - m_a->M_010 * D_320(dx, dy, dz, r_inv) + - m_a->M_100 * D_410(dx, dy, dz, r_inv); - l_b->F_400 += m_a->M_001 * D_401(dx, dy, dz, r_inv) + - m_a->M_010 * D_410(dx, dy, dz, r_inv) + - m_a->M_100 * D_500(dx, dy, dz, r_inv); - - /* Compute 5th order field tensor terms (addition to rank 5) */ - l_b->F_005 += m_a->M_000 * D_005(dx, dy, dz, r_inv); - l_b->F_014 += m_a->M_000 * D_014(dx, dy, dz, r_inv); - l_b->F_023 += m_a->M_000 * D_023(dx, dy, dz, r_inv); - l_b->F_032 += m_a->M_000 * D_032(dx, dy, dz, r_inv); - l_b->F_041 += m_a->M_000 * D_041(dx, dy, dz, r_inv); - l_b->F_050 += m_a->M_000 * D_050(dx, dy, dz, r_inv); - l_b->F_104 += m_a->M_000 * D_104(dx, dy, dz, r_inv); - l_b->F_113 += m_a->M_000 * D_113(dx, dy, dz, r_inv); - l_b->F_122 += m_a->M_000 * D_122(dx, dy, dz, r_inv); - l_b->F_131 += m_a->M_000 * D_131(dx, dy, dz, r_inv); - l_b->F_140 += m_a->M_000 * D_140(dx, dy, dz, r_inv); - l_b->F_203 += m_a->M_000 * D_203(dx, dy, dz, r_inv); - l_b->F_212 += m_a->M_000 * D_212(dx, dy, dz, r_inv); - l_b->F_221 += 
m_a->M_000 * D_221(dx, dy, dz, r_inv); - l_b->F_230 += m_a->M_000 * D_230(dx, dy, dz, r_inv); - l_b->F_302 += m_a->M_000 * D_302(dx, dy, dz, r_inv); - l_b->F_311 += m_a->M_000 * D_311(dx, dy, dz, r_inv); - l_b->F_320 += m_a->M_000 * D_320(dx, dy, dz, r_inv); - l_b->F_401 += m_a->M_000 * D_401(dx, dy, dz, r_inv); - l_b->F_410 += m_a->M_000 * D_410(dx, dy, dz, r_inv); - l_b->F_500 += m_a->M_000 * D_500(dx, dy, dz, r_inv); + /* Compute 5th order field tensor terms (addition to rank 0) */ + l_b->F_000 += + m_a->M_005 * pot.D_005 + m_a->M_014 * pot.D_014 + m_a->M_023 * pot.D_023 + + m_a->M_032 * pot.D_032 + m_a->M_041 * pot.D_041 + m_a->M_050 * pot.D_050 + + m_a->M_104 * pot.D_104 + m_a->M_113 * pot.D_113 + m_a->M_122 * pot.D_122 + + m_a->M_131 * pot.D_131 + m_a->M_140 * pot.D_140 + m_a->M_203 * pot.D_203 + + m_a->M_212 * pot.D_212 + m_a->M_221 * pot.D_221 + m_a->M_230 * pot.D_230 + + m_a->M_302 * pot.D_302 + m_a->M_311 * pot.D_311 + m_a->M_320 * pot.D_320 + + m_a->M_401 * pot.D_401 + m_a->M_410 * pot.D_410 + m_a->M_500 * pot.D_500; + + /* Compute 5th order field tensor terms (addition to rank 1) */ + l_b->F_001 += + m_a->M_004 * pot.D_005 + m_a->M_013 * pot.D_014 + m_a->M_022 * pot.D_023 + + m_a->M_031 * pot.D_032 + m_a->M_040 * pot.D_041 + m_a->M_103 * pot.D_104 + + m_a->M_112 * pot.D_113 + m_a->M_121 * pot.D_122 + m_a->M_130 * pot.D_131 + + m_a->M_202 * pot.D_203 + m_a->M_211 * pot.D_212 + m_a->M_220 * pot.D_221 + + m_a->M_301 * pot.D_302 + m_a->M_310 * pot.D_311 + m_a->M_400 * pot.D_401; + l_b->F_010 += + m_a->M_004 * pot.D_014 + m_a->M_013 * pot.D_023 + m_a->M_022 * pot.D_032 + + m_a->M_031 * pot.D_041 + m_a->M_040 * pot.D_050 + m_a->M_103 * pot.D_113 + + m_a->M_112 * pot.D_122 + m_a->M_121 * pot.D_131 + m_a->M_130 * pot.D_140 + + m_a->M_202 * pot.D_212 + m_a->M_211 * pot.D_221 + m_a->M_220 * pot.D_230 + + m_a->M_301 * pot.D_311 + m_a->M_310 * pot.D_320 + m_a->M_400 * pot.D_410; + l_b->F_100 += + m_a->M_004 * pot.D_104 + m_a->M_013 * pot.D_113 + m_a->M_022 * 
pot.D_122 + + m_a->M_031 * pot.D_131 + m_a->M_040 * pot.D_140 + m_a->M_103 * pot.D_203 + + m_a->M_112 * pot.D_212 + m_a->M_121 * pot.D_221 + m_a->M_130 * pot.D_230 + + m_a->M_202 * pot.D_302 + m_a->M_211 * pot.D_311 + m_a->M_220 * pot.D_320 + + m_a->M_301 * pot.D_401 + m_a->M_310 * pot.D_410 + m_a->M_400 * pot.D_500; + + /* Compute 5th order field tensor terms (addition to rank 2) */ + l_b->F_002 += m_a->M_003 * pot.D_005 + m_a->M_012 * pot.D_014 + + m_a->M_021 * pot.D_023 + m_a->M_030 * pot.D_032 + + m_a->M_102 * pot.D_104 + m_a->M_111 * pot.D_113 + + m_a->M_120 * pot.D_122 + m_a->M_201 * pot.D_203 + + m_a->M_210 * pot.D_212 + m_a->M_300 * pot.D_302; + l_b->F_011 += m_a->M_003 * pot.D_014 + m_a->M_012 * pot.D_023 + + m_a->M_021 * pot.D_032 + m_a->M_030 * pot.D_041 + + m_a->M_102 * pot.D_113 + m_a->M_111 * pot.D_122 + + m_a->M_120 * pot.D_131 + m_a->M_201 * pot.D_212 + + m_a->M_210 * pot.D_221 + m_a->M_300 * pot.D_311; + l_b->F_020 += m_a->M_003 * pot.D_023 + m_a->M_012 * pot.D_032 + + m_a->M_021 * pot.D_041 + m_a->M_030 * pot.D_050 + + m_a->M_102 * pot.D_122 + m_a->M_111 * pot.D_131 + + m_a->M_120 * pot.D_140 + m_a->M_201 * pot.D_221 + + m_a->M_210 * pot.D_230 + m_a->M_300 * pot.D_320; + l_b->F_101 += m_a->M_003 * pot.D_104 + m_a->M_012 * pot.D_113 + + m_a->M_021 * pot.D_122 + m_a->M_030 * pot.D_131 + + m_a->M_102 * pot.D_203 + m_a->M_111 * pot.D_212 + + m_a->M_120 * pot.D_221 + m_a->M_201 * pot.D_302 + + m_a->M_210 * pot.D_311 + m_a->M_300 * pot.D_401; + l_b->F_110 += m_a->M_003 * pot.D_113 + m_a->M_012 * pot.D_122 + + m_a->M_021 * pot.D_131 + m_a->M_030 * pot.D_140 + + m_a->M_102 * pot.D_212 + m_a->M_111 * pot.D_221 + + m_a->M_120 * pot.D_230 + m_a->M_201 * pot.D_311 + + m_a->M_210 * pot.D_320 + m_a->M_300 * pot.D_410; + l_b->F_200 += m_a->M_003 * pot.D_203 + m_a->M_012 * pot.D_212 + + m_a->M_021 * pot.D_221 + m_a->M_030 * pot.D_230 + + m_a->M_102 * pot.D_302 + m_a->M_111 * pot.D_311 + + m_a->M_120 * pot.D_320 + m_a->M_201 * pot.D_401 + + m_a->M_210 * pot.D_410 
+ m_a->M_300 * pot.D_500; + + /* Compute 5th order field tensor terms (addition to rank 3) */ + l_b->F_003 += m_a->M_002 * pot.D_005 + m_a->M_011 * pot.D_014 + + m_a->M_020 * pot.D_023 + m_a->M_101 * pot.D_104 + + m_a->M_110 * pot.D_113 + m_a->M_200 * pot.D_203; + l_b->F_012 += m_a->M_002 * pot.D_014 + m_a->M_011 * pot.D_023 + + m_a->M_020 * pot.D_032 + m_a->M_101 * pot.D_113 + + m_a->M_110 * pot.D_122 + m_a->M_200 * pot.D_212; + l_b->F_021 += m_a->M_002 * pot.D_023 + m_a->M_011 * pot.D_032 + + m_a->M_020 * pot.D_041 + m_a->M_101 * pot.D_122 + + m_a->M_110 * pot.D_131 + m_a->M_200 * pot.D_221; + l_b->F_030 += m_a->M_002 * pot.D_032 + m_a->M_011 * pot.D_041 + + m_a->M_020 * pot.D_050 + m_a->M_101 * pot.D_131 + + m_a->M_110 * pot.D_140 + m_a->M_200 * pot.D_230; + l_b->F_102 += m_a->M_002 * pot.D_104 + m_a->M_011 * pot.D_113 + + m_a->M_020 * pot.D_122 + m_a->M_101 * pot.D_203 + + m_a->M_110 * pot.D_212 + m_a->M_200 * pot.D_302; + l_b->F_111 += m_a->M_002 * pot.D_113 + m_a->M_011 * pot.D_122 + + m_a->M_020 * pot.D_131 + m_a->M_101 * pot.D_212 + + m_a->M_110 * pot.D_221 + m_a->M_200 * pot.D_311; + l_b->F_120 += m_a->M_002 * pot.D_122 + m_a->M_011 * pot.D_131 + + m_a->M_020 * pot.D_140 + m_a->M_101 * pot.D_221 + + m_a->M_110 * pot.D_230 + m_a->M_200 * pot.D_320; + l_b->F_201 += m_a->M_002 * pot.D_203 + m_a->M_011 * pot.D_212 + + m_a->M_020 * pot.D_221 + m_a->M_101 * pot.D_302 + + m_a->M_110 * pot.D_311 + m_a->M_200 * pot.D_401; + l_b->F_210 += m_a->M_002 * pot.D_212 + m_a->M_011 * pot.D_221 + + m_a->M_020 * pot.D_230 + m_a->M_101 * pot.D_311 + + m_a->M_110 * pot.D_320 + m_a->M_200 * pot.D_410; + l_b->F_300 += m_a->M_002 * pot.D_302 + m_a->M_011 * pot.D_311 + + m_a->M_020 * pot.D_320 + m_a->M_101 * pot.D_401 + + m_a->M_110 * pot.D_410 + m_a->M_200 * pot.D_500; + + /* Compute 5th order field tensor terms (addition to rank 4) */ + l_b->F_004 += + m_a->M_001 * pot.D_005 + m_a->M_010 * pot.D_014 + m_a->M_100 * pot.D_104; + l_b->F_013 += + m_a->M_001 * pot.D_014 + m_a->M_010 * 
pot.D_023 + m_a->M_100 * pot.D_113; + l_b->F_022 += + m_a->M_001 * pot.D_023 + m_a->M_010 * pot.D_032 + m_a->M_100 * pot.D_122; + l_b->F_031 += + m_a->M_001 * pot.D_032 + m_a->M_010 * pot.D_041 + m_a->M_100 * pot.D_131; + l_b->F_040 += + m_a->M_001 * pot.D_041 + m_a->M_010 * pot.D_050 + m_a->M_100 * pot.D_140; + l_b->F_103 += + m_a->M_001 * pot.D_104 + m_a->M_010 * pot.D_113 + m_a->M_100 * pot.D_203; + l_b->F_112 += + m_a->M_001 * pot.D_113 + m_a->M_010 * pot.D_122 + m_a->M_100 * pot.D_212; + l_b->F_121 += + m_a->M_001 * pot.D_122 + m_a->M_010 * pot.D_131 + m_a->M_100 * pot.D_221; + l_b->F_130 += + m_a->M_001 * pot.D_131 + m_a->M_010 * pot.D_140 + m_a->M_100 * pot.D_230; + l_b->F_202 += + m_a->M_001 * pot.D_203 + m_a->M_010 * pot.D_212 + m_a->M_100 * pot.D_302; + l_b->F_211 += + m_a->M_001 * pot.D_212 + m_a->M_010 * pot.D_221 + m_a->M_100 * pot.D_311; + l_b->F_220 += + m_a->M_001 * pot.D_221 + m_a->M_010 * pot.D_230 + m_a->M_100 * pot.D_320; + l_b->F_301 += + m_a->M_001 * pot.D_302 + m_a->M_010 * pot.D_311 + m_a->M_100 * pot.D_401; + l_b->F_310 += + m_a->M_001 * pot.D_311 + m_a->M_010 * pot.D_320 + m_a->M_100 * pot.D_410; + l_b->F_400 += + m_a->M_001 * pot.D_401 + m_a->M_010 * pot.D_410 + m_a->M_100 * pot.D_500; + + /* Compute 5th order field tensor terms (addition to rank 5) */ + l_b->F_005 += m_a->M_000 * pot.D_005; + l_b->F_014 += m_a->M_000 * pot.D_014; + l_b->F_023 += m_a->M_000 * pot.D_023; + l_b->F_032 += m_a->M_000 * pot.D_032; + l_b->F_041 += m_a->M_000 * pot.D_041; + l_b->F_050 += m_a->M_000 * pot.D_050; + l_b->F_104 += m_a->M_000 * pot.D_104; + l_b->F_113 += m_a->M_000 * pot.D_113; + l_b->F_122 += m_a->M_000 * pot.D_122; + l_b->F_131 += m_a->M_000 * pot.D_131; + l_b->F_140 += m_a->M_000 * pot.D_140; + l_b->F_203 += m_a->M_000 * pot.D_203; + l_b->F_212 += m_a->M_000 * pot.D_212; + l_b->F_221 += m_a->M_000 * pot.D_221; + l_b->F_230 += m_a->M_000 * pot.D_230; + l_b->F_302 += m_a->M_000 * pot.D_302; + l_b->F_311 += m_a->M_000 * pot.D_311; + l_b->F_320 += 
m_a->M_000 * pot.D_320; + l_b->F_401 += m_a->M_000 * pot.D_401; + l_b->F_410 += m_a->M_000 * pot.D_410; + l_b->F_500 += m_a->M_000 * pot.D_500; #endif #if SELF_GRAVITY_MULTIPOLE_ORDER > 5 #error "Missing implementation for order >5" #endif - - /* Softened case */ - } else { - - const double eps_inv = props->epsilon_inv; - const double r = r2 * r_inv; - - /* 0th order term */ - l_b->F_000 += m_a->M_000 * D_soft_000(dx, dy, dz, r, eps_inv); - -#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 - - /* 1st order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_100 * D_soft_100(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_010(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_001(dx, dy, dz, r, eps_inv); - - /* 1st order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_000 * D_soft_100(dx, dy, dz, r, eps_inv); - l_b->F_010 += m_a->M_000 * D_soft_010(dx, dy, dz, r, eps_inv); - l_b->F_001 += m_a->M_000 * D_soft_001(dx, dy, dz, r, eps_inv); -#endif -#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 - - /* 2nd order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_200 * D_soft_200(dx, dy, dz, r, eps_inv) + - m_a->M_020 * D_soft_020(dx, dy, dz, r, eps_inv) + - m_a->M_002 * D_soft_002(dx, dy, dz, r, eps_inv); - l_b->F_000 += m_a->M_110 * D_soft_110(dx, dy, dz, r, eps_inv) + - m_a->M_101 * D_soft_101(dx, dy, dz, r, eps_inv) + - m_a->M_011 * D_soft_011(dx, dy, dz, r, eps_inv); - - /* 2nd order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_100 * D_soft_200(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_110(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_101(dx, dy, dz, r, eps_inv); - l_b->F_010 += m_a->M_100 * D_soft_110(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_020(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_011(dx, dy, dz, r, eps_inv); - l_b->F_001 += m_a->M_100 * D_soft_101(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_011(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_002(dx, dy, dz, r, eps_inv); - - /* 2nd order multipole term (addition 
to rank 2)*/ - l_b->F_200 += m_a->M_000 * D_soft_200(dx, dy, dz, r, eps_inv); - l_b->F_020 += m_a->M_000 * D_soft_020(dx, dy, dz, r, eps_inv); - l_b->F_002 += m_a->M_000 * D_soft_002(dx, dy, dz, r, eps_inv); - l_b->F_110 += m_a->M_000 * D_soft_110(dx, dy, dz, r, eps_inv); - l_b->F_101 += m_a->M_000 * D_soft_101(dx, dy, dz, r, eps_inv); - l_b->F_011 += m_a->M_000 * D_soft_011(dx, dy, dz, r, eps_inv); -#endif -#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 - - /* 3rd order multipole term (addition to rank 0)*/ - l_b->F_000 += m_a->M_300 * D_soft_300(dx, dy, dz, r, eps_inv) + - m_a->M_030 * D_soft_030(dx, dy, dz, r, eps_inv) + - m_a->M_003 * D_soft_003(dx, dy, dz, r, eps_inv); - l_b->F_000 += m_a->M_210 * D_soft_210(dx, dy, dz, r, eps_inv) + - m_a->M_201 * D_soft_201(dx, dy, dz, r, eps_inv) + - m_a->M_120 * D_soft_120(dx, dy, dz, r, eps_inv); - l_b->F_000 += m_a->M_021 * D_soft_021(dx, dy, dz, r, eps_inv) + - m_a->M_102 * D_soft_102(dx, dy, dz, r, eps_inv) + - m_a->M_012 * D_soft_012(dx, dy, dz, r, eps_inv); - l_b->F_000 += m_a->M_111 * D_soft_111(dx, dy, dz, r, eps_inv); - - /* 3rd order multipole term (addition to rank 1)*/ - l_b->F_100 += m_a->M_200 * D_soft_300(dx, dy, dz, r, eps_inv) + - m_a->M_020 * D_soft_120(dx, dy, dz, r, eps_inv) + - m_a->M_002 * D_soft_102(dx, dy, dz, r, eps_inv); - l_b->F_100 += m_a->M_110 * D_soft_210(dx, dy, dz, r, eps_inv) + - m_a->M_101 * D_soft_201(dx, dy, dz, r, eps_inv) + - m_a->M_011 * D_soft_111(dx, dy, dz, r, eps_inv); - l_b->F_010 += m_a->M_200 * D_soft_210(dx, dy, dz, r, eps_inv) + - m_a->M_020 * D_soft_030(dx, dy, dz, r, eps_inv) + - m_a->M_002 * D_soft_012(dx, dy, dz, r, eps_inv); - l_b->F_010 += m_a->M_110 * D_soft_120(dx, dy, dz, r, eps_inv) + - m_a->M_101 * D_soft_111(dx, dy, dz, r, eps_inv) + - m_a->M_011 * D_soft_021(dx, dy, dz, r, eps_inv); - l_b->F_001 += m_a->M_200 * D_soft_201(dx, dy, dz, r, eps_inv) + - m_a->M_020 * D_soft_021(dx, dy, dz, r, eps_inv) + - m_a->M_002 * D_soft_003(dx, dy, dz, r, eps_inv); - l_b->F_001 += 
m_a->M_110 * D_soft_111(dx, dy, dz, r, eps_inv) + - m_a->M_101 * D_soft_102(dx, dy, dz, r, eps_inv) + - m_a->M_011 * D_soft_012(dx, dy, dz, r, eps_inv); - - /* 3rd order multipole term (addition to rank 2)*/ - l_b->F_200 += m_a->M_100 * D_soft_300(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_210(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_201(dx, dy, dz, r, eps_inv); - l_b->F_020 += m_a->M_100 * D_soft_120(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_030(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_021(dx, dy, dz, r, eps_inv); - l_b->F_002 += m_a->M_100 * D_soft_102(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_012(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_003(dx, dy, dz, r, eps_inv); - l_b->F_110 += m_a->M_100 * D_soft_210(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_120(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_111(dx, dy, dz, r, eps_inv); - l_b->F_101 += m_a->M_100 * D_soft_201(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_111(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_102(dx, dy, dz, r, eps_inv); - l_b->F_011 += m_a->M_100 * D_soft_111(dx, dy, dz, r, eps_inv) + - m_a->M_010 * D_soft_021(dx, dy, dz, r, eps_inv) + - m_a->M_001 * D_soft_012(dx, dy, dz, r, eps_inv); - - /* 3rd order multipole term (addition to rank 3)*/ - l_b->F_300 += m_a->M_000 * D_soft_300(dx, dy, dz, r, eps_inv); - l_b->F_030 += m_a->M_000 * D_soft_030(dx, dy, dz, r, eps_inv); - l_b->F_003 += m_a->M_000 * D_soft_003(dx, dy, dz, r, eps_inv); - l_b->F_210 += m_a->M_000 * D_soft_210(dx, dy, dz, r, eps_inv); - l_b->F_201 += m_a->M_000 * D_soft_201(dx, dy, dz, r, eps_inv); - l_b->F_120 += m_a->M_000 * D_soft_120(dx, dy, dz, r, eps_inv); - l_b->F_021 += m_a->M_000 * D_soft_021(dx, dy, dz, r, eps_inv); - l_b->F_102 += m_a->M_000 * D_soft_102(dx, dy, dz, r, eps_inv); - l_b->F_012 += m_a->M_000 * D_soft_012(dx, dy, dz, r, eps_inv); - l_b->F_111 += m_a->M_000 * D_soft_111(dx, dy, dz, r, eps_inv); -#endif - } } /** @@ -2185,7 +1901,7 @@ INLINE static void 
gravity_L2L(struct grav_tensor *la, const double pos_a[3], const double pos_b[3]) { /* Initialise everything to zero */ - gravity_field_tensors_init(la); + gravity_field_tensors_init(la, 0); #ifdef SWIFT_DEBUG_CHECKS if (lb->num_interacted == 0) error("Shifting tensors that did not interact"); @@ -2637,57 +2353,95 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, gp->a_grav[2] += a_grav[2]; } +INLINE static void gravity_M2P(const struct multipole *ma, + const struct gravity_props *props, + const double loc[3], struct gpart *gp) { + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + const float eps2 = props->epsilon2; + const float eps_inv = props->epsilon_inv; + const float eps_inv3 = props->epsilon_inv3; + + /* Distance to the multipole */ + const float dx = gp->x[0] - loc[0]; + const float dy = gp->x[1] - loc[1]; + const float dz = gp->x[2] - loc[2]; + const float r2 = dx * dx + dy * dy + dz * dz; + + /* Get the inverse distance */ + const float r_inv = 1.f / sqrtf(r2); + + float f, W; + + if (r2 >= eps2) { + + /* Get Newtonian gravity */ + f = ma->M_000 * r_inv * r_inv * r_inv; + + } else { + + const float r = r2 * r_inv; + const float u = r * eps_inv; + + kernel_grav_eval(u, &W); + + /* Get softened gravity */ + f = ma->M_000 * eps_inv3 * W; + } + + gp->a_grav[0] -= f * dx; + gp->a_grav[1] -= f * dy; + gp->a_grav[2] -= f * dz; + +#endif +} + /** * @brief Checks whether a cell-cell interaction can be appromixated by a M-M - * interaction using the CoM and cell radius at rebuild. + * interaction using the distance and cell radius. * * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179, * Issue 1, pp.27-42, equation 10. * - * @param ma The #multipole of the first #cell. - * @param mb The #multipole of the second #cell. - * @param theta_crit_inv The inverse of the critical opening angle. + * @param r_crit_a The size of the multipole A. + * @param r_crit_b The size of the multipole B. 
+ * @param theta_crit2 The square of the critical opening angle. * @param r2 Square of the distance (periodically wrapped) between the * multipoles. */ -__attribute__((always_inline)) INLINE static int -gravity_multipole_accept_rebuild(const struct gravity_tensors *const ma, - const struct gravity_tensors *const mb, - double theta_crit_inv, double r2) { +__attribute__((always_inline)) INLINE static int gravity_M2L_accept( + double r_crit_a, double r_crit_b, double theta_crit2, double r2) { - const double r_crit_a = ma->r_max_rebuild * theta_crit_inv; - const double r_crit_b = mb->r_max_rebuild * theta_crit_inv; + const double size = r_crit_a + r_crit_b; + const double size2 = size * size; // MATTHIEU: Make this mass-dependent ? /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ - return (r2 > (r_crit_a + r_crit_b) * (r_crit_a + r_crit_b)); + return (r2 * theta_crit2 > size2); } /** - * @brief Checks whether a cell-cell interaction can be appromixated by a M-M - * interaction using the CoM and cell radius at the current time. + * @brief Checks whether a particle-cell interaction can be approximated by a + * M2P + * interaction using the distance and cell radius. * * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179, * Issue 1, pp.27-42, equation 10. * - * @param ma The #multipole of the first #cell. - * @param mb The #multipole of the second #cell. - * @param theta_crit_inv The inverse of the critical opening angle. + * @param r_max2 The square of the size of the multipole. + * @param theta_crit2 The square of the critical opening angle. * @param r2 Square of the distance (periodically wrapped) between the * multipoles.
*/ -__attribute__((always_inline)) INLINE static int gravity_multipole_accept( - const struct gravity_tensors *const ma, - const struct gravity_tensors *const mb, double theta_crit_inv, double r2) { - - const double r_crit_a = ma->r_max * theta_crit_inv; - const double r_crit_b = mb->r_max * theta_crit_inv; +__attribute__((always_inline)) INLINE static int gravity_M2P_accept( + float r_max2, float theta_crit2, float r2) { // MATTHIEU: Make this mass-dependent ? /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ - return (r2 > (r_crit_a + r_crit_b) * (r_crit_a + r_crit_b)); + return (r2 * theta_crit2 > r_max2); } #endif /* SWIFT_MULTIPOLE_H */ diff --git a/src/runner.c b/src/runner.c index dd3f3c8e4e59af15485ece16665ffdae85703117..69c1512479e07da0aacad0a9e28bcaa6aafce104 100644 --- a/src/runner.c +++ b/src/runner.c @@ -557,7 +557,7 @@ void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { cell_drift_multipole(c, e); /* Reset the gravity acceleration tensors */ - gravity_field_tensors_init(&c->multipole->pot); + gravity_field_tensors_init(&c->multipole->pot, e->ti_current); /* Recurse? 
*/ if (c->split) { @@ -903,7 +903,7 @@ void runner_do_drift_gpart(struct runner *r, struct cell *c, int timer) { TIMER_TIC; - cell_drift_gpart(c, r->e); + cell_drift_gpart(c, r->e, 0); if (timer) TIMER_TOC(timer_drift_gpart); } @@ -1472,17 +1472,19 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS if (e->policy & engine_policy_self_gravity) { + /* Let's add a self interaction to simplify the count */ + gp->num_interacted++; + /* Check that this gpart has interacted with all the other * particles (via direct or multipoles) in the box */ - gp->num_interacted++; if (gp->num_interacted != (long long)e->s->nr_gparts) error( - "g-particle (id=%lld, type=%d) did not interact " + "g-particle (id=%lld, type=%s) did not interact " "gravitationally " "with all other gparts gp->num_interacted=%lld, " "total_gparts=%zd", - gp->id_or_neg_offset, gp->type, gp->num_interacted, - e->s->nr_gparts); + gp->id_or_neg_offset, part_type_names[gp->type], + gp->num_interacted, e->s->nr_gparts); } #endif } @@ -1900,10 +1902,6 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_self2_force(r, ci, 1); - else if (t->subtype == task_subtype_grav) - runner_dosub_grav(r, ci, cj, 1); - else if (t->subtype == task_subtype_external_grav) - runner_do_grav_external(r, ci, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; @@ -1917,8 +1915,6 @@ void *runner_main(void *data) { #endif else if (t->subtype == task_subtype_force) runner_dosub_pair2_force(r, ci, cj, t->flags, 1); - else if (t->subtype == task_subtype_grav) - runner_dosub_grav(r, ci, cj, 1); else error("Unknown/invalid task subtype (%d).", t->subtype); break; diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 69f821f0a991cb797a6ae6b2002ed83986759d86..dbf2311839f62ec25ebba95bc092d9c2306b4dea 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -47,6 +47,8 @@ void 
runner_do_grav_down(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted."); + if (c->multipole->pot.ti_init != e->ti_current) + error("c->field tensor not initialised"); #endif if (c->split) { /* Node case */ @@ -61,15 +63,21 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS if (cp->ti_old_multipole != e->ti_current) error("cp->multipole not drifted."); + if (cp->multipole->pot.ti_init != e->ti_current) + error("cp->field tensor not initialised"); #endif struct grav_tensor shifted_tensor; - /* Shift the field tensor */ - gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM, - c->multipole->CoM); + /* If the tensor received any contribution, push it down */ + if (c->multipole->pot.interacted) { - /* Add it to this level's tensor */ - gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor); + /* Shift the field tensor */ + gravity_L2L(&shifted_tensor, &c->multipole->pot, cp->multipole->CoM, + c->multipole->CoM); + + /* Add it to this level's tensor */ + gravity_field_tensors_add(&cp->multipole->pot, &shifted_tensor); + } /* Recurse */ runner_do_grav_down(r, cp, 0); @@ -78,6 +86,11 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { } else { /* Leaf case */ + /* We can abort early if no interactions via multipole happened */ + if (!c->multipole->pot.interacted) return; + + if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); + /* Apply accelerations to the particles */ for (int i = 0; i < gcount; ++i) { @@ -91,6 +104,8 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { /* Check that particles have been drifted to the current time */ if (gp->ti_drift != e->ti_current) error("gpart not drifted to current time"); + if (c->multipole->pot.ti_init != e->ti_current) + error("c->field tensor not initialised"); #endif /* Apply the kernel */ @@ -135,8 
+150,8 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set."); - if (ci->ti_old_multipole != e->ti_current) - error("ci->multipole not drifted."); + if (ci->multipole->pot.ti_init != e->ti_current) + error("ci->grav tensor not initialised."); #endif /* Do we need to drift the multipole ? */ @@ -149,763 +164,460 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, TIMER_TOC(timer_dopair_grav_mm); } -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell using the full Newtonian potential - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. - * @param shift The distance vector (periodically wrapped) between the cell - * centres. - */ -void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci, - struct cell *cj, double shift[3]) { - - /* Some constants */ - const struct engine *const e = r->e; - struct gravity_cache *const ci_cache = &r->ci_gravity_cache; - struct gravity_cache *const cj_cache = &r->cj_gravity_cache; +static INLINE void runner_dopair_grav_pp_full(const struct engine *e, + struct gravity_cache *ci_cache, + struct gravity_cache *cj_cache, + int gcount_i, int gcount_j, + int gcount_padded_j, + struct gpart *restrict gparts_i, + struct gpart *restrict gparts_j) { - /* Cell properties */ - const int gcount_i = ci->gcount; - const int gcount_j = cj->gcount; - struct gpart *restrict gparts_i = ci->gparts; - struct gpart *restrict gparts_j = cj->gparts; - const int ci_active = cell_is_active(ci, e); - const int cj_active = cell_is_active(cj, e); - const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; - const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]}; - const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]), - 0.5 * (loc_i[1] + loc_j[1]), - 0.5 * (loc_i[2] + loc_j[2])}; + TIMER_TIC; - /* Anything to do here 
?*/ - if (!ci_active && !cj_active) return; + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_i; pid++) { - /* Check that we fit in cache */ - if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) - error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i, - gcount_j); + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; - /* Computed the padded counts */ - const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; - const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; + /* Skip particle that can use the multipole */ + if (ci_cache->use_mpole[pid]) continue; - /* Fill the caches */ - gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, - loc_mean, ci); - gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j, - loc_mean, cj); +#ifdef SWIFT_DEBUG_CHECKS + if (!gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); +#endif - /* Ok... Here we go ! */ + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; - if (ci_active) { + /* Some powers of the softening length */ + const float h_i = ci_cache->epsilon[pid]; + const float h2_i = h_i * h_i; + const float h_inv_i = 1.f / h_i; + const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - /* Loop over all particles in ci... 
*/ - for (int pid = 0; pid < gcount_i; pid++) { + /* Local accumulators for the acceleration */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f; - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_i[pid], e)) continue; + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_j, VEC_SIZE); - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; + /* Loop over every particle in the other cell. */ + for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - /* Some powers of the softening length */ - const float h_i = ci_cache->epsilon[pid]; - const float h2_i = h_i * h_i; - const float h_inv_i = 1.f / h_i; - const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; + /* Get info about j */ + const float x_j = cj_cache->x[pjd]; + const float y_j = cj_cache->y[pjd]; + const float z_j = cj_cache->z[pjd]; + const float mass_j = cj_cache->m[pjd]; - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; + /* Compute the pairwise (square) distance. */ + const float dx = x_i - x_j; + const float dy = y_i - y_j; + const float dz = z_i - z_j; + const float r2 = dx * dx + dy * dy + dz * dz; - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); +#ifdef SWIFT_DEBUG_CHECKS + if (r2 == 0.f) error("Interacting particles with 0 distance"); - /* Loop over every particle in the other cell. 
*/ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; + /* Interact! */ + float f_ij; + runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij); - /* Compute the pairwise (square) distance. */ - const float dx = x_i - x_j; - const float dy = y_i - y_j; - const float dz = z_i - z_j; - const float r2 = dx * dx + dy * dy + dz * dz; + /* Store it back */ + a_x -= f_ij * dx; + a_y -= f_ij * dy; + a_z -= f_ij * dz; #ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); + /* Update the interaction counter if it's not a padded gpart */ + if (pjd < gcount_j) gparts_i[pid].num_interacted++; #endif + } - /* Get the inverse distance */ - const float r_inv = 1.f / sqrtf(r2); - - float f_ij, W_ij; - - if (r2 >= h2_i) { + /* Store everything back in cache */ + ci_cache->a_x[pid] = a_x; + ci_cache->a_y[pid] = a_y; + ci_cache->a_z[pid] = a_z; + } - /* Get Newtonian gravity */ - f_ij = mass_j * r_inv * r_inv * r_inv; + TIMER_TOC(timer_dopair_grav_pp); +} - } else { +static INLINE void runner_dopair_grav_pp_truncated( + const struct engine *e, const float rlr_inv, struct gravity_cache *ci_cache, + struct gravity_cache *cj_cache, int gcount_i, int gcount_j, + int gcount_padded_j, struct gpart *restrict gparts_i, + struct 
gpart *restrict gparts_j) { - const float r = r2 * r_inv; - const float ui = r * h_inv_i; + TIMER_TIC; - kernel_grav_eval(ui, &W_ij); + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_i; pid++) { - /* Get softened gravity */ - f_ij = mass_j * h_inv3_i * W_ij; - } + /* Skip inactive particles */ + if (!ci_cache->active[pid]) continue; - /* Store it back */ - a_x -= f_ij * dx; - a_y -= f_ij * dy; - a_z -= f_ij * dz; + /* Skip particle that can use the multipole */ + if (ci_cache->use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j) gparts_i[pid].num_interacted++; + if (!gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); #endif - } - /* Store everything back in cache */ - ci_cache->a_x[pid] = a_x; - ci_cache->a_y[pid] = a_y; - ci_cache->a_z[pid] = a_z; - } - } + const float x_i = ci_cache->x[pid]; + const float y_i = ci_cache->y[pid]; + const float z_i = ci_cache->z[pid]; - /* Now do the opposite loop */ - if (cj_active) { + /* Some powers of the softening length */ + const float h_i = ci_cache->epsilon[pid]; + const float h2_i = h_i * h_i; + const float h_inv_i = 1.f / h_i; + const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - /* Loop over all particles in ci... */ - for (int pjd = 0; pjd < gcount_j; pjd++) { + /* Local accumulators for the acceleration */ + float a_x = 0.f, a_y = 0.f, a_z = 0.f; - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_j[pjd], e)) continue; + /* Make the compiler understand we are in happy vectorization land */ + swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_j, VEC_SIZE); + /* Loop over every particle in the other cell. 
*/ + for (int pjd = 0; pjd < gcount_padded_j; pjd++) { + + /* Get info about j */ const float x_j = cj_cache->x[pjd]; const float y_j = cj_cache->y[pjd]; const float z_j = cj_cache->z[pjd]; + const float mass_j = cj_cache->m[pjd]; - /* Some powers of the softening length */ - const float h_j = cj_cache->epsilon[pjd]; - const float h2_j = h_j * h_j; - const float h_inv_j = 1.f / h_j; - const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j; - - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_padded_i; pid++) { - - /* Get info about j */ - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float mass_i = ci_cache->m[pid]; - - /* Compute the pairwise (square) distance. */ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - const float r2 = dx * dx + dy * dy + dz * dz; + /* Compute the pairwise (square) distance. 
*/ + const float dx = x_i - x_j; + const float dy = y_i - y_j; + const float dz = z_i - z_j; + const float r2 = dx * dx + dy * dy + dz * dz; #ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); + if (r2 == 0.f) error("Interacting particles with 0 distance"); - /* Check that particles have been drifted to the current time */ - if (gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); - if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); + /* Check that particles have been drifted to the current time */ + if (gparts_i[pid].ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) + error("gpj not drifted to current time"); #endif - /* Get the inverse distance */ - const float r_inv = 1.f / sqrtf(r2); - - float f_ji, W_ji; - - if (r2 >= h2_j) { - - /* Get Newtonian gravity */ - f_ji = mass_i * r_inv * r_inv * r_inv; - - } else { - - const float r = r2 * r_inv; - const float uj = r * h_inv_j; + /* Interact! 
*/ + float f_ij; + runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j, + rlr_inv, &f_ij); - kernel_grav_eval(uj, &W_ji); - - /* Get softened gravity */ - f_ji = mass_i * h_inv3_j * W_ji; - } - - /* Store it back */ - a_x -= f_ji * dx; - a_y -= f_ji * dy; - a_z -= f_ji * dz; + /* Store it back */ + a_x -= f_ij * dx; + a_y -= f_ij * dy; + a_z -= f_ij * dz; #ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pid < gcount_i) gparts_j[pjd].num_interacted++; + /* Update the interaction counter if it's not a padded gpart */ + if (pjd < gcount_j) gparts_i[pid].num_interacted++; #endif - } - - /* Store everything back in cache */ - cj_cache->a_x[pjd] = a_x; - cj_cache->a_y[pjd] = a_y; - cj_cache->a_z[pjd] = a_z; } - } - /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i); - if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j); + /* Store everything back in cache */ + ci_cache->a_x[pid] = a_x; + ci_cache->a_y[pid] = a_y; + ci_cache->a_z[pid] = a_z; + } -#ifdef MATTHIEU_OLD_STUFF + TIMER_TOC(timer_dopair_grav_pp); +} - /* Some constants */ - const struct engine *const e = r->e; +static INLINE void runner_dopair_grav_pm( + const struct engine *restrict e, struct gravity_cache *ci_cache, + int gcount_i, int gcount_padded_i, struct gpart *restrict gparts_i, + const float CoM_j[3], const struct multipole *restrict multi_j, + struct cell *restrict cj) { - /* Cell properties */ - const int gcount_i = ci->gcount; - const int gcount_j = cj->gcount; - struct gpart *restrict gparts_i = ci->gparts; - struct gpart *restrict gparts_j = cj->gparts; + TIMER_TIC; - /* MATTHIEU: Should we use local DP accumulators ? 
*/ + /* Make the compiler understand we are in happy vectorization land */ + swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, + SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, ci_cache->active, + SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, + SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded_i, VEC_SIZE); /* Loop over all particles in ci... */ - if (cell_is_active(ci, e)) { - for (int pid = 0; pid < gcount_i; pid++) { - - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpi = &gparts_i[pid]; - - if (!gpart_is_active(gpi, e)) continue; - - /* Apply boundary condition */ - const double pix[3] = {gpi->x[0] - shift[0], gpi->x[1] - shift[1], - gpi->x[2] - shift[2]}; - - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_j; pjd++) { - - /* Get a hold of the jth part in cj. */ - const struct gpart *restrict gpj = &gparts_j[pjd]; + for (int pid = 0; pid < gcount_padded_i; pid++) { - /* Compute the pairwise distance. 
*/ - const float dx[3] = {pix[0] - gpj->x[0], // x - pix[1] - gpj->x[1], // y - pix[2] - gpj->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gpi->ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (gpj->ti_drift != e->ti_current) - error("gpj not drifted to current time"); -#endif + /* Skip inactive particles */ + if (!active[pid]) continue; - /* Interact ! */ - runner_iact_grav_pp_nonsym(r2, dx, gpi, gpj); + /* Skip particle that cannot use the multipole */ + if (!use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; + if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) + error("Active particle went through the cache"); #endif - } - } - } - /* Loop over all particles in cj... */ - if (cell_is_active(cj, e)) { - for (int pjd = 0; pjd < gcount_j; pjd++) { + const float x_i = x[pid]; + const float y_i = y[pid]; + const float z_i = z[pid]; - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpj = &gparts_j[pjd]; - - if (!gpart_is_active(gpj, e)) continue; - - /* Apply boundary condition */ - const double pjx[3] = {gpj->x[0] + shift[0], gpj->x[1] + shift[1], - gpj->x[2] + shift[2]}; + /* Some powers of the softening length */ + const float h_i = epsilon[pid]; + const float h_inv_i = 1.f / h_i; - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_i; pid++) { + /* Distance to the Multipole */ + const float dx = x_i - CoM_j[0]; + const float dy = y_i - CoM_j[1]; + const float dz = z_i - CoM_j[2]; + const float r2 = dx * dx + dy * dy + dz * dz; - /* Get a hold of the ith part in ci. */ - const struct gpart *restrict gpi = &gparts_i[pid]; + /* Interact! */ + float f_x, f_y, f_z; + runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, + &f_z); - /* Compute the pairwise distance. 
*/ - const float dx[3] = {pjx[0] - gpi->x[0], // x - pjx[1] - gpi->x[1], // y - pjx[2] - gpi->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* Store it back */ + a_x[pid] = f_x; + a_y[pid] = f_y; + a_z[pid] = f_z; #ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gpi->ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (gpj->ti_drift != e->ti_current) - error("gpj not drifted to current time"); + /* Update the interaction counter */ + if (pid < gcount_i) + gparts_i[pid].num_interacted += cj->multipole->m_pole.num_gpart; #endif - - /* Interact ! */ - runner_iact_grav_pp_nonsym(r2, dx, gpj, gpi); - -#ifdef SWIFT_DEBUG_CHECKS - gpj->num_interacted++; -#endif - } - } } -#endif + + TIMER_TOC(timer_dopair_grav_pm); } /** * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell using the truncated Newtonian potential + * particles of another cell (switching function between full and truncated). * * @param r The #runner. * @param ci The first #cell. * @param cj The other #cell. - * @param shift The distance vector (periodically wrapped) between the cell - * centres. */ -void runner_dopair_grav_pp_truncated(struct runner *r, struct cell *ci, - struct cell *cj, double shift[3]) { +void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { - /* Some constants */ - const struct engine *const e = r->e; - const struct space *s = e->s; + const struct engine *e = r->e; + + TIMER_TIC; + + /* Anything to do here? 
*/ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + /* Check that we are not doing something stupid */ + if (ci->split || cj->split) error("Running P-P on splitable cells"); + + /* Let's start by drifting things */ + if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); + if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); + + /* Recover some useful constants */ + struct space *s = e->s; + const int periodic = s->periodic; const double cell_width = s->width[0]; + const float theta_crit2 = e->gravity_properties->theta_crit2; const double a_smooth = e->gravity_properties->a_smooth; + const double r_cut_min = e->gravity_properties->r_cut_min; const double rlr = cell_width * a_smooth; + const double min_trunc = rlr * r_cut_min; const float rlr_inv = 1. / rlr; /* Caches to play with */ struct gravity_cache *const ci_cache = &r->ci_gravity_cache; struct gravity_cache *const cj_cache = &r->cj_gravity_cache; - /* Cell properties */ - const int gcount_i = ci->gcount; - const int gcount_j = cj->gcount; - struct gpart *restrict gparts_i = ci->gparts; - struct gpart *restrict gparts_j = cj->gparts; + /* Get the distance vector between the pairs, wrapping. */ + double cell_shift[3]; + space_getsid(s, &ci, &cj, cell_shift); + + /* Record activity status */ const int ci_active = cell_is_active(ci, e); const int cj_active = cell_is_active(cj, e); - const double loc_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; - const double loc_j[3] = {cj->loc[0], cj->loc[1], cj->loc[2]}; - const double loc_mean[3] = {0.5 * (loc_i[0] + loc_j[0]), - 0.5 * (loc_i[1] + loc_j[1]), - 0.5 * (loc_i[2] + loc_j[2])}; - /* Anything to do here ?*/ - if (!ci_active && !cj_active) return; + /* Do we need to drift the multipoles ? 
*/ + if (cj_active && ci->ti_old_multipole != e->ti_current) + cell_drift_multipole(ci, e); + if (ci_active && cj->ti_old_multipole != e->ti_current) + cell_drift_multipole(cj, e); + + /* Centre of the cell pair */ + const double loc[3] = {ci->loc[0], // + 0. * ci->width[0], + ci->loc[1], // + 0. * ci->width[1], + ci->loc[2]}; // + 0. * ci->width[2]}; + + /* Shift to apply to the particles in each cell */ + const double shift_i[3] = {loc[0] + cell_shift[0], loc[1] + cell_shift[1], + loc[2] + cell_shift[2]}; + const double shift_j[3] = {loc[0], loc[1], loc[2]}; + + /* Recover the multipole info and shift the CoM locations */ + const float rmax_i = ci->multipole->r_max; + const float rmax_j = cj->multipole->r_max; + const float rmax2_i = rmax_i * rmax_i; + const float rmax2_j = rmax_j * rmax_j; + const struct multipole *multi_i = &ci->multipole->m_pole; + const struct multipole *multi_j = &cj->multipole->m_pole; + const float CoM_i[3] = {ci->multipole->CoM[0] - shift_i[0], + ci->multipole->CoM[1] - shift_i[1], + ci->multipole->CoM[2] - shift_i[2]}; + const float CoM_j[3] = {cj->multipole->CoM[0] - shift_j[0], + cj->multipole->CoM[1] - shift_j[1], + cj->multipole->CoM[2] - shift_j[2]}; - /* Check that we fit in cache */ - if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) - error("Not enough space in the caches! gcount_i=%d gcount_j=%d", gcount_i, - gcount_j); + /* Start by constructing particle caches */ /* Computed the padded counts */ + const int gcount_i = ci->gcount; + const int gcount_j = cj->gcount; const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; - /* Fill the caches */ - gravity_cache_populate(ci_cache, gparts_i, gcount_i, gcount_padded_i, - loc_mean, ci); - gravity_cache_populate(cj_cache, gparts_j, gcount_j, gcount_padded_j, - loc_mean, cj); - - /* Ok... Here we go ! */ - - if (ci_active) { - - /* Loop over all particles in ci... 
*/ - for (int pid = 0; pid < gcount_i; pid++) { - - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_i[pid], e)) continue; - - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - - /* Some powers of the softening length */ - const float h_i = ci_cache->epsilon[pid]; - const float h2_i = h_i * h_i; - const float h_inv_i = 1.f / h_i; - const float h_inv3_i = h_inv_i * h_inv_i * h_inv_i; - - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(cj_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(cj_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_j, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_padded_j; pjd++) { - - /* Get info about j */ - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - const float mass_j = cj_cache->m[pjd]; - - /* Compute the pairwise (square) distance. */ - const float dx = x_i - x_j; - const float dy = y_i - y_j; - const float dz = z_i - z_j; - const float r2 = dx * dx + dy * dy + dz * dz; - #ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); - - /* Check that particles have been drifted to the current time */ - if (gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); + /* Check that we fit in cache */ + if (gcount_i > ci_cache->count || gcount_j > cj_cache->count) + error("Not enough space in the caches! 
gcount_i=%d gcount_j=%d", gcount_i, + gcount_j); #endif - /* Get the inverse distance */ - const float r_inv = 1.f / sqrtf(r2); - const float r = r2 * r_inv; - - float f_ij, W_ij, corr_lr; - - if (r2 >= h2_i) { - - /* Get Newtonian gravity */ - f_ij = mass_j * r_inv * r_inv * r_inv; - - } else { - - const float ui = r * h_inv_i; - - kernel_grav_eval(ui, &W_ij); + /* Fill the caches */ + gravity_cache_populate(e->max_active_bin, ci_cache, ci->gparts, gcount_i, + gcount_padded_i, shift_i, CoM_j, rmax2_j, theta_crit2, + ci); + gravity_cache_populate(e->max_active_bin, cj_cache, cj->gparts, gcount_j, + gcount_padded_j, shift_j, CoM_i, rmax2_i, theta_crit2, + cj); - /* Get softened gravity */ - f_ij = mass_j * h_inv3_i * W_ij; - } + /* Can we use the Newtonian version or do we need the truncated one ? */ + if (!periodic) { - /* Get long-range correction */ - const float u_lr = r * rlr_inv; - kernel_long_grav_eval(u_lr, &corr_lr); - f_ij *= corr_lr; + /* Not periodic -> Can always use Newtonian potential */ - /* Store it back */ - a_x -= f_ij * dx; - a_y -= f_ij * dy; - a_z -= f_ij * dz; + /* Let's updated the active cell(s) only */ + if (ci_active) { -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pjd < gcount_j) gparts_i[pid].num_interacted++; -#endif - } + /* First the P2P */ + runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j, + gcount_padded_j, ci->gparts, cj->gparts); - /* Store everything back in cache */ - ci_cache->a_x[pid] = a_x; - ci_cache->a_y[pid] = a_y; - ci_cache->a_z[pid] = a_z; + /* Then the M2P */ + runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, ci->gparts, + CoM_j, multi_j, cj); } - } - - /* Now do the opposite loop */ - if (cj_active) { - - /* Loop over all particles in ci... 
*/ - for (int pjd = 0; pjd < gcount_j; pjd++) { - - /* Skip inactive particles */ - if (!gpart_is_active(&gparts_j[pjd], e)) continue; - - const float x_j = cj_cache->x[pjd]; - const float y_j = cj_cache->y[pjd]; - const float z_j = cj_cache->z[pjd]; - - /* Some powers of the softening length */ - const float h_j = cj_cache->epsilon[pjd]; - const float h2_j = h_j * h_j; - const float h_inv_j = 1.f / h_j; - const float h_inv3_j = h_inv_j * h_inv_j * h_inv_j; - - /* Local accumulators for the acceleration */ - float a_x = 0.f, a_y = 0.f, a_z = 0.f; - - /* Make the compiler understand we are in happy vectorization land */ - swift_align_information(ci_cache->x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(ci_cache->m, SWIFT_CACHE_ALIGNMENT); - swift_assume_size(gcount_padded_i, VEC_SIZE); - - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_padded_i; pid++) { - - /* Get info about j */ - const float x_i = ci_cache->x[pid]; - const float y_i = ci_cache->y[pid]; - const float z_i = ci_cache->z[pid]; - const float mass_i = ci_cache->m[pid]; - - /* Compute the pairwise (square) distance. 
*/ - const float dx = x_j - x_i; - const float dy = y_j - y_i; - const float dz = z_j - z_i; - const float r2 = dx * dx + dy * dy + dz * dz; - -#ifdef SWIFT_DEBUG_CHECKS - if (r2 == 0.f) error("Interacting particles with 0 distance"); - - /* Check that particles have been drifted to the current time */ - if (gparts_j[pjd].ti_drift != e->ti_current) - error("gpj not drifted to current time"); - if (pid < gcount_i && gparts_i[pid].ti_drift != e->ti_current) - error("gpi not drifted to current time"); -#endif - - /* Get the inverse distance */ - const float r_inv = 1.f / sqrtf(r2); - const float r = r2 * r_inv; - - float f_ji, W_ji, corr_lr; - - if (r2 >= h2_j) { - - /* Get Newtonian gravity */ - f_ji = mass_i * r_inv * r_inv * r_inv; - - } else { - - const float uj = r * h_inv_j; - - kernel_grav_eval(uj, &W_ji); - - /* Get softened gravity */ - f_ji = mass_i * h_inv3_j * W_ji; - } - - /* Get long-range correction */ - const float u_lr = r * rlr_inv; - kernel_long_grav_eval(u_lr, &corr_lr); - f_ji *= corr_lr; - - /* Store it back */ - a_x -= f_ji * dx; - a_y -= f_ji * dy; - a_z -= f_ji * dz; - -#ifdef SWIFT_DEBUG_CHECKS - /* Update the interaction counter if it's not a padded gpart */ - if (pid < gcount_i) gparts_j[pjd].num_interacted++; -#endif - } - - /* Store everything back in cache */ - cj_cache->a_x[pjd] = a_x; - cj_cache->a_y[pjd] = a_y; - cj_cache->a_z[pjd] = a_z; + if (cj_active) { + + /* First the P2P */ + runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i, + gcount_padded_i, cj->gparts, ci->gparts); + /* Then the M2P */ + runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, cj->gparts, + CoM_i, multi_i, ci); } - } - /* Write back to the particles */ - if (ci_active) gravity_cache_write_back(ci_cache, gparts_i, gcount_i); - if (cj_active) gravity_cache_write_back(cj_cache, gparts_j, gcount_j); + } else { /* Periodic BC */ -#ifdef MATTHIEU_OLD_STUFF - /* Some constants */ - const struct engine *const e = r->e; - const struct space 
*s = e->s; - const double cell_width = s->width[0]; - const double a_smooth = e->gravity_properties->a_smooth; - const double rlr = cell_width * a_smooth; - const float rlr_inv = 1. / rlr; - - /* Cell properties */ - const int gcount_i = ci->gcount; - const int gcount_j = cj->gcount; - struct gpart *restrict gparts_i = ci->gparts; - struct gpart *restrict gparts_j = cj->gparts; - - /* MATTHIEU: Should we use local DP accumulators ? */ - - /* Loop over all particles in ci... */ - if (cell_is_active(ci, e)) { - for (int pid = 0; pid < gcount_i; pid++) { - - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpi = &gparts_i[pid]; - - if (!gpart_is_active(gpi, e)) continue; - - /* Apply boundary condition */ - const double pix[3] = {gpi->x[0] - shift[0], gpi->x[1] - shift[1], - gpi->x[2] - shift[2]}; + /* Get the relative distance between the CoMs */ + const double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1], + CoM_j[2] - CoM_i[2]}; + const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_j; pjd++) { + /* Get the maximal distance between any two particles */ + const double max_r = sqrt(r2) + rmax_i + rmax_j; - /* Get a hold of the jth part in cj. */ - const struct gpart *restrict gpj = &gparts_j[pjd]; + /* Do we need to use the truncated interactions ? */ + if (max_r > min_trunc) { - /* Compute the pairwise distance. 
*/ - const float dx[3] = {pix[0] - gpj->x[0], // x - pix[1] - gpj->x[1], // y - pix[2] - gpj->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* Periodic but far-away cells must use the truncated potential */ -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gpi->ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (gpj->ti_drift != e->ti_current) - error("gpj not drifted to current time"); -#endif + /* Let's updated the active cell(s) only */ + if (ci_active) { - /* Interact ! */ - runner_iact_grav_pp_truncated_nonsym(r2, dx, gpi, gpj, rlr_inv); + /* First the (truncated) P2P */ + runner_dopair_grav_pp_truncated(e, rlr_inv, ci_cache, cj_cache, + gcount_i, gcount_j, gcount_padded_j, + ci->gparts, cj->gparts); -#ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; -#endif + /* Then the M2P */ + runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, + ci->gparts, CoM_j, multi_j, cj); } - } - } + if (cj_active) { - /* Loop over all particles in cj... */ - if (cell_is_active(cj, e)) { - for (int pjd = 0; pjd < gcount_j; pjd++) { + /* First the (truncated) P2P */ + runner_dopair_grav_pp_truncated(e, rlr_inv, cj_cache, ci_cache, + gcount_j, gcount_i, gcount_padded_i, + cj->gparts, ci->gparts); - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpj = &gparts_j[pjd]; - - if (!gpart_is_active(gpj, e)) continue; + /* Then the M2P */ + runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, + cj->gparts, CoM_i, multi_i, ci); + } - /* Apply boundary condition */ - const double pjx[3] = {gpj->x[0] + shift[0], gpj->x[1] + shift[1], - gpj->x[2] + shift[2]}; + } else { - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_i; pid++) { + /* Periodic but close-by cells can use the full Newtonian potential */ - /* Get a hold of the ith part in ci. 
*/ - const struct gpart *restrict gpi = &gparts_i[pid]; + /* Let's updated the active cell(s) only */ + if (ci_active) { - /* Compute the pairwise distance. */ - const float dx[3] = {pjx[0] - gpi->x[0], // x - pjx[1] - gpi->x[1], // y - pjx[2] - gpi->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* First the (Newtonian) P2P */ + runner_dopair_grav_pp_full(e, ci_cache, cj_cache, gcount_i, gcount_j, + gcount_padded_j, ci->gparts, cj->gparts); -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gpi->ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (gpj->ti_drift != e->ti_current) - error("gpj not drifted to current time"); -#endif + /* Then the M2P */ + runner_dopair_grav_pm(e, ci_cache, gcount_i, gcount_padded_i, + ci->gparts, CoM_j, multi_j, cj); + } + if (cj_active) { - /* Interact ! */ - runner_iact_grav_pp_truncated_nonsym(r2, dx, gpj, gpi, rlr_inv); + /* First the (Newtonian) P2P */ + runner_dopair_grav_pp_full(e, cj_cache, ci_cache, gcount_j, gcount_i, + gcount_padded_i, cj->gparts, ci->gparts); -#ifdef SWIFT_DEBUG_CHECKS - gpj->num_interacted++; -#endif + /* Then the M2P */ + runner_dopair_grav_pm(e, cj_cache, gcount_j, gcount_padded_j, + cj->gparts, CoM_i, multi_i, ci); } } } -#endif -} - -/** - * @brief Computes the interaction of all the particles in a cell with all the - * particles of another cell (switching function between full and truncated). - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The other #cell. 
- */ -void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { - - /* Some properties of the space */ - const struct engine *e = r->e; - const struct space *s = e->s; - const int periodic = s->periodic; - const double cell_width = s->width[0]; - const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const double a_smooth = e->gravity_properties->a_smooth; - const double r_cut_min = e->gravity_properties->r_cut_min; - const double min_trunc = cell_width * r_cut_min * a_smooth; - double shift[3] = {0.0, 0.0, 0.0}; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - /* Let's start by drifting things */ - if (!cell_are_gpart_drifted(ci, e)) cell_drift_gpart(ci, e); - if (!cell_are_gpart_drifted(cj, e)) cell_drift_gpart(cj, e); - - /* Can we use the Newtonian version or do we need the truncated one ? */ - if (!periodic) { - runner_dopair_grav_pp_full(r, ci, cj, shift); - } else { - - /* Get the relative distance between the pairs, wrapping. */ - shift[0] = nearest(cj->loc[0] - ci->loc[0], dim[0]); - shift[1] = nearest(cj->loc[1] - ci->loc[1], dim[1]); - shift[2] = nearest(cj->loc[2] - ci->loc[2], dim[2]); - const double r2 = - shift[0] * shift[0] + shift[1] * shift[1] + shift[2] * shift[2]; - - /* Get the maximal distance between any two particles */ - const double max_r = sqrt(r2) + ci->multipole->r_max + cj->multipole->r_max; - - /* Do we need to use the truncated interactions ? 
*/ - if (max_r > min_trunc) - runner_dopair_grav_pp_truncated(r, ci, cj, shift); - else - runner_dopair_grav_pp_full(r, ci, cj, shift); - } + /* Write back to the particles */ + if (ci_active) gravity_cache_write_back(ci_cache, ci->gparts, gcount_i); + if (cj_active) gravity_cache_write_back(cj_cache, cj->gparts, gcount_j); - TIMER_TOC(timer_dopair_grav_pp); + TIMER_TOC(timer_dopair_grav_branch); } /** @@ -934,14 +646,17 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { /* Anything to do here ?*/ if (!c_active) return; +#ifdef SWIFT_DEBUG_CHECKS /* Check that we fit in cache */ if (gcount > ci_cache->count) error("Not enough space in the cache! gcount=%d", gcount); +#endif /* Computed the padded counts */ const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; - gravity_cache_populate(ci_cache, gparts, gcount, gcount_padded, loc, c); + gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount, + gcount_padded, loc, c); /* Ok... Here we go ! */ @@ -949,7 +664,7 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { for (int pid = 0; pid < gcount; pid++) { /* Skip inactive particles */ - if (!gpart_is_active(&gparts[pid], e)) continue; + if (!ci_cache->active[pid]) continue; const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid]; @@ -999,26 +714,9 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { error("gpj not drifted to current time"); #endif - /* Get the inverse distance */ - const float r_inv = 1.f / sqrtf(r2); - - float f_ij, W_ij; - - if (r2 >= h2_i) { - - /* Get Newtonian gravity */ - f_ij = mass_j * r_inv * r_inv * r_inv; - - } else { - - const float r = r2 * r_inv; - const float ui = r * h_inv_i; - - kernel_grav_eval(ui, &W_ij); - - /* Get softened gravity */ - f_ij = mass_j * h_inv3_i * W_ij; - } + /* Interact! 
*/ + float f_ij; + runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, mass_j, &f_ij); /* Store it back */ a_x -= f_ij * dx; @@ -1039,80 +737,6 @@ void runner_doself_grav_pp_full(struct runner *r, struct cell *c) { /* Write back to the particles */ gravity_cache_write_back(ci_cache, gparts, gcount); - -#ifdef MATTHIEU_OLD_STUFF - - /* Some constants */ - const struct engine *const e = r->e; - - /* Cell properties */ - const int gcount = c->gcount; - struct gpart *restrict gparts = c->gparts; - - /* MATTHIEU: Should we use local DP accumulators ? */ - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount; pid++) { - - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpi = &gparts[pid]; - - /* Loop over every particle in the other cell. */ - for (int pjd = pid + 1; pjd < gcount; pjd++) { - - /* Get a hold of the jth part in ci. */ - struct gpart *restrict gpj = &gparts[pjd]; - - /* Compute the pairwise distance. */ - float dx[3] = {gpi->x[0] - gpj->x[0], // x - gpi->x[1] - gpj->x[1], // y - gpi->x[2] - gpj->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gpi->ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (gpj->ti_drift != e->ti_current) - error("gpj not drifted to current time"); -#endif - - /* Interact ! 
*/ - if (gpart_is_active(gpi, e) && gpart_is_active(gpj, e)) { - - runner_iact_grav_pp(r2, dx, gpi, gpj); - -#ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; - gpj->num_interacted++; -#endif - - } else { - - if (gpart_is_active(gpi, e)) { - - runner_iact_grav_pp_nonsym(r2, dx, gpi, gpj); - -#ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; -#endif - - } else if (gpart_is_active(gpj, e)) { - - dx[0] = -dx[0]; - dx[1] = -dx[1]; - dx[2] = -dx[2]; - runner_iact_grav_pp_nonsym(r2, dx, gpj, gpi); - -#ifdef SWIFT_DEBUG_CHECKS - gpj->num_interacted++; -#endif - } - } - } - } - -#endif } /** @@ -1148,14 +772,17 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { /* Anything to do here ?*/ if (!c_active) return; +#ifdef SWIFT_DEBUG_CHECKS /* Check that we fit in cache */ if (gcount > ci_cache->count) error("Not enough space in the caches! gcount=%d", gcount); +#endif /* Computed the padded counts */ const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; - gravity_cache_populate(ci_cache, gparts, gcount, gcount_padded, loc, c); + gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, gparts, gcount, + gcount_padded, loc, c); /* Ok... Here we go ! 
*/ @@ -1163,7 +790,7 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { for (int pid = 0; pid < gcount; pid++) { /* Skip inactive particles */ - if (!gpart_is_active(&gparts[pid], e)) continue; + if (!ci_cache->active[pid]) continue; const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid]; @@ -1213,31 +840,10 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { error("gpj not drifted to current time"); #endif - /* Get the inverse distance */ - const float r_inv = 1.f / sqrtf(r2); - const float r = r2 * r_inv; - - float f_ij, W_ij, corr_lr; - - if (r2 >= h2_i) { - - /* Get Newtonian gravity */ - f_ij = mass_j * r_inv * r_inv * r_inv; - - } else { - - const float ui = r * h_inv_i; - - kernel_grav_eval(ui, &W_ij); - - /* Get softened gravity */ - f_ij = mass_j * h_inv3_i * W_ij; - } - - /* Get long-range correction */ - const float u_lr = r * rlr_inv; - kernel_long_grav_eval(u_lr, &corr_lr); - f_ij *= corr_lr; + /* Interact! */ + float f_ij; + runner_iact_grav_pp_truncated(r2, h2_i, h_inv_i, h_inv3_i, mass_j, + rlr_inv, &f_ij); /* Store it back */ a_x -= f_ij * dx; @@ -1258,83 +864,6 @@ void runner_doself_grav_pp_truncated(struct runner *r, struct cell *c) { /* Write back to the particles */ gravity_cache_write_back(ci_cache, gparts, gcount); - -#ifdef MATTHIEU_OLD_STUFF - /* Some constants */ - const struct engine *const e = r->e; - const struct space *s = e->s; - const double cell_width = s->width[0]; - const double a_smooth = e->gravity_properties->a_smooth; - const double rlr = cell_width * a_smooth; - const float rlr_inv = 1. / rlr; - - /* Cell properties */ - const int gcount = c->gcount; - struct gpart *restrict gparts = c->gparts; - - /* MATTHIEU: Should we use local DP accumulators ? */ - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount; pid++) { - - /* Get a hold of the ith part in ci. 
*/ - struct gpart *restrict gpi = &gparts[pid]; - - /* Loop over every particle in the other cell. */ - for (int pjd = pid + 1; pjd < gcount; pjd++) { - - /* Get a hold of the jth part in ci. */ - struct gpart *restrict gpj = &gparts[pjd]; - - /* Compute the pairwise distance. */ - float dx[3] = {gpi->x[0] - gpj->x[0], // x - gpi->x[1] - gpj->x[1], // y - gpi->x[2] - gpj->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - -#ifdef SWIFT_DEBUG_CHECKS - /* Check that particles have been drifted to the current time */ - if (gpi->ti_drift != e->ti_current) - error("gpi not drifted to current time"); - if (gpj->ti_drift != e->ti_current) - error("gpj not drifted to current time"); -#endif - - /* Interact ! */ - if (gpart_is_active(gpi, e) && gpart_is_active(gpj, e)) { - - runner_iact_grav_pp_truncated(r2, dx, gpi, gpj, rlr_inv); - -#ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; - gpj->num_interacted++; -#endif - - } else { - - if (gpart_is_active(gpi, e)) { - - runner_iact_grav_pp_truncated_nonsym(r2, dx, gpi, gpj, rlr_inv); - -#ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; -#endif - - } else if (gpart_is_active(gpj, e)) { - - dx[0] = -dx[0]; - dx[1] = -dx[1]; - dx[2] = -dx[2]; - runner_iact_grav_pp_truncated_nonsym(r2, dx, gpj, gpi, rlr_inv); - -#ifdef SWIFT_DEBUG_CHECKS - gpj->num_interacted++; -#endif - } - } - } - } -#endif } /** @@ -1364,8 +893,11 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { /* Anything to do here? */ if (!cell_is_active(c, e)) return; + /* Check that we are not doing something stupid */ + if (c->split) error("Running P-P on a splitable cell"); + /* Do we need to start by drifting things ? */ - if (!cell_are_gpart_drifted(c, e)) cell_drift_gpart(c, e); + if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); /* Can we use the Newtonian version or do we need the truncated one ? 
*/ if (!periodic) { @@ -1373,7 +905,7 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { } else { /* Get the maximal distance between any two particles */ - const double max_r = 2 * c->multipole->r_max; + const double max_r = 2. * c->multipole->r_max; /* Do we need to use the truncated interactions ? */ if (max_r > min_trunc) @@ -1406,7 +938,7 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, const double cell_width = s->width[0]; const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; const struct gravity_props *props = e->gravity_properties; - const double theta_crit_inv = props->theta_crit_inv; + const double theta_crit2 = props->theta_crit2; const double max_distance = props->a_smooth * props->r_cut_max * cell_width; const double max_distance2 = max_distance * max_distance; @@ -1467,7 +999,7 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, * option... */ /* Can we use M-M interactions ? */ - if (gravity_multipole_accept(multi_i, multi_j, theta_crit_inv, r2)) { + if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) { /* MATTHIEU: make a symmetric M-M interaction function ! */ runner_dopair_grav_mm(r, ci, cj); @@ -1588,20 +1120,6 @@ void runner_doself_grav(struct runner *r, struct cell *c, int gettimer) { if (gettimer) TIMER_TOC(timer_dosub_self_grav); } -void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, - int timer) { - - /* Is this a single cell? */ - if (cj == NULL) { - - runner_doself_grav(r, ci, 1); - - } else { - - runner_dopair_grav(r, ci, cj, 1); - } -} - /** * @brief Performs all M-M interactions between a given top-level cell and all * the other top-levels that are far enough. 
@@ -1632,7 +1150,7 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { const int periodic = s->periodic; const double cell_width = s->width[0]; const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; - const double theta_crit_inv = props->theta_crit_inv; + const double theta_crit2 = props->theta_crit2; const double max_distance = props->a_smooth * props->r_cut_max * cell_width; const double max_distance2 = max_distance * max_distance; @@ -1691,7 +1209,7 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { } /* Check the multipole acceptance criterion */ - if (gravity_multipole_accept(multi_i, multi_j, theta_crit_inv, r2)) { + if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, theta_crit2, r2)) { /* Go for a (non-symmetric) M-M calculation */ runner_dopair_grav_mm(r, ci, cj); @@ -1714,8 +1232,8 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { const double r2_rebuild = dx * dx + dy * dy + dz * dz; /* Is the criterion violated now but was OK at the last rebuild ? */ - if (gravity_multipole_accept_rebuild(multi_i, multi_j, theta_crit_inv, - r2_rebuild)) { + if (gravity_M2L_accept(multi_i->r_max_rebuild, multi_j->r_max_rebuild, + theta_crit2, r2_rebuild)) { /* Alright, we have to take charge of that pair in a different way. */ // MATTHIEU: We should actually open the tree-node here and recurse. 
diff --git a/src/scheduler.c b/src/scheduler.c index d0eeb8cb726cf53321d1b4e6a028f2914246cbf2..b1cc1a572d3344e7b1e2338c7594da0edff58919 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -649,18 +649,8 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { ci->progeny[k]), s); } - } - - /* Otherwise, make sure the self task has a drift task */ - else { - - lock_lock(&ci->lock); + } /* Cell is split */ - if (ci->drift_gpart == NULL) - ci->drift_gpart = scheduler_addtask( - s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL); - lock_unlock_blind(&ci->lock); - } } /* Self interaction */ /* Pair interaction? */ @@ -675,28 +665,6 @@ static void scheduler_splittask_gravity(struct task *t, struct scheduler *s) { t->skip = 1; break; } - - /* Should this task be split-up? */ - if (0 && ci->split && cj->split) { - - // MATTHIEU: nothing here for now - - } else { - - /* Create the drift for ci. */ - lock_lock(&ci->lock); - if (ci->drift_gpart == NULL && ci->nodeID == engine_rank) - ci->drift_gpart = scheduler_addtask( - s, task_type_drift_gpart, task_subtype_none, 0, 0, ci, NULL); - lock_unlock_blind(&ci->lock); - - /* Create the drift for cj. */ - lock_lock(&cj->lock); - if (cj->drift_gpart == NULL && cj->nodeID == engine_rank) - cj->drift_gpart = scheduler_addtask( - s, task_type_drift_gpart, task_subtype_none, 0, 0, cj, NULL); - lock_unlock_blind(&cj->lock); - } } /* pair interaction? */ } /* iterate over the current task. 
*/ } @@ -727,7 +695,7 @@ void scheduler_splittasks_mapper(void *map_data, int num_elements, scheduler_splittask_gravity(t, s); } else if (t->type == task_type_grav_top_level || t->type == task_type_grav_ghost) { - // MATTHIEU: for the future + /* For future use */ } else { error("Unexpected task sub-type"); } diff --git a/src/timers.c b/src/timers.c index 62eac20596a082e411ced61a86f32bef9edcb636..fec111dd939528bd0648609d8a1f5f83e595ec02 100644 --- a/src/timers.c +++ b/src/timers.c @@ -54,8 +54,9 @@ const char* timers_names[timer_count] = { "dopair_density", "dopair_gradient", "dopair_force", - "dopair_grav_pm", + "dopair_grav_branch", "dopair_grav_mm", + "dopair_grav_pm", "dopair_grav_pp", "dograv_external", "dograv_down", @@ -119,8 +120,9 @@ void timers_reset_all() { timers_reset(timers_mask_all); } void timers_print(int step) { fprintf(timers_file, "%d\t", step); for (int k = 0; k < timer_count; k++) - fprintf(timers_file, "%.3f\t", clocks_from_ticks(timers[k])); + fprintf(timers_file, "%18.3f ", clocks_from_ticks(timers[k])); fprintf(timers_file, "\n"); + fflush(timers_file); } /** @@ -136,7 +138,7 @@ void timers_open_file(int rank) { fprintf(timers_file, "# timers: \n# step | "); for (int k = 0; k < timer_count; k++) - fprintf(timers_file, "%s\t", timers_names[k]); + fprintf(timers_file, "%18s ", timers_names[k]); fprintf(timers_file, "\n"); } diff --git a/src/timers.h b/src/timers.h index 9248be4f3048e468deed476f822947eed3c4ce56..38ede8251eb5d640282e728e17d9330956a1cba8 100644 --- a/src/timers.h +++ b/src/timers.h @@ -55,8 +55,9 @@ enum { timer_dopair_density, timer_dopair_gradient, timer_dopair_force, - timer_dopair_grav_pm, + timer_dopair_grav_branch, timer_dopair_grav_mm, + timer_dopair_grav_pm, timer_dopair_grav_pp, timer_dograv_external, timer_dograv_down, diff --git a/src/tools.c b/src/tools.c index 7d69ebc6c476312081d8a8c34c76c6592da5cab0..3ee55db3d5f5348699372d2620b6d15af38b23d0 100644 --- a/src/tools.c +++ b/src/tools.c @@ -400,64 +400,6 @@ void 
self_all_force(struct runner *r, struct cell *ci) { } } -void pairs_single_grav(double *dim, long long int pid, - struct gpart *restrict gparts, const struct part *parts, - int N, int periodic) { - - int i, k; - // int mj, mk; - // double maxratio = 1.0; - double r2, dx[3]; - float fdx[3], a[3] = {0.0, 0.0, 0.0}, aabs[3] = {0.0, 0.0, 0.0}; - struct gpart pi, pj; - // double ih = 12.0/6.25; - - /* Find "our" part. */ - for (k = 0; k < N; k++) - if ((gparts[k].id_or_neg_offset < 0 && - parts[-gparts[k].id_or_neg_offset].id == pid) || - gparts[k].id_or_neg_offset == pid) - break; - if (k == N) error("Part not found."); - pi = gparts[k]; - pi.a_grav[0] = 0.0f; - pi.a_grav[1] = 0.0f; - pi.a_grav[2] = 0.0f; - - /* Loop over all particle pairs. */ - for (k = 0; k < N; k++) { - if (gparts[k].id_or_neg_offset == pi.id_or_neg_offset) continue; - pj = gparts[k]; - for (i = 0; i < 3; i++) { - dx[i] = pi.x[i] - pj.x[i]; - if (periodic) { - if (dx[i] < -dim[i] / 2) - dx[i] += dim[i]; - else if (dx[i] > dim[i] / 2) - dx[i] -= dim[i]; - } - fdx[i] = dx[i]; - } - r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2]; - runner_iact_grav_pp(r2, fdx, &pi, &pj); - a[0] += pi.a_grav[0]; - a[1] += pi.a_grav[1]; - a[2] += pi.a_grav[2]; - aabs[0] += fabsf(pi.a_grav[0]); - aabs[1] += fabsf(pi.a_grav[1]); - aabs[2] += fabsf(pi.a_grav[2]); - pi.a_grav[0] = 0.0f; - pi.a_grav[1] = 0.0f; - pi.a_grav[2] = 0.0f; - } - - /* Dump the result. */ - message( - "acceleration on gpart %lli is a=[ %e %e %e ], |a|=[ %.2e %.2e %.2e ].\n", - parts[-pi.id_or_neg_offset].id, a[0], a[1], a[2], aabs[0], aabs[1], - aabs[2]); -} - /** * @brief Compute the force on a single particle brute-force. */ @@ -747,69 +689,3 @@ int compare_particles(struct part a, struct part b, double threshold) { #endif } - -/** - * @brief Computes the forces between all g-particles using the N^2 algorithm - * - * Overwrites the accelerations of the gparts with the values. - * Do not use for actual runs. 
- * - * @brief gparts The array of particles. - * @brief gcount The number of particles. - * @brief constants Physical constants in internal units. - * @brief gravity_properties Constants governing the gravity scheme. - */ -void gravity_n2(struct gpart *gparts, const int gcount, - const struct phys_const *constants, - const struct gravity_props *gravity_properties, float rlr) { - - const float rlr_inv = 1. / rlr; - const float r_cut = gravity_properties->r_cut_max; - const float max_d = r_cut * rlr; - const float max_d2 = max_d * max_d; - - message("rlr_inv= %f", rlr_inv); - message("max_d: %f", max_d); - - /* Reset everything */ - for (int pid = 0; pid < gcount; pid++) { - struct gpart *restrict gpi = &gparts[pid]; - gpi->a_grav[0] = 0.f; - gpi->a_grav[1] = 0.f; - gpi->a_grav[2] = 0.f; - } - - /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount; pid++) { - - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpi = &gparts[pid]; - - for (int pjd = pid + 1; pjd < gcount; pjd++) { - - /* Get a hold of the jth part in ci. */ - struct gpart *restrict gpj = &gparts[pjd]; - - /* Compute the pairwise distance. */ - const float dx[3] = {gpi->x[0] - gpj->x[0], // x - gpi->x[1] - gpj->x[1], // y - gpi->x[2] - gpj->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - - if (r2 < max_d2 || 1) { - - /* Apply the gravitational acceleration. 
*/ - runner_iact_grav_pp(r2, dx, gpi, gpj); - } - } - } - - /* Multiply by Newton's constant */ - const double const_G = constants->const_newton_G; - for (int pid = 0; pid < gcount; pid++) { - struct gpart *restrict gpi = &gparts[pid]; - gpi->a_grav[0] *= const_G; - gpi->a_grav[1] *= const_G; - gpi->a_grav[2] *= const_G; - } -} diff --git a/tests/Makefile.am b/tests/Makefile.am index 27e6ecf4fad565a28825afb7890833fce0f57318..553980a93e907e83b65bb4539ca49c8bc1b7207b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -25,7 +25,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr testParser.sh testSPHStep test125cells.sh test125cellsPerturbed.sh testFFT \ testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \ testMatrixInversion testThreadpool testDump testLogger testInteractions.sh \ - testVoronoi1D testVoronoi2D testVoronoi3D \ + testVoronoi1D testVoronoi2D testVoronoi3D testGravityDerivatives \ testPeriodicBC.sh testPeriodicBCPerturbed.sh # List of test programs to compile @@ -35,7 +35,8 @@ check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testSymmetry testThreadpool \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ testRiemannHLLC testMatrixInversion testDump testLogger \ - testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC + testVoronoi1D testVoronoi2D testVoronoi3D testPeriodicBC \ + testGravityDerivatives # Rebuild tests when SWIFT is updated. 
$(check_PROGRAMS): ../src/.libs/libswiftsim.a @@ -93,6 +94,8 @@ testDump_SOURCES = testDump.c testLogger_SOURCES = testLogger.c +testGravityDerivatives_SOURCES = testGravityDerivatives.c + # Files necessary for distribution EXTRA_DIST = testReading.sh makeInput.py testActivePair.sh \ test27cells.sh test27cellsPerturbed.sh testParser.sh testPeriodicBC.sh \ diff --git a/tests/testGravityDerivatives.c b/tests/testGravityDerivatives.c new file mode 100644 index 0000000000000000000000000000000000000000..0a811cbda491c40f2f1db7bac5b1f3e2f7508b59 --- /dev/null +++ b/tests/testGravityDerivatives.c @@ -0,0 +1,1048 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "../config.h" + +/* Some standard headers. */ +#include <fenv.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Local headers. */ +#include "swift.h" + +/*************************/ +/* 0th order derivatives */ +/*************************/ + +/** + * @brief \f$ \phi(r_x, r_y, r_z) \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). 
+ * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_000(double r_x, double r_y, double r_z, double r_inv) { + + return r_inv; +} + +/*************************/ +/* 1st order derivatives */ +/*************************/ + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_x} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_100(double r_x, double r_y, double r_z, double r_inv) { + + return -r_x * r_inv * r_inv * r_inv; +} + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_y} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_010(double r_x, double r_y, double r_z, double r_inv) { + + return -r_y * r_inv * r_inv * r_inv; +} + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z)}{\partial r_z} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$).
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_001(double r_x, double r_y, double r_z, double r_inv) { + + return -r_z * r_inv * r_inv * r_inv; +} + +/*************************/ +/* 2nd order derivatives */ +/*************************/ + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_200(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv3 = r_inv * r_inv2; + const double r_inv5 = r_inv3 * r_inv2; + return 3. * r_x * r_x * r_inv5 - r_inv3; +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_020(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv3 = r_inv * r_inv2; + const double r_inv5 = r_inv3 * r_inv2; + return 3. * r_y * r_y * r_inv5 - r_inv3; +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_z^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_002(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv3 = r_inv * r_inv2; + const double r_inv5 = r_inv3 * r_inv2; + return 3. * r_z * r_z * r_inv5 - r_inv3; +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_110(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + return 3. * r_x * r_y * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_101(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + return 3. * r_x * r_z * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_011(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + return 3. * r_y * r_z * r_inv5; +} + +/*************************/ +/* 3rd order derivatives */ +/*************************/ + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^3} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_300(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_x * r_x * r_x * r_inv7 + 9. * r_x * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^3} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_030(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_y * r_y * r_y * r_inv7 + 9. * r_y * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_z^3} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_003(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_z * r_z * r_z * r_inv7 + 9. * r_z * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_y} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_210(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_x * r_x * r_y * r_inv7 + 3. * r_y * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x^2\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_201(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_x * r_x * r_z * r_inv7 + 3. * r_z * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_y^2} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_120(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_x * r_y * r_y * r_inv7 + 3. * r_x * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y^2\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_021(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_z * r_y * r_y * r_inv7 + 3. * r_z * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial r_z^2} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_102(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_x * r_z * r_z * r_inv7 + 3. * r_x * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_y\partial r_z^2} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). 
+ * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_012(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv2 = r_inv * r_inv; + const double r_inv5 = r_inv2 * r_inv2 * r_inv; + const double r_inv7 = r_inv5 * r_inv2; + return -15. * r_y * r_z * r_z * r_inv7 + 3. * r_y * r_inv5; +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z)}{\partial r_x\partial + * r_y\partial r_z} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r_inv Inverse of the norm of the distance vector (\f$ |r|^{-1} \f$) + */ +INLINE static double D_111(double r_x, double r_y, double r_z, double r_inv) { + const double r_inv3 = r_inv * r_inv * r_inv; + const double r_inv7 = r_inv3 * r_inv3 * r_inv; + return -15. * r_x * r_y * r_z * r_inv7; +} + +/*********************************/ +/* 4th order gravity derivatives */ +/*********************************/ + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_z^4 }\phi(x, y, z) \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_004(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_z * r_z * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 * + (r_z * r_z) + + 3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0; + /* 5 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_y^1 \partial_z^3 }\phi(x, y, + * z) \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_013(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_z * r_z * r_z) - + 15.
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_y * r_z); + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_y^2 \partial_z^2 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_022(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_y * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_z * r_z) + + 3. * r_inv * r_inv * r_inv * r_inv * r_inv; + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_y^3 \partial_z^1 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_031(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_y * r_y * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_y * r_z); + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_y^4 }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_040(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_y * r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 * + (r_y * r_y) + + 3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0; + /* 5 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_z^3 }\phi(x, y, + * z} \f$. 
+ * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_103(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_z * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x * r_z); + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^1 \partial_z^2 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_112(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_y * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_x * r_y); + /* 13 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^2 \partial_z^1 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_121(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_y * r_y * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_x * r_z); + /* 13 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^1 \partial_y^3 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_130(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_y * r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x * r_y); + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_z^2 }\phi(x, y, + * z} \f$. 
+ * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_202(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_x * r_x) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_z * r_z) + + 3. * r_inv * r_inv * r_inv * r_inv * r_inv; + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^1 \partial_z^1 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_211(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_y * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_y * r_z); + /* 13 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^2 \partial_y^2 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_220(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_x * r_x) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + (r_y * r_y) + + 3. * r_inv * r_inv * r_inv * r_inv * r_inv; + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_z^1 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_301(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_x * r_z) - + 15. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x * r_z); + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^3 \partial_y^1 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_310(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_x * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x * r_y); + /* 11 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^4}{ \partial_x^4 }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_400(double r_x, double r_y, double r_z, double r_inv) { + return +105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_x * r_x) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 6.0 * + (r_x * r_x) + + 3. * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0; + /* 5 zero-valued terms not written out */ +} + +/*********************************/ +/* 5th order gravity derivatives */ +/*********************************/ + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_z^5 }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_005(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_z * r_z * r_z * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 10.0 * (r_z * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 * + (r_z); + /* 26 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_y^1 \partial_z^4 }\phi(x, y, + * z} \f$. 
+ * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_014(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_y * r_z * r_z * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 6.0 * (r_y * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_y); + /* 42 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_y^2 \partial_z^3 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_023(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_y * r_y * r_z * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_y * r_y * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_z * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_z); + /* 44 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_y^3 \partial_z^2 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_032(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_y * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_y * r_z * r_z) - + 15. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_y); + /* 44 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_y^4 \partial_z^1 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_041(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 6.0 * (r_y * r_y * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_z); + /* 42 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_y^5 }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_050(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_y * r_y * r_y * r_y * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 10.0 * (r_y * r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 * + (r_y); + /* 26 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_z^4 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_104(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_z * r_z * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 6.0 * (r_x * r_z * r_z) - + 15. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x); + /* 42 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^1 \partial_z^3 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_113(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_y * r_z * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_x * r_y * r_z); + /* 48 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^2 \partial_z^2 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_122(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_y * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_x); + /* 48 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^3 \partial_z^1 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_131(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_z) + + 105. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_x * r_y * r_z); + /* 48 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^1 \partial_y^4 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_140(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_y * r_y * r_y * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 6.0 * (r_x * r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x); + /* 42 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_z^3 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_203(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_z * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_x * r_x * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_z * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_z); + /* 44 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^1 \partial_z^2 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_212(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_y) + + 105. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_y); + /* 48 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^2 \partial_z^1 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_221(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_y * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * (r_z); + /* 48 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^2 \partial_y^3 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_230(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_y * r_y * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_x * r_x * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_y * r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_y); + /* 44 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_z^2 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_302(double r_x, double r_y, double r_z, double r_inv) { + return -945. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_z * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_x) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_x * r_z * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x); + /* 44 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^1 \partial_z^1 + * }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_311(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_x * r_y * r_z); + /* 48 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^3 \partial_y^2 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_320(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_y * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * (r_x * r_x * r_x) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 3.0 * (r_x * r_y * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_x); + /* 44 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_z^1 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_401(double r_x, double r_y, double r_z, double r_inv) { + return -945. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_z) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 6.0 * (r_x * r_x * r_z) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_z); + /* 42 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^4 \partial_y^1 }\phi(x, y, + * z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_410(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_y) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 6.0 * (r_x * r_x * r_y) - + 15. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 3.0 * + (r_y); + /* 42 zero-valued terms not written out */ +} + +/** + * @brief Compute \f$ \frac{\partial^5}{ \partial_x^5 }\phi(x, y, z} \f$. + * + * Note that r_inv = 1./sqrt(r_x^2 + r_y^2 + r_z^2) + */ +INLINE static double D_500(double r_x, double r_y, double r_z, double r_inv) { + return -945. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * r_inv * r_inv * (r_x * r_x * r_x * r_x * r_x) + + 105. * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * + r_inv * 10.0 * (r_x * r_x * r_x) - + 15. 
* r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * r_inv * 15.0 * + (r_x); + /* 26 zero-valued terms not written out */ +} + +/** + * @brief Check that two values agree to within a relative tolerance. + * + * Calls error() when |x - y| exceeds tol times the mean magnitude of the two + * values, provided that mean magnitude is larger than min. + * + * @param x The value under test (printed as "swift" in the error message). + * @param y The reference value (printed as "exact" in the error message). + * @param tol The relative tolerance. + * @param min The mean magnitude at or below which the pair is not checked. + * @param name The label identifying the quantity in the error message. + */ +void test(double x, double y, double tol, double min, const char* name) { + + double diff = fabs(x - y); + + /* Mean of the magnitudes rather than magnitude of the mean: the latter is + * zero when x == -y, so a pure sign error would fail the 'norm > min' guard + * and go unreported. */ + double norm = 0.5 * (fabs(x) + fabs(y)); + if (diff > norm * tol && norm > min) + error( + "Relative difference (%e) for '%s' (swift=%e) and (exact=%e) exceeds " + "tolerance (%e)", + diff / norm, name, x, y, tol); + /* else */ + /* message("'%s' (%e -- %e) OK!", name, x, y); */ +} + +int main() { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + /* Choke on FP-exceptions */ + feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); + + /* Relative tolerance */ + const double tol = 1e-4; + + /* Get some randomness going */ + const int seed = time(NULL); + message("Seed = %d", seed); + srand(seed); + + for (int i = 0; i < 100; ++i) { + + const double dx = 100. * ((double)rand() / (RAND_MAX)); + const double dy = 100. * ((double)rand() / (RAND_MAX)); + const double dz = 100. * ((double)rand() / (RAND_MAX)); + + message("Testing gravity for r=(%e %e %e)", dx, dy, dz); + + /* Compute distance */ + const double r2 = dx * dx + dy * dy + dz * dz; + const double r_inv = 1. / sqrt(r2); + + /* Compute all derivatives */ + struct potential_derivatives_M2L pot; + compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, 0., FLT_MAX, &pot); + + /* Minimal value we care about */ + const double min = 1e-9; + + /* Now check everything...
*/ + + /* 0th order terms */ + test(pot.D_000, D_000(dx, dy, dz, r_inv), tol, min, "D_000"); + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + /* 1st order terms */ + test(pot.D_100, D_100(dx, dy, dz, r_inv), tol, min, "D_100"); + test(pot.D_010, D_010(dx, dy, dz, r_inv), tol, min, "D_010"); + test(pot.D_001, D_001(dx, dy, dz, r_inv), tol, min, "D_001"); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + + /* 2nd order terms */ + test(pot.D_200, D_200(dx, dy, dz, r_inv), tol, min, "D_200"); + test(pot.D_020, D_020(dx, dy, dz, r_inv), tol, min, "D_020"); + test(pot.D_002, D_002(dx, dy, dz, r_inv), tol, min, "D_002"); + test(pot.D_110, D_110(dx, dy, dz, r_inv), tol, min, "D_110"); + test(pot.D_101, D_101(dx, dy, dz, r_inv), tol, min, "D_101"); + test(pot.D_011, D_011(dx, dy, dz, r_inv), tol, min, "D_011"); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + + /* 3rd order terms */ + test(pot.D_300, D_300(dx, dy, dz, r_inv), tol, min, "D_300"); + test(pot.D_030, D_030(dx, dy, dz, r_inv), tol, min, "D_030"); + test(pot.D_003, D_003(dx, dy, dz, r_inv), tol, min, "D_003"); + test(pot.D_210, D_210(dx, dy, dz, r_inv), tol, min, "D_210"); + test(pot.D_201, D_201(dx, dy, dz, r_inv), tol, min, "D_201"); + test(pot.D_120, D_120(dx, dy, dz, r_inv), tol, min, "D_120"); + test(pot.D_021, D_021(dx, dy, dz, r_inv), tol, min, "D_021"); + test(pot.D_102, D_102(dx, dy, dz, r_inv), tol, min, "D_102"); + test(pot.D_012, D_012(dx, dy, dz, r_inv), tol, min, "D_012"); + test(pot.D_111, D_111(dx, dy, dz, r_inv), tol, min, "D_111"); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 3 + + /* 4th order terms */ + test(pot.D_400, D_400(dx, dy, dz, r_inv), tol, min, "D_400"); + test(pot.D_040, D_040(dx, dy, dz, r_inv), tol, min, "D_040"); + test(pot.D_004, D_004(dx, dy, dz, r_inv), tol, min, "D_004"); + test(pot.D_310, D_310(dx, dy, dz, r_inv), tol, min, "D_310"); + test(pot.D_301, D_301(dx, dy, dz, r_inv), tol, min, "D_301"); + test(pot.D_130, D_130(dx, dy, dz, r_inv), tol, min, "D_130"); + test(pot.D_031, 
D_031(dx, dy, dz, r_inv), tol, min, "D_031"); + test(pot.D_103, D_103(dx, dy, dz, r_inv), tol, min, "D_103"); + test(pot.D_013, D_013(dx, dy, dz, r_inv), tol, min, "D_013"); + test(pot.D_220, D_220(dx, dy, dz, r_inv), tol, min, "D_220"); + test(pot.D_202, D_202(dx, dy, dz, r_inv), tol, min, "D_202"); + test(pot.D_022, D_022(dx, dy, dz, r_inv), tol, min, "D_022"); + test(pot.D_211, D_211(dx, dy, dz, r_inv), tol, min, "D_211"); + test(pot.D_121, D_121(dx, dy, dz, r_inv), tol, min, "D_121"); + test(pot.D_112, D_112(dx, dy, dz, r_inv), tol, min, "D_112"); +#endif + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 4 + + /* 5th order terms */ + test(pot.D_500, D_500(dx, dy, dz, r_inv), tol, min, "D_500"); + test(pot.D_050, D_050(dx, dy, dz, r_inv), tol, min, "D_050"); + test(pot.D_005, D_005(dx, dy, dz, r_inv), tol, min, "D_005"); + test(pot.D_410, D_410(dx, dy, dz, r_inv), tol, min, "D_410"); + test(pot.D_401, D_401(dx, dy, dz, r_inv), tol, min, "D_401"); + test(pot.D_140, D_140(dx, dy, dz, r_inv), tol, min, "D_140"); + test(pot.D_041, D_041(dx, dy, dz, r_inv), tol, min, "D_041"); + test(pot.D_104, D_104(dx, dy, dz, r_inv), tol, min, "D_104"); + test(pot.D_014, D_014(dx, dy, dz, r_inv), tol, min, "D_014"); + test(pot.D_320, D_320(dx, dy, dz, r_inv), tol, min, "D_320"); + test(pot.D_302, D_302(dx, dy, dz, r_inv), tol, min, "D_302"); + test(pot.D_230, D_230(dx, dy, dz, r_inv), tol, min, "D_230"); + test(pot.D_032, D_032(dx, dy, dz, r_inv), tol, min, "D_032"); + test(pot.D_203, D_203(dx, dy, dz, r_inv), tol, min, "D_203"); + test(pot.D_023, D_023(dx, dy, dz, r_inv), tol, min, "D_023"); + test(pot.D_311, D_311(dx, dy, dz, r_inv), tol, min, "D_311"); + test(pot.D_131, D_131(dx, dy, dz, r_inv), tol, min, "D_131"); + test(pot.D_113, D_113(dx, dy, dz, r_inv), tol, min, "D_113"); + test(pot.D_122, D_122(dx, dy, dz, r_inv), tol, min, "D_122"); + test(pot.D_212, D_212(dx, dy, dz, r_inv), tol, min, "D_212"); + test(pot.D_221, D_221(dx, dy, dz, r_inv), tol, min, "D_221"); + +#endif + 
message("All good!"); + } + return 0; +} diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex index dc4266a23110873ff38ccbec4d71345e2780d6b2..d3030dc52c53eca421521023649d09522b39b7bf 100644 --- a/theory/Multipoles/fmm_standalone.tex +++ b/theory/Multipoles/fmm_standalone.tex @@ -2,6 +2,7 @@ \usepackage{graphicx} \usepackage{amsmath,paralist,xcolor,xspace,amssymb} \usepackage{times} +\usepackage{comment} \newcommand{\swift}{{\sc Swift}\xspace} \newcommand{\nbody}{$N$-body\xspace} diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex index 5c7b1e6566d7d51b5d27ea3c24d785571e1ad692..d1dba978663e966f2132a65133b3c2fec5e707b6 100644 --- a/theory/Multipoles/potential_derivatives.tex +++ b/theory/Multipoles/potential_derivatives.tex @@ -4,19 +4,139 @@ For completeness, we give here the full expression for the first few derivatives of the potential that are used in our FMM scheme. We use the notation $\mathbf{r}=(r_x, r_y, r_z)$, $r = |\mathbf{r}|$ and -$u=r/H$. Starting from the potential (Eq. \ref{eq:fmm:potential}, -reproduced here for clarity), +$u=r/H$. We can construct the higher order derivatives by successively +applying the "chain rule". We show representative examples of the +first few relevant ones here split by order. We start by constructing +common quantities that appear in derivatives of multiple orders. 
+ \begin{align} -\mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r},H) = -\left\lbrace\begin{array}{rcl} -\frac{1}{H} \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right) & \mbox{if} & u < 1,\\ -\frac{1}{r} & \mbox{if} & u \geq 1, -\end{array} -\right.\nonumber + \mathsf{\tilde{D}}_{1}(r, u, H) = + \left\lbrace\begin{array}{rcl} + \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right)\times H^{-1} & \mbox{if} & u < 1,\\ + r^{-1} & \mbox{if} & u \geq 1, + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{3}(r, u, H) = + \left\lbrace\begin{array}{rcl} + -\left(21u^5 - 90u^4 + 140u^3 -84u^2 +14\right)\times H^{-3}& \mbox{if} & u < 1,\\ + -1 \times r^{-3} & \mbox{if} & u \geq 1, + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{5}(r, u, H) = + \left\lbrace\begin{array}{rcl} + \left(-105u^3 + 360u^2 - 420u + 168\right)\times H^{-5}& \mbox{if} & u < 1,\\ + 3\times r^{-5} & \mbox{if} & u \geq 1, + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{7}(r, u, H) = + \left\lbrace\begin{array}{rcl} + -\left(315u - 720 + 420u^{-1}\right)\times H^{-7} & \mbox{if} & u < 1,\\ + -15\times r^{-7} & \mbox{if} & u \geq 1, + \end{array} + \right.\nonumber +\end{align} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +\begin{align} + \mathsf{\tilde{D}}_{9}(r, u, H) = + \left\lbrace\begin{array}{rcl} + \left(-315u^{-1} + 420u^{-3}\right)\times H^{-9}& \mbox{if} & u < 1,\\ + 105\times r^{-9} & \mbox{if} & u \geq 1. + \end{array} + \right.\nonumber +\end{align} +Starting from the potential (Eq. 
\ref{eq:fmm:potential}, +reproduced here for completeness), we can now build all the relevant derivatives: +\begin{align} + \mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r},H) = + \mathsf{\tilde{D}}_{1}(r, u, H) \nonumber +\end{align} + +\noindent\rule{6cm}{0.4pt} +\begin{align} + \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) = + r_x \mathsf{\tilde{D}}_{3}(r, u, H) \nonumber +\end{align} + +\noindent\rule{6cm}{0.4pt} +\begin{align} +\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r},H) = +r_x^2 \mathsf{\tilde{D}}_{5}(r, u, H) + +\mathsf{\tilde{D}}_{3}(r, u, H)\nonumber +\end{align} + +\begin{align} +\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r},H) = + r_x r_y \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber +\end{align} + +\noindent\rule{6cm}{0.4pt} +\begin{align} +\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = + r_x^3 \mathsf{\tilde{D}}_{7}(r, u, H) + + 3 r_x \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber +\end{align} + +\begin{align} +\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^2 \partial r_y} \varphi (\mathbf{r},H) = +r_x^2 r_y \mathsf{\tilde{D}}_{7}(r, u, H) + +r_y \mathsf{\tilde{D}}_{5}(r, u, H) \nonumber +\end{align} + +\begin{align} +\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r},H) = + r_x r_y r_z \mathsf{\tilde{D}}_{7}(r, u, H) \nonumber +\end{align} + +\noindent\rule{6cm}{0.4pt} +\begin{align} + \mathsf{D}_{400}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^4} + \varphi (\mathbf{r},H) = + r_x^4 \mathsf{\tilde{D}}_{9}(r, u, H)+ + 6r_x^2 \mathsf{\tilde{D}}_{7}(r, u, H) + + 3 \mathsf{\tilde{D}}_{5}(r, u, H) + \nonumber \end{align} -we can construct the higher order terms by successively applying the -"chain rule". We show representative examples of the first few -relevant ones here split by order.
+ +\begin{align} + \mathsf{D}_{310}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^3 + \partial r_y} \varphi (\mathbf{r},H) = + r_x^3 r_y \mathsf{\tilde{D}}_{9}(r, u, H) + + 3 r_x r_y \mathsf{\tilde{D}}_{7}(r, u, H) + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{220}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2 + \partial r_y^2} \varphi (\mathbf{r},H) = + r_x^2 r_y^2 \mathsf{\tilde{D}}_{9}(r, u, H) + + r_x^2 \mathsf{\tilde{D}}_{7}(r, u, H) + + r_y^2 \mathsf{\tilde{D}}_{7}(r, u, H) + + \mathsf{\tilde{D}}_{5}(r, u, H) + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{211}(\mathbf{r}) &= \frac{\partial^4}{\partial r_x^2 + \partial r_y \partial r_z} \varphi (\mathbf{r},H) = + r_x^2 r_y r_z \mathsf{\tilde{D}}_{9}(r, u, H) + + r_y r_z \mathsf{\tilde{D}}_{7}(r, u, H) + \nonumber +\end{align} + + + +\begin{comment} + +\noindent\rule{6cm}{0.4pt} \begin{align} \mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) = @@ -101,3 +221,5 @@ relevant ones here split by order. \mathsf{D}_{211}(\mathbf{r}) &= \nonumber \end{align} + +\end{comment}