/******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) * 2016 Matthieu Schaller (schaller@strw.leidenuniv.nl) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . * ******************************************************************************/ #include /* This object's header. */ #include "runner_doiact_grav.h" /* Local includes. */ #include "active.h" #include "cell.h" #include "gravity.h" #include "gravity_cache.h" #include "gravity_iact.h" #include "inline.h" #include "part.h" #include "space_getsid.h" #include "timers.h" /** * @brief Clear the unskip flags of this cell. * * For inactive or foreign cells, we additionally need to recurse. * * @brief c The #cell of interest. * @brief e The #engine (to check whether active or not). */ static INLINE void runner_clear_grav_flags(struct cell *c, const struct engine *e) { if ((!cell_is_active_gravity(c, e) || c->nodeID != e->nodeID) && c->split) { for (int k = 0; k < 8; ++k) if (c->progeny[k] != NULL) runner_clear_grav_flags(c->progeny[k], e); } /* Remove the unskip flags. */ cell_clear_flag(c, cell_flag_unskip_self_grav_processed | cell_flag_unskip_pair_grav_processed); } /** * @brief Recursively propagate the multipoles down the tree by applying the * L2L and L2P kernels. * * @param r The #runner. * @param c The #cell we are working on. * @param timer Are we timing this ? */ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { /* Some constants */ const struct engine *e = r->e; TIMER_TIC; #ifdef SWIFT_DEBUG_CHECKS if (c->grav.ti_old_multipole != e->ti_current) error("c->multipole not drifted."); if (c->grav.multipole->pot.ti_init != e->ti_current) error("c->field tensor not initialised"); #endif if (c->split) { /* Node case */ /* Add the field-tensor to all the 8 progenitors */ for (int k = 0; k < 8; ++k) { struct cell *cp = c->progeny[k]; /* Do we have a progenitor with any active g-particles ? */ if (cp != NULL && cell_is_active_gravity(cp, e)) { #ifdef SWIFT_DEBUG_CHECKS if (cp->grav.ti_old_multipole != e->ti_current) error("cp->multipole not drifted."); if (cp->grav.multipole->pot.ti_init != e->ti_current) error("cp->field tensor not initialised"); #endif /* If the tensor received any contribution, push it down */ if (c->grav.multipole->pot.interacted) { struct grav_tensor shifted_tensor; /* Shift the field tensor */ gravity_L2L(&shifted_tensor, &c->grav.multipole->pot, cp->grav.multipole->CoM, c->grav.multipole->CoM); /* Add it to this level's tensor */ gravity_field_tensors_add(&cp->grav.multipole->pot, &shifted_tensor); } /* Recurse */ runner_do_grav_down(r, cp, 0); } } } else { /* Leaf case */ /* We can abort early if no interactions via multipole happened */ if (!c->grav.multipole->pot.interacted) return; if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the cell for the particle updates */ lock_lock(&c->grav.plock); #endif /* Cell properties */ struct gpart *gparts = c->grav.parts; const int gcount = c->grav.count; const struct grav_tensor *pot = &c->grav.multipole->pot; const double CoM[3] = {c->grav.multipole->CoM[0], c->grav.multipole->CoM[1], c->grav.multipole->CoM[2]}; /* Apply accelerations to the particles */ for (int i = 0; i < gcount; ++i) { /* Get a handle on the gpart */ struct gpart *gp = &gparts[i]; /* Update if active */ if (gpart_is_active(gp, e)) { #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ if (gp->ti_drift != e->ti_current) error("gpart not drifted to current time"); if (c->grav.multipole->pot.ti_init != e->ti_current) error("c->field tensor not initialised"); /* Check that we are not updated an inhibited particle */ if (gpart_is_inhibited(gp, e)) error("Updating an inhibited particle!"); /* Check that the particle was initialised */ if (gp->initialised == 0) error("Adding forces to an un-initialised gpart."); #endif /* Apply the kernel */ gravity_L2P(pot, CoM, gp); } } #ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* All done -> unlock the cell */ if (lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell"); #endif } if (timer) TIMER_TOC(timer_dograv_down); } /** * @brief Compute the fully Newtonian gravitational forces from particles * one array onto the particles in another array * * This function *must* be called at the leaf level for particles i. * * @param gparts_i The particles receiving forces (at leaf level). * @param gcount_i The number of particles receiving forces. * @param gparts_j The particles giving forces (at any level). * @param gcount_j The number of particles giving forces. * @param e The #engine structure. * @param grav_props The properties of the gravity scheme. * @param cache_i The gravity cache to use to store the results in i. * @param ci The (leaf-)cell containing the particles i. * @param multi_j The multipole in cell j. */ static INLINE void runner_dopair_grav_pp_full_no_cache( struct gpart *restrict gparts_i, const int gcount_i, const struct gpart *restrict gparts_j, const int gcount_j, const struct engine *e, const struct gravity_props *grav_props, struct gravity_cache *cache_i, struct cell *ci, const struct gravity_tensors *multi_j) { /* Prepare the i cache */ const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; if (cache_i->count < gcount_padded_i) gravity_cache_init(cache_i, gcount_padded_i); gravity_cache_zero_output(cache_i, gcount_padded_i); #ifdef SWIFT_DEBUG_CHECKS if (ci->split) error("Using function above leaf level!"); #endif /* Loop over sink particles */ for (int i = 0; i < gcount_i; ++i) { struct gpart *gpi = &gparts_i[i]; /* Ignore inactive particles */ if (!gpart_is_active(gpi, e)) continue; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ if (gpi->ti_drift != e->ti_current) error("gpi not drifted to current time"); /* Check that the particle was initialised */ if (gpi->initialised == 0) error("Adding forces to an un-initialised gpart."); #endif const float x_i = gpi->x[0]; const float y_i = gpi->x[1]; const float z_i = gpi->x[2]; const float h_i = gravity_get_softening(gpi, grav_props); /* Local accumulators for the acceleration and potential */ float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Now, we can start the interactions for that particle */ /* Distance to the Multipole */ const float CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], multi_j->CoM[2]}; const float dx_multi = CoM_j[0] - x_i; const float dy_multi = CoM_j[1] - y_i; const float dz_multi = CoM_j[2] - z_i; const float r2_multi = dx_multi * dx_multi + dy_multi * dy_multi + dz_multi * dz_multi; /* Can we use the Mulipole here? */ if (gcount_j > 1 && gravity_M2P_accept(grav_props, gpi, multi_j, r2_multi, /*periodic=*/1)) { const float h_inv_i = 1.f / h_i; /* Interact! */ float f_x, f_y, f_z, pot_ij; runner_iact_grav_pm_full(dx_multi, dy_multi, dz_multi, r2_multi, h_i, h_inv_i, &multi_j->m_pole, &f_x, &f_y, &f_z, &pot_ij); /* Store it back */ a_x += f_x; a_y += f_y; a_z += f_z; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ accumulate_add_ll(&gparts_i[i].num_interacted, multi_j->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ accumulate_add_ll(&gparts_i[i].num_interacted_m2p, multi_j->m_pole.num_gpart); accumulate_add_f(&gparts_i[i].a_grav_m2p[0], f_x); accumulate_add_f(&gparts_i[i].a_grav_m2p[1], f_y); accumulate_add_f(&gparts_i[i].a_grav_m2p[2], f_z); #endif } else { /* Loop over source particles */ for (int j = 0; j < gcount_j; ++j) { const struct gpart *gpj = &gparts_j[j]; /* Ignore inhibited particles */ if (gpart_is_inhibited(gpj, e)) continue; /* Get info about j */ const float x_j = gpj->x[0]; const float y_j = gpj->x[1]; const float z_j = gpj->x[2]; const float mass_j = gpj->mass; const float h_j = gravity_get_softening(gpj, grav_props); /* Compute the pairwise distance. Note: no need for box wrap here! This is non-periodic */ const float dx = x_j - x_i; const float dy = y_j - y_i; const float dz = z_j - z_i; const float r2 = dx * dx + dy * dy + dz * dz; /* Pick the maximal softening length of i and j */ const float h = max(h_i, h_j); const float h2 = h * h; const float h_inv = 1.f / h; const float h_inv_3 = h_inv * h_inv * h_inv; #ifdef SWIFT_DEBUG_CHECKS if (gpj->time_bin == time_bin_not_created) { error("Found an extra gpart in the gravity interaction"); } if (r2 == 0.f && h2 == 0.) error("Interacting particles with 0 distance and 0 softening."); /* Check that particles have been drifted to the current time */ if (gpj->ti_drift != e->ti_current) error("gpj not drifted to current time"); #endif /* Interact! */ float f_ij, pot_ij; runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); /* Store it back */ a_x += f_ij * dx; a_y += f_ij * dy; a_z += f_ij * dz; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ accumulate_inc_ll(&gparts_i[i].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter */ accumulate_inc_ll(&gparts_i[i].num_interacted_p2p); accumulate_add_f(&gparts_i[i].a_grav_p2p[0], a_x); accumulate_add_f(&gparts_i[i].a_grav_p2p[1], a_y); accumulate_add_f(&gparts_i[i].a_grav_p2p[2], a_z); #endif } } /* Store everything back in cache */ cache_i->a_x[i] += a_x; cache_i->a_y[i] += a_y; cache_i->a_z[i] += a_z; cache_i->pot[i] += pot; cache_i->active[i] = 1; } /* Write back to the particle data */ #ifndef SWIFT_TASKS_WITHOUT_ATOMICS lock_lock(&ci->grav.plock); #endif gravity_cache_write_back(cache_i, ci->grav.parts, gcount_i); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell"); #endif } /** * @brief Compute the long-range truncated gravitational forces from particles * one array onto the particles in another array * * This function *must* be called at the leaf level for particles i. * * @param gparts_i The particles receiving forces (at leaf level). * @param gcount_i The number of particles receiving forces. * @param gparts_j The particles giving forces (at any level). * @param gcount_j The number of particles giving forces. * @param dim The size of the computational domain. * @param e The #engine structure. * @param grav_props The properties of the gravity scheme. * @param cache_i The gravity cache to use to store the results in i. * @param ci The (leaf-)cell containing the particles i. * @param multi_j The multipole in cell j. */ static INLINE void runner_dopair_grav_pp_truncated_no_cache( struct gpart *restrict gparts_i, const int gcount_i, const struct gpart *restrict gparts_j, const int gcount_j, const float dim[3], const struct engine *e, const struct gravity_props *grav_props, struct gravity_cache *cache_i, struct cell *ci, const struct gravity_tensors *multi_j) { #ifdef SWIFT_DEBUG_CHECKS if (!e->s->periodic) error("Calling truncated PP function in non-periodic setup."); if (ci->split) error("Using function above leaf level!"); #endif const float r_s_inv = grav_props->r_s_inv; /* Prepare the i cache */ const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; if (cache_i->count < gcount_padded_i) gravity_cache_init(cache_i, gcount_padded_i); gravity_cache_zero_output(cache_i, gcount_padded_i); /* Loop over sink particles */ for (int i = 0; i < gcount_i; ++i) { struct gpart *gpi = &gparts_i[i]; /* Ignore inactive particles */ if (!gpart_is_active(gpi, e)) continue; #ifdef SWIFT_DEBUG_CHECKS /* Check that particles have been drifted to the current time */ if (gpi->ti_drift != e->ti_current) error("gpi not drifted to current time"); /* Check that the particle was initialised */ if (gpi->initialised == 0) error("Adding forces to an un-initialised gpart."); #endif const float x_i = gpi->x[0]; const float y_i = gpi->x[1]; const float z_i = gpi->x[2]; const float h_i = gravity_get_softening(gpi, grav_props); /* Local accumulators for the acceleration and potential */ float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Now, we can start the interactions for that particle */ /* Distance to the Multipole */ const float CoM_j[3] = {multi_j->CoM[0], multi_j->CoM[1], multi_j->CoM[2]}; float dx_multi = CoM_j[0] - x_i; float dy_multi = CoM_j[1] - y_i; float dz_multi = CoM_j[2] - z_i; /* Apply periodic BCs */ dx_multi = nearestf(dx_multi, dim[0]); dy_multi = nearestf(dy_multi, dim[1]); dz_multi = nearestf(dz_multi, dim[2]); const float r2_multi = dx_multi * dx_multi + dy_multi * dy_multi + dz_multi * dz_multi; /* Can we use the Mulipole here? */ if (gcount_j > 1 && gravity_M2P_accept(grav_props, gpi, multi_j, r2_multi, /*periodic=*/1)) { const float h_inv_i = 1.f / h_i; /* Interact! */ float f_x, f_y, f_z, pot_ij; runner_iact_grav_pm_truncated(dx_multi, dy_multi, dz_multi, r2_multi, h_i, h_inv_i, r_s_inv, &multi_j->m_pole, &f_x, &f_y, &f_z, &pot_ij); /* Store it back */ a_x += f_x; a_y += f_y; a_z += f_z; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ accumulate_add_ll(&gparts_i[i].num_interacted, multi_j->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ accumulate_add_ll(&gparts_i[i].num_interacted_m2p, multi_j->m_pole.num_gpart); accumulate_add_f(&gparts_i[i].a_grav_m2p[0], f_x); accumulate_add_f(&gparts_i[i].a_grav_m2p[1], f_y); accumulate_add_f(&gparts_i[i].a_grav_m2p[2], f_z); #endif } else { /* Loop over source particles */ for (int j = 0; j < gcount_j; ++j) { const struct gpart *gpj = &gparts_j[j]; /* Ignore inhibited particles */ if (gpart_is_inhibited(gpj, e)) continue; /* Get info about j */ const float x_j = gpj->x[0]; const float y_j = gpj->x[1]; const float z_j = gpj->x[2]; const float mass_j = gpj->mass; const float h_j = gravity_get_softening(gpj, grav_props); /* Compute the pairwise distance. Note: no need for box wrap here! This is non-periodic */ float dx = x_j - x_i; float dy = y_j - y_i; float dz = z_j - z_i; /* Correct for periodic BCs */ dx = nearestf(dx, dim[0]); dy = nearestf(dy, dim[1]); dz = nearestf(dz, dim[2]); const float r2 = dx * dx + dy * dy + dz * dz; /* Pick the maximal softening length of i and j */ const float h = max(h_i, h_j); const float h2 = h * h; const float h_inv = 1.f / h; const float h_inv_3 = h_inv * h_inv * h_inv; #ifdef SWIFT_DEBUG_CHECKS if (gpj->time_bin == time_bin_not_created) { error("Found an extra gpart in the gravity interaction"); } if (r2 == 0.f && h2 == 0.) error("Interacting particles with 0 distance and 0 softening."); /* Check that particles have been drifted to the current time */ if (gpj->ti_drift != e->ti_current) error("gpj not drifted to current time"); #endif /* Interact! */ float f_ij, pot_ij; runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, &f_ij, &pot_ij); /* Store it back */ a_x += f_ij * dx; a_y += f_ij * dy; a_z += f_ij * dz; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ accumulate_inc_ll(&gparts_i[i].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter */ accumulate_inc_ll(&gparts_i[i].num_interacted_p2p); accumulate_add_f(&gparts_i[i].a_grav_p2p[0], a_x); accumulate_add_f(&gparts_i[i].a_grav_p2p[1], a_y); accumulate_add_f(&gparts_i[i].a_grav_p2p[2], a_z); #endif } } /* Store everything back in cache */ cache_i->a_x[i] += a_x; cache_i->a_y[i] += a_y; cache_i->a_z[i] += a_z; cache_i->pot[i] += pot; cache_i->active[i] = 1; } /* Write back to the particle data */ #ifndef SWIFT_TASKS_WITHOUT_ATOMICS lock_lock(&ci->grav.plock); #endif gravity_cache_write_back(cache_i, ci->grav.parts, gcount_i); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell"); #endif } /** * @brief Compute the non-truncated gravity interactions between all particles * of a cell and the particles of the other cell. * * The calculation is performed non-symmetrically using the pre-filled * #gravity_cache structures. The loop over the j cache should auto-vectorize. * * @param ci_cache #gravity_cache contaning the particles to be updated. * @param cj_cache #gravity_cache contaning the source particles. * @param gcount_i The number of particles in the cell i. * @param gcount_padded_j The number of particles in the cell j padded to the * vector length. * @param periodic Is the calculation using periodic BCs ? * @param dim The size of the simulation volume. * * @param e The #engine (for debugging checks only). * @param gparts_i The #gpart in cell i (for debugging checks only). * @param gparts_j The #gpart in cell j (for debugging checks only). * @param gcount_j The number of particles in the cell j (for debugging checks * only). */ static INLINE void runner_dopair_grav_pp_full( struct gravity_cache *restrict ci_cache, struct gravity_cache *restrict cj_cache, const int gcount_i, const int gcount_j, const int gcount_padded_j, const int periodic, const float dim[3], const struct engine *restrict e, struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { /* Loop over all particles in ci... */ for (int pid = 0; pid < gcount_i; pid++) { /* Skip inactive particles */ if (!ci_cache->active[pid]) continue; /* Skip particle that can use the multipole */ if (ci_cache->use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS if (!gpart_is_active(&gparts_i[pid], e)) error("Inactive particle went through the cache"); #endif const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid]; const float z_i = ci_cache->z[pid]; const float h_i = ci_cache->epsilon[pid]; /* Local accumulators for the acceleration and potential */ float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded_j, VEC_SIZE); /* Loop over every particle in the other cell. */ for (int pjd = 0; pjd < gcount_padded_j; pjd++) { /* Get info about j */ const float x_j = cj_cache->x[pjd]; const float y_j = cj_cache->y[pjd]; const float z_j = cj_cache->z[pjd]; const float mass_j = cj_cache->m[pjd]; const float h_j = cj_cache->epsilon[pjd]; /* Compute the pairwise distance. */ float dx = x_j - x_i; float dy = y_j - y_i; float dz = z_j - z_i; /* Correct for periodic BCs */ if (periodic) { dx = nearestf(dx, dim[0]); dy = nearestf(dy, dim[1]); dz = nearestf(dz, dim[2]); } const float r2 = dx * dx + dy * dy + dz * dz; /* Pick the maximal softening length of i and j */ const float h = max(h_i, h_j); const float h2 = h * h; const float h_inv = 1.f / h; const float h_inv_3 = h_inv * h_inv * h_inv; #ifdef SWIFT_DEBUG_CHECKS /* The gravity_cache are sometimes allocated with more place than required => flag with mass=0 */ if (mass_j != 0.f && gparts_j[pjd].time_bin == time_bin_not_created) { error("Found an extra gpart in the gravity interaction"); } if (gparts_i[pid].time_bin == time_bin_not_created && ci_cache->m[pid] != 0.f) { error("Found an extra gpart in the gravity interaction"); } if (r2 == 0.f && h2 == 0.) error("Interacting particles with 0 distance and 0 softening."); /* Check that particles have been drifted to the current time */ if (gparts_i[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && !gpart_is_inhibited(&gparts_j[pjd], e)) error("gpj not drifted to current time"); /* Check that we are not updated an inhibited particle */ if (gpart_is_inhibited(&gparts_i[pid], e)) error("Updating an inhibited particle!"); /* Check that the particle we interact with was not inhibited */ if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && mass_j != 0.f) error("Inhibited particle used as gravity source."); /* Check that the particle was initialised */ if (gparts_i[pid].initialised == 0) error("Adding forces to an un-initialised gpart."); #endif /* Interact! */ float f_ij, pot_ij; runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); /* Store it back */ a_x += f_ij * dx; a_y += f_ij * dy; a_z += f_ij * dz; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) accumulate_inc_ll(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p); #endif } /* Store everything back in cache */ ci_cache->a_x[pid] += a_x; ci_cache->a_y[pid] += a_y; ci_cache->a_z[pid] += a_z; ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } /** * @brief Compute the truncated gravity interactions between all particles * of a cell and the particles of the other cell. * * The calculation is performed non-symmetrically using the pre-filled * #gravity_cache structures. The loop over the j cache should auto-vectorize. * * This function only makes sense in periodic BCs. * * @param ci_cache #gravity_cache contaning the particles to be updated. * @param cj_cache #gravity_cache contaning the source particles. * @param gcount_i The number of particles in the cell i. * @param gcount_padded_j The number of particles in the cell j padded to the * vector length. * @param dim The size of the simulation volume. * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. * * @param e The #engine (for debugging checks only). * @param gparts_i The #gpart in cell i (for debugging checks only). * @param gparts_j The #gpart in cell j (for debugging checks only). * @param gcount_j The number of particles in the cell j (for debugging checks * only). */ static INLINE void runner_dopair_grav_pp_truncated( struct gravity_cache *restrict ci_cache, struct gravity_cache *restrict cj_cache, const int gcount_i, const int gcount_j, const int gcount_padded_j, const float dim[3], const float r_s_inv, const struct engine *restrict e, struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { #ifdef SWIFT_DEBUG_CHECKS if (!e->s->periodic) error("Calling truncated PP function in non-periodic setup."); #endif /* Loop over all particles in ci... */ for (int pid = 0; pid < gcount_i; pid++) { /* Skip inactive particles */ if (!ci_cache->active[pid]) continue; /* Skip particle that can use the multipole */ if (ci_cache->use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS if (!gpart_is_active(&gparts_i[pid], e)) error("Inactive particle went through the cache"); #endif const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid]; const float z_i = ci_cache->z[pid]; const float h_i = ci_cache->epsilon[pid]; /* Local accumulators for the acceleration and potential */ float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, cj_cache->x, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->y, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->z, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->m, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, cj_cache->epsilon, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded_j, VEC_SIZE); /* Loop over every particle in the other cell. */ for (int pjd = 0; pjd < gcount_padded_j; pjd++) { /* Get info about j */ const float x_j = cj_cache->x[pjd]; const float y_j = cj_cache->y[pjd]; const float z_j = cj_cache->z[pjd]; const float mass_j = cj_cache->m[pjd]; const float h_j = cj_cache->epsilon[pjd]; /* Compute the pairwise distance. */ float dx = x_j - x_i; float dy = y_j - y_i; float dz = z_j - z_i; /* Correct for periodic BCs */ dx = nearestf(dx, dim[0]); dy = nearestf(dy, dim[1]); dz = nearestf(dz, dim[2]); const float r2 = dx * dx + dy * dy + dz * dz; /* Pick the maximal softening length of i and j */ const float h = max(h_i, h_j); const float h2 = h * h; const float h_inv = 1.f / h; const float h_inv_3 = h_inv * h_inv * h_inv; #ifdef SWIFT_DEBUG_CHECKS /* The gravity_cache are sometimes allocated with more place than required => flag with mass=0 */ if (gparts_i[pid].time_bin == time_bin_not_created && ci_cache->m[pid] != 0.) { error("Found an extra gpart in the gravity interaction"); } if (pjd < gcount_j && gparts_j[pjd].time_bin == time_bin_not_created && mass_j != 0.) { error("Found an extra gpart in the gravity interaction"); } if (r2 == 0.f && h2 == 0.) error("Interacting particles with 0 distance and 0 softening."); /* Check that particles have been drifted to the current time */ if (gparts_i[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); if (pjd < gcount_j && gparts_j[pjd].ti_drift != e->ti_current && !gpart_is_inhibited(&gparts_j[pjd], e)) error("gpj not drifted to current time"); /* Check that we are not updated an inhibited particle */ if (gpart_is_inhibited(&gparts_i[pid], e)) error("Updating an inhibited particle!"); /* Check that the particle we interact with was not inhibited */ if (pjd < gcount_j && gpart_is_inhibited(&gparts_j[pjd], e) && mass_j != 0.f) error("Inhibited particle used as gravity source."); /* Check that the particle was initialised */ if (gparts_i[pid].initialised == 0) error("Adding forces to an un-initialised gpart."); #endif /* Interact! */ float f_ij, pot_ij; runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, &f_ij, &pot_ij); /* Store it back */ a_x += f_ij * dx; a_y += f_ij * dy; a_z += f_ij * dz; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) accumulate_inc_ll(&gparts_i[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the p2p interaction counter if it's not a padded gpart */ if (pjd < gcount_j && !gpart_is_inhibited(&gparts_j[pjd], e)) accumulate_inc_ll(&gparts_i[pid].num_interacted_p2p); #endif } /* Store everything back in cache */ ci_cache->a_x[pid] += a_x; ci_cache->a_y[pid] += a_y; ci_cache->a_z[pid] += a_z; ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS accumulate_add_f(&gparts_i[pid].a_grav_p2p[0], a_x); accumulate_add_f(&gparts_i[pid].a_grav_p2p[1], a_y); accumulate_add_f(&gparts_i[pid].a_grav_p2p[2], a_z); #endif } } /** * @brief Compute the gravity interactions between all particles * of a cell and the multipole of the other cell. * * The calculation is performedusing the pre-filled * #gravity_cache structure. The loop over the i cache should auto-vectorize. * * @param ci_cache #gravity_cache contaning the particles to be updated. * @param gcount_padded_i The number of particles in the cell i padded to the * vector length. * @param CoM_j Position of the #multipole in #cell j. * @param multi_j The #multipole in #cell j. * @param periodic Is the calculation using periodic BCs ? * @param dim The size of the simulation volume. * * @param e The #engine (for debugging checks only). * @param gparts_i The #gpart in cell i (for debugging checks only). * @param gcount_i The number of particles in the cell i (for debugging checks * only). * @param cj The #cell j (for debugging checks only). */ static INLINE void runner_dopair_grav_pm_full( struct gravity_cache *ci_cache, const int gcount_padded_i, const float CoM_j[3], const struct multipole *restrict multi_j, const int periodic, const float dim[3], const struct engine *restrict e, struct gpart *restrict gparts_i, const int gcount_i, const struct cell *restrict cj) { /* Make the compiler understand we are in happy vectorization land */ swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(int, active, ci_cache->active, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded_i, VEC_SIZE); const float multi_epsilon = multi_j->max_softening; /* Loop over all particles in ci... */ #if !defined(SWIFT_DEBUG_CHECKS) && !defined(SWIFT_GRAVITY_FORCE_CHECKS) && \ _OPENMP >= 201307 #pragma omp simd #endif for (int pid = 0; pid < gcount_padded_i; pid++) { /* Skip inactive particles */ if (!active[pid]) continue; /* Skip particle that cannot use the multipole */ if (!use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS /* The gravity_cache are sometimes allocated with more place than required => flag with mass=0 */ if (gparts_i[pid].time_bin == time_bin_not_created && ci_cache->m[pid] != 0.) { error("Found an extra gpart in the gravity interaction"); } if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) error("Active particle went through the cache"); /* Check that particles have been drifted to the current time */ if (gparts_i[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); /* Check that we are not updated an inhibited particle */ if (gpart_is_inhibited(&gparts_i[pid], e)) error("Updating an inhibited particle!"); /* Check that the particle was initialised */ if (gparts_i[pid].initialised == 0) error("Adding forces to an un-initialised gpart."); if (pid >= gcount_i) error("Adding forces to padded particle"); #endif const float x_i = x[pid]; const float y_i = y[pid]; const float z_i = z[pid]; /* Some powers of the softening length */ const float h_i = max(epsilon[pid], multi_epsilon); const float h_inv_i = 1.f / h_i; /* Distance to the Multipole */ float dx = CoM_j[0] - x_i; float dy = CoM_j[1] - y_i; float dz = CoM_j[2] - z_i; /* Apply periodic BCs? */ if (periodic) { dx = nearestf(dx, dim[0]); dy = nearestf(dy, dim[1]); dz = nearestf(dz, dim[2]); } const float r2 = dx * dx + dy * dy + dz * dz; #ifdef SWIFT_DEBUG_CHECKS if (!gravity_M2P_accept(e->gravity_properties, &gparts_i[pid], cj->grav.multipole, r2 * 1.01, periodic)) error("use_mpole[i] set when M2P accept fails"); #endif /* Interact! */ float f_x, f_y, f_z, pot_ij; runner_iact_grav_pm_full(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, &f_z, &pot_ij); /* Store it back */ a_x[pid] += f_x; a_y[pid] += f_y; a_z[pid] += f_z; pot[pid] += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) accumulate_add_ll(&gparts_i[pid].num_interacted, cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ if (pid < gcount_i) { accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, cj->grav.multipole->m_pole.num_gpart); accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } } /** * @brief Compute the gravity interactions between all particles * of a cell and the multipole of the other cell. * * The calculation is performedusing the pre-filled * #gravity_cache structure. The loop over the i cache should auto-vectorize. * * This function only makes sense in periodic BCs. * * @param ci_cache #gravity_cache contaning the particles to be updated. * @param gcount_padded_i The number of particles in the cell i padded to the * vector length. * @param CoM_j Position of the #multipole in #cell j. * @param multi_j The #multipole in #cell j. * @param dim The size of the simulation volume. * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. * * @param e The #engine (for debugging checks only). * @param gparts_i The #gpart in cell i (for debugging checks only). * @param gcount_i The number of particles in the cell i (for debugging checks * only). * @param cj The #cell j (for debugging checks only). */ static INLINE void runner_dopair_grav_pm_truncated( struct gravity_cache *ci_cache, const int gcount_padded_i, const float CoM_j[3], const struct multipole *restrict multi_j, const float dim[3], const float r_s_inv, const struct engine *restrict e, struct gpart *restrict gparts_i, const int gcount_i, const struct cell *restrict cj) { #ifdef SWIFT_DEBUG_CHECKS if (!e->s->periodic) error("Calling truncated PP function in non-periodic setup."); #endif /* Make the compiler understand we are in happy vectorization land */ swift_declare_aligned_ptr(float, x, ci_cache->x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, y, ci_cache->y, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, z, ci_cache->z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, epsilon, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, a_x, ci_cache->a_x, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, a_y, ci_cache->a_y, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, a_z, ci_cache->a_z, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(float, pot, ci_cache->pot, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(int, active, ci_cache->active, SWIFT_CACHE_ALIGNMENT); swift_declare_aligned_ptr(int, use_mpole, ci_cache->use_mpole, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded_i, VEC_SIZE); const float multi_epsilon = multi_j->max_softening; /* Loop over all particles in ci... */ #if !defined(SWIFT_DEBUG_CHECKS) && !defined(SWIFT_GRAVITY_FORCE_CHECKS) && \ _OPENMP >= 201307 #pragma omp simd #endif for (int pid = 0; pid < gcount_padded_i; pid++) { /* Skip inactive particles */ if (!active[pid]) continue; /* Skip particle that cannot use the multipole */ if (!use_mpole[pid]) continue; #ifdef SWIFT_DEBUG_CHECKS /* The gravity_cache are sometimes allocated with more place than required => flag with mass=0 */ if (gparts_i[pid].time_bin == time_bin_not_created && ci_cache->m[pid] != 0.) { error("Found an extra gpart in the gravity interaction"); } if (pid < gcount_i && !gpart_is_active(&gparts_i[pid], e)) error("Active particle went through the cache"); /* Check that particles have been drifted to the current time */ if (gparts_i[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); /* Check that we are not updated an inhibited particle */ if (gpart_is_inhibited(&gparts_i[pid], e)) error("Updating an inhibited particle!"); /* Check that the particle was initialised */ if (gparts_i[pid].initialised == 0) error("Adding forces to an un-initialised gpart."); if (pid >= gcount_i) error("Adding forces to padded particle"); #endif const float x_i = x[pid]; const float y_i = y[pid]; const float z_i = z[pid]; /* Some powers of the softening length */ const float h_i = max(epsilon[pid], multi_epsilon); const float h_inv_i = 1.f / h_i; /* Distance to the Multipole */ float dx = CoM_j[0] - x_i; float dy = CoM_j[1] - y_i; float dz = CoM_j[2] - z_i; /* Apply periodic BCs */ dx = nearestf(dx, dim[0]); dy = nearestf(dy, dim[1]); dz = nearestf(dz, dim[2]); const float r2 = dx * dx + dy * dy + dz * dz; #ifdef SWIFT_DEBUG_CHECKS if (!gravity_M2P_accept(e->gravity_properties, &gparts_i[pid], cj->grav.multipole, r2 * 1.01, /*periodic=*/1)) error("use_mpole[i] set when M2P accept fails"); #endif /* Interact! */ float f_x, f_y, f_z, pot_ij; runner_iact_grav_pm_truncated(dx, dy, dz, r2, h_i, h_inv_i, r_s_inv, multi_j, &f_x, &f_y, &f_z, &pot_ij); /* Store it back */ a_x[pid] += f_x; a_y[pid] += f_y; a_z[pid] += f_z; pot[pid] += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ if (pid < gcount_i) accumulate_add_ll(&gparts_i[pid].num_interacted, cj->grav.multipole->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the M2P interaction counter and forces. */ if (pid < gcount_i) { accumulate_add_ll(&gparts_i[pid].num_interacted_m2p, cj->grav.multipole->m_pole.num_gpart); accumulate_add_f(&gparts_i[pid].a_grav_m2p[0], f_x); accumulate_add_f(&gparts_i[pid].a_grav_m2p[1], f_y); accumulate_add_f(&gparts_i[pid].a_grav_m2p[2], f_z); } #endif } } /** * @brief Computes the interaction of all the particles in a cell with all the * particles of another cell. * * This function switches between the full potential and the truncated one * depending on needs. It will also use the M2P (multipole) interaction * for the subset of particles in either cell for which the distance criterion * is valid. * * This function starts by constructing the require #gravity_cache for both * cells and then call the specialised functions doing the actual work on * the caches. It then write the data back to the particles. * * @param r The #runner. * @param ci The first #cell. * @param cj The other #cell. * @param symmetric Are we updating both cells (1) or just ci (0) ? * @param allow_mpole Are we allowing the use of M2P interactions ? */ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj, const int symmetric, const int allow_mpole) { /* Recover some useful constants */ const struct engine *e = r->e; const int periodic = e->mesh->periodic; const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], (float)e->mesh->dim[2]}; const float r_s_inv = e->mesh->r_s_inv; const double min_trunc = e->mesh->r_cut_min; TIMER_TIC; /* Record activity status */ const int ci_active = cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID); const int cj_active = cell_is_active_gravity(cj, e) && (cj->nodeID == e->nodeID); /* Anything to do here? */ if (!ci_active && !cj_active) return; if (!ci_active && !symmetric) return; #ifdef SWIFT_DEBUG_CHECKS /* Check that we are not doing something stupid */ if (ci->split || cj->split) error("Running P-P on splitable cells"); /* Let's start by checking things are drifted */ if (!cell_are_gpart_drifted(ci, e)) error("Un-drifted gparts"); if (!cell_are_gpart_drifted(cj, e)) error("Un-drifted gparts"); if (cj_active && ci->grav.ti_old_multipole != e->ti_current) error("Un-drifted multipole"); if (ci_active && cj->grav.ti_old_multipole != e->ti_current) error("Un-drifted multipole"); #endif /* Caches to play with */ struct gravity_cache *const ci_cache = &r->ci_gravity_cache; struct gravity_cache *const cj_cache = &r->cj_gravity_cache; /* Shift to apply to the particles in each cell */ const double shift_i[3] = {0., 0., 0.}; const double shift_j[3] = {0., 0., 0.}; /* Recover the multipole info and shift the CoM locations */ const float rmax_i = ci->grav.multipole->r_max; const float rmax_j = cj->grav.multipole->r_max; const struct multipole *multi_i = &ci->grav.multipole->m_pole; const struct multipole *multi_j = &cj->grav.multipole->m_pole; const float CoM_i[3] = {(float)(ci->grav.multipole->CoM[0] - shift_i[0]), (float)(ci->grav.multipole->CoM[1] - shift_i[1]), (float)(ci->grav.multipole->CoM[2] - shift_i[2])}; const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0] - shift_j[0]), (float)(cj->grav.multipole->CoM[1] - shift_j[1]), (float)(cj->grav.multipole->CoM[2] - shift_j[2])}; /* Start by constructing particle caches */ /* Computed the padded counts */ const int gcount_i = ci->grav.count; const int gcount_j = cj->grav.count; const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; const int gcount_padded_j = gcount_j - (gcount_j % VEC_SIZE) + VEC_SIZE; const int allow_multipole_i = allow_mpole && ci->grav.count > 1; const int allow_multipole_j = allow_mpole && cj->grav.count > 1; /* Fill the caches */ gravity_cache_populate(e->max_active_bin, allow_multipole_j, periodic, dim, ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, shift_i, CoM_j, cj->grav.multipole, ci, e->gravity_properties); gravity_cache_populate(e->max_active_bin, allow_multipole_i, periodic, dim, cj_cache, cj->grav.parts, gcount_j, gcount_padded_j, shift_j, CoM_i, ci->grav.multipole, cj, e->gravity_properties); /* Can we use the Newtonian version or do we need the truncated one ? */ if (!periodic) { /* Not periodic -> Can always use Newtonian potential */ /* Let's updated the active cell(s) only */ if (ci_active) { /* First the P2P */ runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, gcount_padded_j, periodic, dim, e, ci->grav.parts, cj->grav.parts); /* Then the M2P */ if (allow_multipole_j) runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, periodic, dim, e, ci->grav.parts, gcount_i, cj); } if (cj_active && symmetric) { /* First the P2P */ runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, gcount_padded_i, periodic, dim, e, cj->grav.parts, ci->grav.parts); /* Then the M2P */ if (allow_multipole_i) runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, periodic, dim, e, cj->grav.parts, gcount_j, ci); } } else { /* Periodic BC */ /* Get the relative distance between the CoMs */ double dx[3] = {CoM_j[0] - CoM_i[0], CoM_j[1] - CoM_i[1], CoM_j[2] - CoM_i[2]}; /* Correct for periodic BCs */ dx[0] = nearestf(dx[0], dim[0]); dx[1] = nearestf(dx[1], dim[1]); dx[2] = nearestf(dx[2], dim[2]); const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; /* Get the maximal distance between any two particles */ const double max_r = sqrt(r2) + rmax_i + rmax_j; /* Do we need to use the truncated interactions ? */ if (max_r > min_trunc) { /* Periodic but far-away cells must use the truncated potential */ /* Let's updated the active cell(s) only */ if (ci_active) { /* First the (truncated) P2P */ runner_dopair_grav_pp_truncated(ci_cache, cj_cache, gcount_i, gcount_j, gcount_padded_j, dim, r_s_inv, e, ci->grav.parts, cj->grav.parts); /* Then the M2P */ if (allow_multipole_j) runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, dim, r_s_inv, e, ci->grav.parts, gcount_i, cj); } if (cj_active && symmetric) { /* First the (truncated) P2P */ runner_dopair_grav_pp_truncated(cj_cache, ci_cache, gcount_j, gcount_i, gcount_padded_i, dim, r_s_inv, e, cj->grav.parts, ci->grav.parts); /* Then the M2P */ if (allow_multipole_i) runner_dopair_grav_pm_truncated(cj_cache, gcount_padded_j, CoM_i, multi_i, dim, r_s_inv, e, cj->grav.parts, gcount_j, ci); } } else { /* Periodic but close-by cells can use the full Newtonian potential */ /* Let's updated the active cell(s) only */ if (ci_active) { /* First the (Newtonian) P2P */ runner_dopair_grav_pp_full(ci_cache, cj_cache, gcount_i, gcount_j, gcount_padded_j, periodic, dim, e, ci->grav.parts, cj->grav.parts); /* Then the M2P */ if (allow_multipole_j) runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, periodic, dim, e, ci->grav.parts, gcount_i, cj); } if (cj_active && symmetric) { /* First the (Newtonian) P2P */ runner_dopair_grav_pp_full(cj_cache, ci_cache, gcount_j, gcount_i, gcount_padded_i, periodic, dim, e, cj->grav.parts, ci->grav.parts); /* Then the M2P */ if (allow_multipole_i) runner_dopair_grav_pm_full(cj_cache, gcount_padded_j, CoM_i, multi_i, periodic, dim, e, cj->grav.parts, gcount_j, ci); } } } /* Write back to the particles in ci */ if (ci_active) { #ifndef SWIFT_TASKS_WITHOUT_ATOMICS lock_lock(&ci->grav.plock); #endif gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell"); #endif } /* Write back to the particles in cj */ if (cj_active && symmetric) { #ifndef SWIFT_TASKS_WITHOUT_ATOMICS lock_lock(&cj->grav.plock); #endif gravity_cache_write_back(cj_cache, cj->grav.parts, gcount_j); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS if (lock_unlock(&cj->grav.plock) != 0) error("Error unlocking cell"); #endif } TIMER_TOC(timer_dopair_grav_pp); } /** * @brief Compute the gravitational forces from particles in #cell cj onto * particles in #cell ci without using a cache for cj. * * This function does not update the particles in cj. It also does not * make use of the field tensors in ci. * The function recurses to the leaf level in ci (not cj!) and then either uses * M2P or P2P when too close. * * @param r The #runner object. * @param ci The cell containing particles to update. * @param cj The cell containing the particles sourcing the gravity. */ void runner_dopair_grav_pp_no_cache(struct runner *r, struct cell *restrict ci, const struct cell *restrict cj) { /* Recover some useful constants */ const struct engine *e = r->e; const int periodic = e->mesh->periodic; const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], (float)e->mesh->dim[2]}; /* Record activity status */ const int ci_active = cell_is_active_gravity(ci, e) && (ci->nodeID == e->nodeID); /* Anything to do here? */ if (!ci_active) return; if (ci->grav.count == 0 || cj->grav.count == 0) return; /* Recurse? */ if (ci->split) { for (int k = 0; k < 8; ++k) { if (ci->progeny[k] != NULL) { runner_dopair_grav_pp_no_cache(r, ci->progeny[k], cj); } } } else { /* Can we use the Newtonian version or do we need the truncated one ? */ if (!periodic) { runner_dopair_grav_pp_full_no_cache( ci->grav.parts, ci->grav.count, cj->grav.parts, cj->grav.count, e, e->gravity_properties, &r->ci_gravity_cache, ci, cj->grav.multipole); } else { runner_dopair_grav_pp_truncated_no_cache( ci->grav.parts, ci->grav.count, cj->grav.parts, cj->grav.count, dim, e, e->gravity_properties, &r->ci_gravity_cache, ci, cj->grav.multipole); } } } /** * @brief Compute the non-truncated gravity interactions between all particles * of a cell and the particles of the other cell. * * The calculation is performed non-symmetrically using the pre-filled * #gravity_cache structures. The loop over the j cache should auto-vectorize. * * @param ci_cache #gravity_cache contaning the particles to be updated. * @param gcount The number of particles in the cell. * @param gcount_padded The number of particles in the cell padded to the * vector length. * * @param e The #engine (for debugging checks only). * @param gparts The #gpart in the cell (for debugging checks only). */ static INLINE void runner_doself_grav_pp_full( struct gravity_cache *restrict ci_cache, const int gcount, const int gcount_padded, const struct engine *e, struct gpart *gparts) { /* Loop over all particles in ci... */ for (int pid = 0; pid < gcount; pid++) { /* Skip inactive particles */ if (!ci_cache->active[pid]) continue; const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid]; const float z_i = ci_cache->z[pid]; const float h_i = ci_cache->epsilon[pid]; /* Local accumulators for the acceleration */ float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Loop over every other particle in the cell. */ for (int pjd = 0; pjd < gcount_padded; pjd++) { /* No self interaction */ if (pid == pjd) continue; /* Get info about j */ const float x_j = ci_cache->x[pjd]; const float y_j = ci_cache->y[pjd]; const float z_j = ci_cache->z[pjd]; const float mass_j = ci_cache->m[pjd]; const float h_j = ci_cache->epsilon[pjd]; /* Compute the pairwise (square) distance. */ /* Note: no need for periodic wrapping inside a cell */ const float dx = x_j - x_i; const float dy = y_j - y_i; const float dz = z_j - z_i; const float r2 = dx * dx + dy * dy + dz * dz; /* Pick the maximal softening length of i and j */ const float h = max(h_i, h_j); const float h2 = h * h; const float h_inv = 1.f / h; const float h_inv_3 = h_inv * h_inv * h_inv; #ifdef SWIFT_DEBUG_CHECKS /* The gravity_cache are sometimes allocated with more place than required => flag with mass=0 */ if (gparts[pid].time_bin == time_bin_not_created && ci_cache->m[pid] != 0.) { error("Found an extra gpart in the gravity interaction"); } if (pjd < gcount && gparts[pjd].time_bin == time_bin_not_created && mass_j != 0.) { error("Found an extra gpart in the gravity interaction"); } if (r2 == 0.f && h2 == 0.) error("Interacting particles with 0 distance and 0 softening."); /* Check that particles have been drifted to the current time */ if (gparts[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && !gpart_is_inhibited(&gparts[pjd], e)) error("gpj not drifted to current time"); /* Check that we are not updated an inhibited particle */ if (gpart_is_inhibited(&gparts[pid], e)) error("Updating an inhibited particle!"); /* Check that the particle we interact with was not inhibited */ if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) error("Inhibited particle used as gravity source."); /* Check that the particle was initialised */ if (gparts[pid].initialised == 0) error("Adding forces to an un-initialised gpart."); #endif /* Interact! */ float f_ij, pot_ij; runner_iact_grav_pp_full(r2, h2, h_inv, h_inv_3, mass_j, &f_ij, &pot_ij); /* Store it back */ a_x += f_ij * dx; a_y += f_ij * dy; a_z += f_ij * dz; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) accumulate_inc_ll(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) accumulate_inc_ll(&gparts[pid].num_interacted_p2p); #endif } /* Store everything back in cache */ ci_cache->a_x[pid] += a_x; ci_cache->a_y[pid] += a_y; ci_cache->a_z[pid] += a_z; ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } /** * @brief Compute the truncated gravity interactions between all particles * of a cell and the particles of the other cell. * * The calculation is performed non-symmetrically using the pre-filled * #gravity_cache structures. The loop over the j cache should auto-vectorize. * * This function only makes sense in periodic BCs. * * @param ci_cache #gravity_cache contaning the particles to be updated. * @param gcount The number of particles in the cell. * @param gcount_padded The number of particles in the cell padded to the * vector length. * @param r_s_inv The inverse of the gravity-mesh smoothing-scale. * * @param e The #engine (for debugging checks only). * @param gparts The #gpart in the cell (for debugging checks only). */ static INLINE void runner_doself_grav_pp_truncated( struct gravity_cache *restrict ci_cache, const int gcount, const int gcount_padded, const float r_s_inv, const struct engine *e, struct gpart *gparts) { #ifdef SWIFT_DEBUG_CHECKS if (!e->s->periodic) error("Calling truncated PP function in non-periodic setup."); #endif /* Loop over all particles in ci... */ for (int pid = 0; pid < gcount; pid++) { /* Skip inactive particles */ if (!ci_cache->active[pid]) continue; const float x_i = ci_cache->x[pid]; const float y_i = ci_cache->y[pid]; const float z_i = ci_cache->z[pid]; const float h_i = ci_cache->epsilon[pid]; /* Local accumulators for the acceleration and potential */ float a_x = 0.f, a_y = 0.f, a_z = 0.f, pot = 0.f; /* Make the compiler understand we are in happy vectorization land */ swift_align_information(float, ci_cache->x, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->y, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->z, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->m, SWIFT_CACHE_ALIGNMENT); swift_align_information(float, ci_cache->epsilon, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Loop over every other particle in the cell. */ for (int pjd = 0; pjd < gcount_padded; pjd++) { /* No self interaction */ if (pid == pjd) continue; /* Get info about j */ const float x_j = ci_cache->x[pjd]; const float y_j = ci_cache->y[pjd]; const float z_j = ci_cache->z[pjd]; const float mass_j = ci_cache->m[pjd]; const float h_j = ci_cache->epsilon[pjd]; /* Compute the pairwise (square) distance. */ /* Note: no need for periodic wrapping inside a cell */ const float dx = x_j - x_i; const float dy = y_j - y_i; const float dz = z_j - z_i; const float r2 = dx * dx + dy * dy + dz * dz; /* Pick the maximal softening length of i and j */ const float h = max(h_i, h_j); const float h2 = h * h; const float h_inv = 1.f / h; const float h_inv_3 = h_inv * h_inv * h_inv; #ifdef SWIFT_DEBUG_CHECKS /* The gravity_cache are sometimes allocated with more place than required => flag with mass=0 */ if (gparts[pid].time_bin == time_bin_not_created && ci_cache->m[pid] != 0.) { error("Found an extra gpart in the gravity interaction"); } if (pjd < gcount && gparts[pjd].time_bin == time_bin_not_created && mass_j != 0.) { error("Found an extra gpart in the gravity interaction"); } if (r2 == 0.f && h2 == 0.) error("Interacting particles with 0 distance and 0 softening."); /* Check that particles have been drifted to the current time */ if (gparts[pid].ti_drift != e->ti_current) error("gpi not drifted to current time"); if (pjd < gcount && gparts[pjd].ti_drift != e->ti_current && !gpart_is_inhibited(&gparts[pjd], e)) error("gpj not drifted to current time"); /* Check that we are not updated an inhibited particle */ if (gpart_is_inhibited(&gparts[pid], e)) error("Updating an inhibited particle!"); /* Check that the particle we interact with was not inhibited */ if (pjd < gcount && gpart_is_inhibited(&gparts[pjd], e) && mass_j != 0.f) error("Inhibited particle used as gravity source."); /* Check that the particle was initialised */ if (gparts[pid].initialised == 0) error("Adding forces to an un-initialised gpart."); #endif /* Interact! */ float f_ij, pot_ij; runner_iact_grav_pp_truncated(r2, h2, h_inv, h_inv_3, mass_j, r_s_inv, &f_ij, &pot_ij); /* Store it back */ a_x += f_ij * dx; a_y += f_ij * dy; a_z += f_ij * dz; pot += pot_ij; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) accumulate_inc_ll(&gparts[pid].num_interacted); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Update the P2P interaction counter if it's not a padded gpart */ if (pjd < gcount && !gpart_is_inhibited(&gparts[pjd], e)) accumulate_inc_ll(&gparts[pid].num_interacted_p2p); #endif } /* Store everything back in cache */ ci_cache->a_x[pid] += a_x; ci_cache->a_y[pid] += a_y; ci_cache->a_z[pid] += a_z; ci_cache->pot[pid] += pot; #ifdef SWIFT_GRAVITY_FORCE_CHECKS accumulate_add_f(&gparts[pid].a_grav_p2p[0], a_x); accumulate_add_f(&gparts[pid].a_grav_p2p[1], a_y); accumulate_add_f(&gparts[pid].a_grav_p2p[2], a_z); #endif } } /** * @brief Computes the interaction of all the particles in a cell with all the * other ones. * * This function switches between the full potential and the truncated one * depending on needs. * * This function starts by constructing the require #gravity_cache for the * cell and then call the specialised functions doing the actual work on * the cache. It then write the data back to the particles. * * @param r The #runner. * @param c The #cell. */ void runner_doself_grav_pp(struct runner *r, struct cell *c) { /* Recover some useful constants */ const struct engine *e = r->e; const int periodic = e->mesh->periodic; const float r_s_inv = e->mesh->r_s_inv; const double min_trunc = e->mesh->r_cut_min; TIMER_TIC; #ifdef SWIFT_DEBUG_CHECKS if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); #endif /* Anything to do here? */ if (!cell_is_active_gravity(c, e)) return; /* Check that we are not doing something stupid */ if (c->split) error("Running P-P on a splitable cell"); /* Do we need to start by drifting things ? */ if (!cell_are_gpart_drifted(c, e)) error("Un-drifted gparts"); /* Start by constructing a cache for the particles */ struct gravity_cache *const ci_cache = &r->ci_gravity_cache; /* Shift to apply to the particles in the cell */ const double loc[3] = {c->loc[0] + 0.5 * c->width[0], c->loc[1] + 0.5 * c->width[1], c->loc[2] + 0.5 * c->width[2]}; /* Computed the padded counts */ const int gcount = c->grav.count; const int gcount_padded = gcount - (gcount % VEC_SIZE) + VEC_SIZE; /* Fill the cache */ gravity_cache_populate_no_mpole(e->max_active_bin, ci_cache, c->grav.parts, gcount, gcount_padded, loc, c, e->gravity_properties); /* Can we use the Newtonian version or do we need the truncated one ? */ if (!periodic) { /* Not periodic -> Can always use Newtonian potential */ runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, c->grav.parts); } else { /* Get the maximal distance between any two particles */ const double max_r = 2. * c->grav.multipole->r_max; /* Do we need to use the truncated interactions ? */ if (max_r > min_trunc) { /* Periodic but far-away cells must use the truncated potential */ runner_doself_grav_pp_truncated(ci_cache, gcount, gcount_padded, r_s_inv, e, c->grav.parts); } else { /* Periodic but close-by cells can use the full Newtonian potential */ runner_doself_grav_pp_full(ci_cache, gcount, gcount_padded, e, c->grav.parts); } } /* Write back to the particles */ #ifndef SWIFT_TASKS_WITHOUT_ATOMICS lock_lock(&c->grav.plock); #endif gravity_cache_write_back(ci_cache, c->grav.parts, gcount); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS if (lock_unlock(&c->grav.plock) != 0) error("Error unlocking cell"); #endif TIMER_TOC(timer_doself_grav_pp); } /** * @brief Computes the interaction of the field tensor and multipole * of two cells symmetrically. * * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. */ static INLINE void runner_dopair_grav_mm_symmetric(struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { /* Some constants */ const struct engine *e = r->e; const struct gravity_props *props = e->gravity_properties; const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; const float r_s_inv = e->mesh->r_s_inv; TIMER_TIC; /* Anything to do here? */ if ((!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) || (!cell_is_active_gravity_mm(cj, e) || cj->nodeID != engine_rank)) error("Invalid state in symmetric M-M calculation!"); /* Short-cut to the multipole */ const struct multipole *multi_i = &ci->grav.multipole->m_pole; const struct multipole *multi_j = &cj->grav.multipole->m_pole; #ifdef SWIFT_DEBUG_CHECKS if (ci == cj) error("Interacting a cell with itself using M2L"); if (multi_i->num_gpart == 0) error("Multipole i does not seem to have been set."); if (multi_j->num_gpart == 0) error("Multipole j does not seem to have been set."); if (ci->grav.multipole->pot.ti_init != e->ti_current) error("ci->grav tensor not initialised."); if (ci->grav.multipole->pot.ti_init != e->ti_current) error("cj->grav tensor not initialised."); if (ci->grav.ti_old_multipole != e->ti_current) error( "Undrifted multipole ci->grav.ti_old_multipole=%lld ci->nodeID=%d " "cj->nodeID=%d e->ti_current=%lld", ci->grav.ti_old_multipole, ci->nodeID, cj->nodeID, e->ti_current); if (cj->grav.ti_old_multipole != e->ti_current) error( "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " "ci->nodeID=%d e->ti_current=%lld", cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif #ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the multipoles * Note we impose a hierarchy to solve the dining philosopher problem */ if (ci < cj) { lock_lock(&ci->grav.mlock); lock_lock(&cj->grav.mlock); } else { lock_lock(&cj->grav.mlock); lock_lock(&ci->grav.mlock); } #endif /* Let's interact at this level */ gravity_M2L_symmetric(&ci->grav.multipole->pot, &cj->grav.multipole->pot, multi_i, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Unlock the multipoles */ if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); #endif TIMER_TOC(timer_dopair_grav_mm); } /** * @brief Computes the interaction of the field tensor in a cell with the * multipole of another cell. * * @param r The #runner. * @param ci The #cell with field tensor to interact. * @param cj The #cell with the multipole. */ static INLINE void runner_dopair_grav_mm_nonsym(struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { /* Some constants */ const struct engine *e = r->e; const struct gravity_props *props = e->gravity_properties; const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; const float r_s_inv = e->mesh->r_s_inv; TIMER_TIC; /* Anything to do here? */ if (!cell_is_active_gravity_mm(ci, e) || ci->nodeID != engine_rank) return; /* Short-cut to the multipole */ const struct multipole *multi_j = &cj->grav.multipole->m_pole; #ifdef SWIFT_DEBUG_CHECKS if (ci == cj) error("Interacting a cell with itself using M2L"); if (multi_j->num_gpart == 0) error("Multipole does not seem to have been set."); if (ci->grav.multipole->pot.ti_init != e->ti_current) error("ci->grav tensor not initialised."); if (cj->grav.ti_old_multipole != e->ti_current) error( "Undrifted multipole cj->grav.ti_old_multipole=%lld cj->nodeID=%d " "ci->nodeID=%d e->ti_current=%lld", cj->grav.ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); #endif #ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Lock the multipoles * Note we impose a hierarchy to solve the dining philosopher problem */ if (ci < cj) { lock_lock(&ci->grav.mlock); lock_lock(&cj->grav.mlock); } else { lock_lock(&cj->grav.mlock); lock_lock(&ci->grav.mlock); } #endif /* Let's interact at this level */ gravity_M2L_nonsym(&ci->grav.multipole->pot, multi_j, ci->grav.multipole->CoM, cj->grav.multipole->CoM, props, periodic, dim, r_s_inv); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS /* Unlock the multipoles */ if (lock_unlock(&ci->grav.mlock) != 0) error("Failed to unlock multipole"); if (lock_unlock(&cj->grav.mlock) != 0) error("Failed to unlock multipole"); #endif TIMER_TOC(timer_dopair_grav_mm); } /** * @brief Call the M-M calculation on two cells if active. * * @param r The #runner object. * @param ci The first #cell. * @param cj The second #cell. */ static INLINE void runner_dopair_grav_mm(struct runner *r, struct cell *restrict ci, struct cell *restrict cj) { const struct engine *e = r->e; /* What do we need to do? */ const int do_i = cell_is_active_gravity_mm(ci, e) && (ci->nodeID == e->nodeID); const int do_j = cell_is_active_gravity_mm(cj, e) && (cj->nodeID == e->nodeID); /* Do we need drifting first? */ if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); if (cj->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(cj, e); /* Interact! */ if (do_i && do_j) runner_dopair_grav_mm_symmetric(r, ci, cj); else if (do_i) runner_dopair_grav_mm_nonsym(r, ci, cj); else if (do_j) runner_dopair_grav_mm_nonsym(r, cj, ci); } /** * @brief Computes all the M-M interactions between all the well-separated (at * rebuild) pairs of progenies of the two cells. * * @param r The #runner thread. * @param flags The task flag containing the list of well-separated pairs as a * bit-field. * @param ci The first #cell. * @param cj The second #cell. */ void runner_dopair_grav_mm_progenies(struct runner *r, const long long flags, struct cell *restrict ci, struct cell *restrict cj) { const struct engine *e = r->e; /* Clear the flags */ runner_clear_grav_flags(ci, e); runner_clear_grav_flags(cj, e); /* Loop over all pairs of progenies */ for (int i = 0; i < 8; i++) { if (ci->progeny[i] != NULL) { for (int j = 0; j < 8; j++) { if (cj->progeny[j] != NULL) { struct cell *cpi = ci->progeny[i]; struct cell *cpj = cj->progeny[j]; const int flag = i * 8 + j; /* Did we agree to use an M-M interaction here at the last rebuild? */ if (flags & (1ULL << flag)) runner_dopair_grav_mm(r, cpi, cpj); } } } } } void runner_dopair_recursive_grav_pm(struct runner *r, struct cell *ci, const struct cell *cj) { const struct engine *e = r->e; /* Clear the flags */ runner_clear_grav_flags(ci, e); /* Some constants */ const int periodic = e->mesh->periodic; const float dim[3] = {(float)e->mesh->dim[0], (float)e->mesh->dim[1], (float)e->mesh->dim[2]}; const float r_s_inv = e->mesh->r_s_inv; /* Anything to do here? */ if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return; #ifdef SWIFT_DEBUG_CHECKS /* Early abort? */ if (ci->grav.count == 0 || cj->grav.count == 0) error("Doing pair gravity on an empty cell !"); /* Sanity check */ if (ci == cj) error("Pair interaction between a cell and itself."); if (cj->grav.ti_old_multipole != e->ti_current) error("cj->grav.multipole not drifted."); #endif /* Can we recurse further? */ if (ci->split) { /* Loop over ci's children */ for (int k = 0; k < 8; k++) { if (ci->progeny[k] != NULL) runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj); } /* Ok, let's do the interaction here */ } else { /* Start by constructing particle caches */ /* Cache to play with */ struct gravity_cache *const ci_cache = &r->ci_gravity_cache; /* Computed the padded counts */ const int gcount_i = ci->grav.count; const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; /* Recover the multipole info and the CoM locations */ const struct multipole *multi_j = &cj->grav.multipole->m_pole; const float CoM_j[3] = {(float)(cj->grav.multipole->CoM[0]), (float)(cj->grav.multipole->CoM[1]), (float)(cj->grav.multipole->CoM[2])}; #ifdef SWIFT_DEBUG_CHECKS if (cj->grav.count == 1) error("Constructing cache for M2P interaction with multipole of size 0!"); #endif /* Fill the cache */ gravity_cache_populate_all_mpole( e->max_active_bin, periodic, dim, ci_cache, ci->grav.parts, gcount_i, gcount_padded_i, ci, CoM_j, cj->grav.multipole, e->gravity_properties); /* Can we use the Newtonian version or do we need the truncated one ? */ if (!periodic) { runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, periodic, dim, e, ci->grav.parts, gcount_i, cj); } else { runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, dim, r_s_inv, e, ci->grav.parts, gcount_i, cj); } /* Write back to the particles */ #ifndef SWIFT_TASKS_WITHOUT_ATOMICS lock_lock(&ci->grav.plock); #endif gravity_cache_write_back(ci_cache, ci->grav.parts, gcount_i); #ifndef SWIFT_TASKS_WITHOUT_ATOMICS if (lock_unlock(&ci->grav.plock) != 0) error("Error unlocking cell"); #endif } } /** * @brief Computes the interaction of all the particles in a cell with all the * particles of another cell. * * This function will try to recurse as far down the tree as possible and only * default to direct summation if there is no better option. * * If using periodic BCs, we will abort the recursion if th distance between the * cells is larger than the set threshold. * * @param r The #runner. * @param ci The first #cell. * @param cj The other #cell. * @param gettimer Are we timing this ? */ void runner_dopair_recursive_grav(struct runner *r, struct cell *ci, struct cell *cj, const int gettimer) { const struct engine *e = r->e; /* Clear the flags */ runner_clear_grav_flags(ci, e); runner_clear_grav_flags(cj, e); /* Some constants */ const int nodeID = e->nodeID; const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; const double max_distance = e->mesh->r_cut_max; /* Anything to do here? */ if (!((cell_is_active_gravity(ci, e) && ci->nodeID == nodeID) || (cell_is_active_gravity(cj, e) && cj->nodeID == nodeID))) return; #ifdef SWIFT_DEBUG_CHECKS const int gcount_i = ci->grav.count; const int gcount_j = cj->grav.count; /* Early abort? */ if (gcount_i == 0 || gcount_j == 0) error("Doing pair gravity on an empty cell !"); /* Sanity check */ if (ci == cj) error("Pair interaction between a cell and itself."); if (cell_is_active_gravity(ci, e) && ci->grav.ti_old_multipole != e->ti_current) error("ci->grav.multipole not drifted."); if (cell_is_active_gravity(cj, e) && cj->grav.ti_old_multipole != e->ti_current) error("cj->grav.multipole not drifted."); #endif TIMER_TIC; /* Recover the multipole information */ struct gravity_tensors *const multi_i = ci->grav.multipole; struct gravity_tensors *const multi_j = cj->grav.multipole; /* Get the distance between the CoMs */ double dx = multi_i->CoM[0] - multi_j->CoM[0]; double dy = multi_i->CoM[1] - multi_j->CoM[1]; double dz = multi_i->CoM[2] - multi_j->CoM[2]; /* Apply BC */ if (periodic) { dx = nearest(dx, dim[0]); dy = nearest(dy, dim[1]); dz = nearest(dz, dim[2]); } const double r2 = dx * dx + dy * dy + dz * dz; /* Minimal distance between any 2 particles in the two cells */ const double r_lr_check = sqrt(r2) - (multi_i->r_max + multi_j->r_max); /* Are we beyond the distance where the truncated forces are 0? */ if (periodic && r_lr_check > max_distance) { #ifdef SWIFT_DEBUG_CHECKS if (cell_is_active_gravity(ci, e)) accumulate_add_ll(&multi_i->pot.num_interacted, multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) accumulate_add_ll(&multi_j->pot.num_interacted, multi_i->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ if (cell_is_active_gravity(ci, e)) accumulate_add_ll(&multi_i->pot.num_interacted_pm, multi_j->m_pole.num_gpart); if (cell_is_active_gravity(cj, e)) accumulate_add_ll(&multi_j->pot.num_interacted_pm, multi_i->m_pole.num_gpart); #endif return; } /* OK, we actually need to compute this pair. Let's find the cheapest * option... */ if (ci->grav.count <= 1 || cj->grav.count <= 1) { /* We have two cheap cells. Go P-P. */ runner_dopair_grav_pp_no_cache(r, ci, cj); runner_dopair_grav_pp_no_cache(r, cj, ci); /* Can we use M-M interactions ? */ } else if (gravity_M2L_accept_symmetric(e->gravity_properties, multi_i, multi_j, r2, /* use_rebuild_sizes=*/0, periodic)) { /* Go M-M */ runner_dopair_grav_mm(r, ci, cj); /* Did we reach the bottom? */ } else if (!ci->split && !cj->split) { /* We have two leaves. Go P-P. */ runner_dopair_grav_pp(r, ci, cj, /*symmetric*/ 1, /*allow_mpoles=*/1); } else { /* Alright, we'll have to split and recurse. */ /* We know at least one of ci and cj is splittable */ const double ri_max = multi_i->r_max; const double rj_max = multi_j->r_max; /* Split the larger of the two cells and start over again */ if (ri_max > rj_max) { /* Can we actually split that interaction ? */ if (ci->split) { /* Loop over ci's children */ for (int k = 0; k < 8; k++) { if (ci->progeny[k] != NULL) runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); } } else { /* cj is split */ /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ /* Loop over cj's children */ for (int k = 0; k < 8; k++) { if (cj->progeny[k] != NULL) runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); } } } else { /* Can we actually split that interaction ? */ if (cj->split) { /* Loop over cj's children */ for (int k = 0; k < 8; k++) { if (cj->progeny[k] != NULL) runner_dopair_recursive_grav(r, ci, cj->progeny[k], 0); } } else { /* ci is split */ /* MATTHIEU: This could maybe be replaced by P-M interactions ? */ /* Loop over ci's children */ for (int k = 0; k < 8; k++) { if (ci->progeny[k] != NULL) runner_dopair_recursive_grav(r, ci->progeny[k], cj, 0); } } } } if (gettimer) TIMER_TOC(timer_dosub_pair_grav); } /** * @brief Computes the interaction of all the particles in a cell. * * This function will try to recurse as far down the tree as possible and only * default to direct summation if there is no better option. * * @param r The #runner. * @param c The first #cell. * @param gettimer Are we timing this ? */ void runner_doself_recursive_grav(struct runner *r, struct cell *c, const int gettimer) { /* Some constants */ const struct engine *e = r->e; /* Clear the flags */ runner_clear_grav_flags(c, e); #ifdef SWIFT_DEBUG_CHECKS /* Early abort? */ if (c->grav.count == 0) error("Doing self gravity on an empty cell !"); #endif TIMER_TIC; /* Anything to do here? */ if (!cell_is_active_gravity(c, e)) return; /* If the cell is split, interact each progeny with itself, and with each of its siblings. */ if (c->split) { for (int j = 0; j < 8; j++) { if (c->progeny[j] != NULL) { runner_doself_recursive_grav(r, c->progeny[j], 0); for (int k = j + 1; k < 8; k++) { if (c->progeny[k] != NULL) { runner_dopair_recursive_grav(r, c->progeny[j], c->progeny[k], 0); } } } } } /* If the cell is not split, then just go for it... */ else { runner_doself_grav_pp(r, c); } if (gettimer) TIMER_TOC(timer_dosub_self_grav); } /** * @brief Performs all M-M interactions between a given top-level cell and all * the other top-levels that are far enough. * * @param r The thread #runner. * @param ci The #cell of interest. * @param timer Are we timing this ? */ void runner_do_grav_long_range(struct runner *r, struct cell *ci, const int timer) { /* Some constants */ const struct engine *e = r->e; const int periodic = e->mesh->periodic; const double dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; const double max_distance2 = e->mesh->r_cut_max * e->mesh->r_cut_max; TIMER_TIC; /* Recover the list of top-level cells */ struct cell *cells = e->s->cells_top; int *cells_with_particles = e->s->cells_with_particles_top; const int nr_cells_with_particles = e->s->nr_cells_with_particles; /* Anything to do here? */ if (!cell_is_active_gravity(ci, e)) return; if (ci->nodeID != engine_rank) error("Non-local cell in long-range gravity task!"); /* Check multipole has been drifted */ if (ci->grav.ti_old_multipole < e->ti_current) cell_drift_multipole(ci, e); /* Get this cell's multipole information */ struct gravity_tensors *const multi_i = ci->grav.multipole; /* Find this cell's top-level (great-)parent */ struct cell *top = ci; while (top->parent != NULL) top = top->parent; /* Loop over all the top-level cells and go for a M-M interaction if * well-separated */ for (int n = 0; n < nr_cells_with_particles; ++n) { /* Handle on the top-level cell and it's gravity business*/ struct cell *cj = &cells[cells_with_particles[n]]; struct gravity_tensors *const multi_j = cj->grav.multipole; /* Avoid self contributions */ if (top == cj) continue; /* Skip empty cells */ if (multi_j->m_pole.M_000 == 0.f) continue; /* Can we escape early in the periodic BC case? */ if (periodic) { /* Minimal distance between any pair of particles */ const double min_radius2 = cell_min_dist2_same_size(top, cj, periodic, dim); /* Are we beyond the distance where the truncated forces are 0 ?*/ if (min_radius2 > max_distance2) { #ifdef SWIFT_DEBUG_CHECKS /* Need to account for the interactions we missed */ accumulate_add_ll(&multi_i->pot.num_interacted, multi_j->m_pole.num_gpart); #endif #ifdef SWIFT_GRAVITY_FORCE_CHECKS /* Need to account for the interactions we missed */ accumulate_add_ll(&multi_i->pot.num_interacted_pm, multi_j->m_pole.num_gpart); #endif /* Record that this multipole received a contribution */ multi_i->pot.interacted = 1; /* We are done here. */ continue; } } if (cell_can_use_pair_mm(top, cj, e, e->s, /*use_rebuild_data=*/1, /*is_tree_walk=*/0)) { /* Call the PM interaction fucntion on the active sub-cells of ci */ runner_dopair_grav_mm_nonsym(r, ci, cj); // runner_dopair_recursive_grav_pm(r, ci, cj); /* Record that this multipole received a contribution */ multi_i->pot.interacted = 1; } /* We are in charge of this pair */ } /* Loop over top-level cells */ if (timer) TIMER_TOC(timer_dograv_long_range); }