diff --git a/examples/ZeldovichPancake_3D/plotSolution.py b/examples/ZeldovichPancake_3D/plotSolution.py index a040683a4e80ca2298d4b16f269bdabae0b00508..dd1c6243e4637b25da187e93c89490a655303153 100644 --- a/examples/ZeldovichPancake_3D/plotSolution.py +++ b/examples/ZeldovichPancake_3D/plotSolution.py @@ -192,4 +192,4 @@ ylim(0, 1) xticks([]) yticks([]) -savefig("ZeldovichPancake_%.3d.png"%snap, dpi=200) +savefig("ZeldovichPancake_%.4d.png"%snap, dpi=200) diff --git a/examples/ZeldovichPancake_3D/zeldovichPancake.yml b/examples/ZeldovichPancake_3D/zeldovichPancake.yml index d0bbe55222447ab5967c6c46b956fc1d6251ede4..39e02e3aab685bc4435d80e1d756b0b0c843c879 100644 --- a/examples/ZeldovichPancake_3D/zeldovichPancake.yml +++ b/examples/ZeldovichPancake_3D/zeldovichPancake.yml @@ -46,7 +46,6 @@ Cosmology: Gravity: mesh_side_length: 16 eta: 0.025 - theta: 0.85 - r_cut_max: 5. + theta: 0.8 comoving_softening: 0.0001 max_physical_softening: 0.0001 diff --git a/src/engine.c b/src/engine.c index c0eabed649881165d534ad16d584a7c56b310861..056720265fc5db3e21e18dc5f5d9e8a0efe200a4 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2379,8 +2379,16 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, } /* Recover the multipole information */ - struct gravity_tensors *const multi_i = ci->multipole; + const struct gravity_tensors *const multi_i = ci->multipole; const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1], multi_i->CoM[2]}; + const double r_max_i = multi_i->r_max; + +#ifdef SWIFT_DEBUG_CHECKS + if (multi_i->r_max != multi_i->r_max_rebuild) + error( + "Multipole size not equal ot it's size after rebuild. But we just " + "rebuilt..."); +#endif /* Loop over every other cell */ for (int ii = 0; ii < cdim[0]; ii++) { @@ -2414,8 +2422,7 @@ void engine_make_self_gravity_tasks_mapper(void *map_data, int num_elements, const double r2 = dx * dx + dy * dy + dz * dz; /* Are the cells too close for a MM interaction ? */ - if (!gravity_M2L_accept(multi_i->r_max_rebuild, - multi_j->r_max_rebuild, theta_crit2, r2)) { + if (!gravity_M2L_accept(r_max_i, multi_j->r_max, theta_crit2, r2)) { /* Ok, we need to add a direct pair calculation */ scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, @@ -3155,7 +3162,7 @@ void engine_maketasks(struct engine *e) { #else const int hydro_tasks_per_cell = 27 * 2; #endif - const int self_grav_tasks_per_cell = 27 * 2; + const int self_grav_tasks_per_cell = 125; const int ext_grav_tasks_per_cell = 1; if (e->policy & engine_policy_hydro) @@ -3730,7 +3737,7 @@ int engine_estimate_nr_tasks(struct engine *e) { #endif } if (e->policy & engine_policy_self_gravity) { - n1 += 32; + n1 += 125; n2 += 1; #ifdef WITH_MPI n2 += 2; diff --git a/src/gravity_cache.h b/src/gravity_cache.h index e5e78900afa1d1ed5cb66c58fbecb626ed9cbd8b..5d2338127acabc076e27ccaee9c1bc8748dbd661 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -286,6 +286,72 @@ gravity_cache_populate_no_mpole(const timebin_t max_active_bin, } } +/** + * @brief Fills a #gravity_cache structure with some #gpart and make them use + * the multi-pole. + * + * @param max_active_bin The largest active bin in the current time-step. + * @param c The #gravity_cache to fill. + * @param gparts The #gpart array to read from. + * @param gcount The number of particles to read. + * @param gcount_padded The number of particle to read padded to the next + * multiple of the vector length. + * @param cell The cell we play with (to get reasonable padding positions). + * @param grav_props The global gravity properties. + */ +__attribute__((always_inline)) INLINE static void +gravity_cache_populate_all_mpole(const timebin_t max_active_bin, + struct gravity_cache *c, + const struct gpart *restrict gparts, + const int gcount, const int gcount_padded, + const struct cell *cell, + const struct gravity_props *grav_props) { + + /* Make the compiler understand we are in happy vectorization land */ + swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, c->y, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, c->z, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, active, c->active, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(int, use_mpole, c->use_mpole, + SWIFT_CACHE_ALIGNMENT); + swift_assume_size(gcount_padded, VEC_SIZE); + + /* Fill the input caches */ + for (int i = 0; i < gcount; ++i) { + x[i] = (float)(gparts[i].x[0]); + y[i] = (float)(gparts[i].x[1]); + z[i] = (float)(gparts[i].x[2]); + epsilon[i] = gravity_get_softening(&gparts[i], grav_props); + m[i] = gparts[i].mass; + active[i] = (int)(gparts[i].time_bin <= max_active_bin); + use_mpole[i] = 1; + } + +#ifdef SWIFT_DEBUG_CHECKS + if (gcount_padded < gcount) error("Padded counter smaller than counter"); +#endif + + /* Particles used for padding should get impossible positions + * that have a reasonable magnitude. We use the cell width for this */ + const float pos_padded[3] = {-2.f * (float)cell->width[0], + -2.f * (float)cell->width[1], + -2.f * (float)cell->width[2]}; + const float eps_padded = epsilon[0]; + + /* Pad the caches */ + for (int i = gcount; i < gcount_padded; ++i) { + x[i] = pos_padded[0]; + y[i] = pos_padded[1]; + z[i] = pos_padded[2]; + epsilon[i] = eps_padded; + m[i] = 0.f; + active[i] = 0; + use_mpole[i] = 0; + } +} + /** * @brief Write the output cache values back to the active #gpart. * diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h index 66771439a42553c05e6d766a953fe349e2abbe7c..f11f760022d326cafd2ae5da2bc535273b8cc947 100644 --- a/src/kernel_long_gravity.h +++ b/src/kernel_long_gravity.h @@ -34,7 +34,7 @@ #define GADGET2_LONG_RANGE_CORRECTION #ifdef GADGET2_LONG_RANGE_CORRECTION -#define kernel_long_gravity_truncation_name "Gadget-2 (error function)" +#define kernel_long_gravity_truncation_name "Gadget-like (using erfc())" #else #define kernel_long_gravity_truncation_name "Exp-based Sigmoid" #endif diff --git a/src/multipole.h b/src/multipole.h index 231a4f5f1646a7baf41766a431991782256e55d0..d75704db2514ba6f1f175a47678a42a85a40eeaa 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -2397,7 +2397,8 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, * multipoles. */ __attribute__((always_inline)) INLINE static int gravity_M2L_accept( - double r_crit_a, double r_crit_b, double theta_crit2, double r2) { + const double r_crit_a, const double r_crit_b, const double theta_crit2, + const double r2) { const double size = r_crit_a + r_crit_b; const double size2 = size * size; @@ -2410,8 +2411,7 @@ __attribute__((always_inline)) INLINE static int gravity_M2L_accept( /** * @brief Checks whether a particle-cell interaction can be appromixated by a - * M2P - * interaction using the distance and cell radius. + * M2P interaction using the distance and cell radius. * * We use the multipole acceptance criterion of Dehnen, 2002, JCoPh, Volume 179, * Issue 1, pp.27-42, equation 10. @@ -2419,10 +2419,10 @@ __attribute__((always_inline)) INLINE static int gravity_M2L_accept( * @param r_max2 The square of the size of the multipole. * @param theta_crit2 The square of the critical opening angle. * @param r2 Square of the distance (periodically wrapped) between the - * multipoles. + * particle and the multipole. */ __attribute__((always_inline)) INLINE static int gravity_M2P_accept( - float r_max2, float theta_crit2, float r2) { + const float r_max2, const float theta_crit2, const float r2) { // MATTHIEU: Make this mass-dependent ? diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index c8cf98070895253852acfd760af8b2077d77dbf4..5109cc77a670481dcca37f080efe21c0a0c9bbf8 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -154,7 +154,7 @@ static INLINE void runner_dopair_grav_pp_full( struct gravity_cache *restrict cj_cache, const int gcount_i, const int gcount_j, const int gcount_padded_j, const int periodic, const float dim[3], const struct engine *restrict e, - struct gpart *restrict gparts_i, struct gpart *restrict gparts_j) { + struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { /* Loop over all particles in ci... */ for (int pid = 0; pid < gcount_i; pid++) { @@ -277,7 +277,7 @@ static INLINE void runner_dopair_grav_pp_truncated( struct gravity_cache *restrict cj_cache, const int gcount_i, const int gcount_j, const int gcount_padded_j, const float dim[3], const float r_s_inv, const struct engine *restrict e, - struct gpart *restrict gparts_i, struct gpart *restrict gparts_j) { + struct gpart *restrict gparts_i, const struct gpart *restrict gparts_j) { #ifdef SWIFT_DEBUG_CHECKS if (!e->s->periodic) @@ -455,7 +455,7 @@ static INLINE void runner_dopair_grav_pm_full( const float r2 = dx * dx + dy * dy + dz * dz; -#ifdef SWIFT_DEBUG_CHECKS +#ifdef SWIFT_DEBUG_CHECKSa const float r_max_j = cj->multipole->r_max; const float r_max2 = r_max_j * r_max_j; const float theta_crit2 = e->gravity_properties->theta_crit2; @@ -572,7 +572,7 @@ static INLINE void runner_dopair_grav_pm_truncated( const float r2 = dx * dx + dy * dy + dz * dz; -#ifdef SWIFT_DEBUG_CHECKS +#ifdef SWIFT_DEBUG_CHECKSa const float r_max_j = cj->multipole->r_max; const float r_max2 = r_max_j * r_max_j; const float theta_crit2 = e->gravity_properties->theta_crit2; @@ -1152,15 +1152,92 @@ static INLINE void runner_dopair_grav_mm(struct runner *r, cj->ti_old_multipole, cj->nodeID, ci->nodeID, e->ti_current); /* Let's interact at this level */ - if (0) - gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM, - cj->multipole->CoM, props, periodic, dim, r_s_inv); - - runner_dopair_grav_pp(r, ci, cj, 0); + gravity_M2L(&ci->multipole->pot, multi_j, ci->multipole->CoM, + cj->multipole->CoM, props, periodic, dim, r_s_inv); TIMER_TOC(timer_dopair_grav_mm); } +static INLINE void runner_dopair_recursive_grav_pm(struct runner *r, + struct cell *ci, + const struct cell *cj) { + /* Some constants */ + const struct engine *e = r->e; + const int periodic = e->mesh->periodic; + const float dim[3] = {e->mesh->dim[0], e->mesh->dim[1], e->mesh->dim[2]}; + const float r_s_inv = e->mesh->r_s_inv; + + /* Anything to do here? */ + if (!(cell_is_active_gravity(ci, e) && ci->nodeID == e->nodeID)) return; + +#ifdef SWIFT_DEBUG_CHECKS + /* Early abort? */ + if (ci->gcount == 0 || cj->gcount == 0) + error("Doing pair gravity on an empty cell !"); + + /* Sanity check */ + if (ci == cj) error("Pair interaction between a cell and itself."); + + if (cj->ti_old_multipole != e->ti_current) + error("cj->multipole not drifted."); +#endif + + /* Can we recurse further? */ + if (ci->split) { + + /* Loop over ci's children */ + for (int k = 0; k < 8; k++) { + if (ci->progeny[k] != NULL) + runner_dopair_recursive_grav_pm(r, ci->progeny[k], cj); + } + + /* Ok, let's do the interaction here */ + } else { + + /* Start by constructing particle caches */ + + /* Cache to play with */ + struct gravity_cache *const ci_cache = &r->ci_gravity_cache; + + /* Computed the padded counts */ + const int gcount_i = ci->gcount; + const int gcount_padded_i = gcount_i - (gcount_i % VEC_SIZE) + VEC_SIZE; + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that we fit in cache */ + if (gcount_i > ci_cache->count) + error("Not enough space in the cache! gcount_i=%d", gcount_i); +#endif + + /* Fill the cache */ + gravity_cache_populate_all_mpole(e->max_active_bin, ci_cache, ci->gparts, + gcount_i, gcount_padded_i, ci, + e->gravity_properties); + + /* Recover the multipole info and the CoM locations */ + const struct multipole *multi_j = &cj->multipole->m_pole; + const float CoM_j[3] = {(float)(cj->multipole->CoM[0]), + (float)(cj->multipole->CoM[1]), + (float)(cj->multipole->CoM[2])}; + + /* Can we use the Newtonian version or do we need the truncated one ? */ + if (!periodic) { + + runner_dopair_grav_pm_full(ci_cache, gcount_padded_i, CoM_j, multi_j, + periodic, dim, e, ci->gparts, gcount_i, cj); + + } else { + + runner_dopair_grav_pm_truncated(ci_cache, gcount_padded_i, CoM_j, multi_j, + dim, r_s_inv, e, ci->gparts, gcount_i, + cj); + } + + /* Write back to the particles */ + gravity_cache_write_back(ci_cache, ci->gparts, gcount_i); + } +} + /** * @brief Computes the interaction of all the particles in a cell with all the * particles of another cell. @@ -1392,8 +1469,6 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, const double theta_crit2 = e->gravity_properties->theta_crit2; const double max_distance = e->mesh->r_cut_max; - const int cdim[3] = {e->s->cdim[0], e->s->cdim[1], e->s->cdim[2]}; - TIMER_TIC; /* Recover the list of top-level cells */ @@ -1412,12 +1487,15 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, /* Recover the local multipole */ struct gravity_tensors *const multi_i = ci->multipole; - // const double CoM_i[3] = {multi_i->CoM[0], multi_i->CoM[1], - // multi_i->CoM[2]}; const double CoM_rebuild_i[3] = {multi_i->CoM_rebuild[0], multi_i->CoM_rebuild[1], multi_i->CoM_rebuild[2]}; + + /* Flag that contributions will be recieved */ + multi_i->pot.interacted = 1; + int cc = 0; + /* Loop over all the top-level cells and go for a M-M interaction if * well-separated */ for (int n = 0; n < nr_cells; ++n) { @@ -1455,9 +1533,6 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, /* Are we beyond the distance where the truncated forces are 0 ?*/ if (periodic && max_radius > max_distance) { -/* message("hello"); */ -/* message("max_r = %e max_distance = %e", max_radius, max_distance); */ - #ifdef SWIFT_DEBUG_CHECKS /* Need to account for the interactions we missed */ multi_i->pot.num_interacted += multi_j->m_pole.num_gpart; @@ -1465,48 +1540,15 @@ static INLINE void runner_do_grav_long_range(struct runner *r, struct cell *ci, continue; } - const int cid = n; - const int i = cid / (cdim[1] * cdim[2]); - const int j = (cid / cdim[2]) % cdim[1]; - const int k = cid % cdim[2]; - + /* Call the PM interaction fucntion on the active sub-cells of ci */ ++cc; - if (0) message("Interacting with [%d %d %d] (%d)", i, j, k, cc); - - // MATTHIEU - runner_dopair_grav_mm(r, ci, cj); - continue; - - /* /\* Let's compute the current distance between the cell pair*\/ - */ - /* double dx = CoM_i[0] - multi_j->CoM[0]; */ - /* double dy = CoM_i[1] - multi_j->CoM[1]; */ - /* double dz = CoM_i[2] - multi_j->CoM[2]; */ - - /* /\* Apply BC *\/ */ - /* if (periodic) { */ - /* dx = nearest(dx, dim[0]); */ - /* dy = nearest(dy, dim[1]); */ - /* dz = nearest(dz, dim[2]); */ - /* } */ - /* const double r2 = dx * dx + dy * dy + dz * dz; */ - - /* /\* Check the multipole acceptance criterion *\/ */ - /* if (gravity_M2L_accept(multi_i->r_max, multi_j->r_max, - * theta_crit2, r2)) { */ - - /* /\* Go for a (non-symmetric) M-M calculation *\/ */ - /* runner_dopair_grav_mm(r, ci, cj); */ - /* } else { */ - /* /\* Alright, we have to take charge of that pair in a different - * way. *\/ */ - /* // MATTHIEU: We should actually open the tree-node here and - * recurse. */ - /* runner_dopair_grav_mm(r, ci, cj); */ - /* } */ + runner_dopair_recursive_grav_pm(r, ci, cj); + } /* We are in charge of this pair */ } /* Loop over top-level cells */ + message("cc=%d", cc); + if (timer) TIMER_TOC(timer_dograv_long_range); }