diff --git a/.gitignore b/.gitignore index 4e2f82309eb0dbf56f8eab50d2df14b65865d858..7d6d9021f12ebfcb837d19c443362f1ecbc4077f 100644 --- a/.gitignore +++ b/.gitignore @@ -91,6 +91,7 @@ theory/SPH/*.pdf theory/paper_pasc/pasc_paper.pdf theory/Multipoles/fmm.pdf theory/Multipoles/fmm_standalone.pdf +theory/Multipoles/potential.pdf m4/libtool.m4 m4/ltoptions.m4 diff --git a/README b/README index 6e2b2cb2304aaa1790646de5316b7c08362779bf..2dedb32a04a7cf143c3e65560c45a68c0e5d1c2a 100644 --- a/README +++ b/README @@ -27,6 +27,7 @@ Valid options are: -f {int} Overwrite the CPU frequency (Hz) to be used for time measurements. -g Run with an external gravitational potential. -G Run with self-gravity. + -M Reconstruct the multipoles every time-step. -n {int} Execute a fixed number of time steps. When unset use the time_end parameter to stop. -s Run with hydrodynamics. -S Run with stars. diff --git a/configure.ac b/configure.ac index 6c0a06005778c499c53ae8e596a0685a78886542..8a2d0f30ae297993b34153bc9a4c04085f4748f5 100644 --- a/configure.ac +++ b/configure.ac @@ -189,6 +189,18 @@ if test "$enable_task_debugging" = "yes"; then AC_DEFINE([SWIFT_DEBUG_TASKS],1,[Enable task debugging]) fi +# Check if the general timers are switched on. +AC_ARG_ENABLE([timers], + [AS_HELP_STRING([--enable-timers], + [Activate the basic timers @<:@yes/no@:>@] + )], + [enable_timers="$enableval"], + [enable_timers="no"] +) +if test "$enable_timers" = "yes"; then + AC_DEFINE([SWIFT_USE_TIMERS],1,[Enable individual timers]) +fi + # Check if expensive debugging is on. AC_ARG_ENABLE([debugging-checks], [AS_HELP_STRING([--enable-debugging-checks], @@ -215,6 +227,21 @@ elif test "$gravity_force_checks" != "no"; then AC_DEFINE_UNQUOTED([SWIFT_GRAVITY_FORCE_CHECKS], [$enableval] ,[Enable gravity brute-force checks]) fi +# Check if we want to zero the gravity forces for all particles below some ID. +AC_ARG_ENABLE([no-gravity-below-id], + [AS_HELP_STRING([--enable-no-gravity-below-id], + [Zeros the gravitational acceleration of all particles with an ID smaller than @<:@N@:>@] + )], + [no_gravity_below_id="$enableval"], + [no_gravity_below_id="no"] +) +if test "$no_gravity_below_id" == "yes"; then + AC_MSG_ERROR(Need to specify the ID below which particles get zero forces when using --enable-no-gravity-below-id!) +elif test "$no_gravity_below_id" != "no"; then + AC_DEFINE_UNQUOTED([SWIFT_NO_GRAVITY_BELOW_ID], [$enableval] ,[Particles with smaller ID than this will have zero gravity forces]) +fi + + # Define HAVE_POSIX_MEMALIGN if it works. AX_FUNC_POSIX_MEMALIGN @@ -854,12 +881,15 @@ AC_MSG_RESULT([ Adiabatic index : $with_gamma Riemann solver : $with_riemann Cooling function : $with_cooling - External potential : $with_potential - Multipole order : $with_multipole_order - - Task debugging : $enable_task_debugging - Debugging checks : $enable_debugging_checks - Gravity checks : $gravity_force_checks + + External potential : $with_potential + Multipole order : $with_multipole_order + No gravity below ID : $no_gravity_below_id + + Individual timers : $enable_timers + Task debugging : $enable_task_debugging + Debugging checks : $enable_debugging_checks + Gravity checks : $gravity_force_checks ]) # Make sure the latest git revision string gets included diff --git a/examples/Makefile.am b/examples/Makefile.am index dd13fb7eb4b82fbbfbb1ae450e20d01b13f2a455..1dd240fb6015fe5fdd2465cccb1bb221706efeed 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Common flags -MYFLAGS = -DTIMER +MYFLAGS = # Add the source directory and debug to CFLAGS AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) diff --git a/examples/main.c b/examples/main.c index 5f42f3e06ea46d0c3b73363956f470016e5d1498..631117148addd3ab7ad49ed2760855b793757870 100644 --- a/examples/main.c +++ b/examples/main.c @@ -82,6 +82,8 @@ void print_help_message() { "Run with an external gravitational potential."); printf(" %2s %8s %s\n", "-F", "", "Run with feedback."); printf(" %2s %8s %s\n", "-G", "", "Run with self-gravity."); + printf(" %2s %8s %s\n", "-M", "", + "Reconstruct the multipoles every time-step."); printf(" %2s %8s %s\n", "-n", "{int}", "Execute a fixed number of time steps. When unset use the time_end " "parameter to stop."); @@ -164,6 +166,7 @@ int main(int argc, char *argv[]) { int with_stars = 0; int with_fp_exceptions = 0; int with_drift_all = 0; + int with_mpole_reconstruction = 0; int verbose = 0; int nr_threads = 1; int with_verbose_timers = 0; @@ -172,7 +175,8 @@ int main(int argc, char *argv[]) { /* Parse the parameters */ int c; - while ((c = getopt(argc, argv, "acCdDef:FgGhn:sSt:Tv:y:")) != -1) switch (c) { + while ((c = getopt(argc, argv, "acCdDef:FgGhMn:sSt:Tv:y:")) != -1) + switch (c) { case 'a': with_aff = 1; break; @@ -210,6 +214,9 @@ int main(int argc, char *argv[]) { case 'h': if (myrank == 0) print_help_message(); return 0; + case 'M': + with_mpole_reconstruction = 1; + break; case 'n': if (sscanf(optarg, "%d", &nsteps) != 1) { if (myrank == 0) printf("Error parsing fixed number of steps.\n"); @@ -521,6 +528,8 @@ int main(int argc, char *argv[]) { /* Construct the engine policy */ int engine_policies = ENGINE_POLICY | engine_policy_steal; if (with_drift_all) engine_policies |= engine_policy_drift_all; + if (with_mpole_reconstruction) + engine_policies |= engine_policy_reconstruct_mpoles; if (with_hydro) engine_policies |= engine_policy_hydro; if (with_self_gravity) engine_policies |= engine_policy_self_gravity; if (with_external_gravity) engine_policies |= engine_policy_external_gravity; @@ -601,35 +610,25 @@ int main(int argc, char *argv[]) { engine_dump_snapshot(&e); /* Legend */ - if (myrank == 0) { + if (myrank == 0) printf("# %6s %14s %14s %10s %10s %10s %16s [%s]\n", "Step", "Time", "Time-step", "Updates", "g-Updates", "s-Updates", "Wall-clock time", clocks_getunit()); - if (with_verbose_timers) { - printf("timers: "); - for (int k = 0; k < timer_count; k++) printf("%s\t", timers_names[k]); - printf("\n"); - } - } + /* File for the timers */ + if (with_verbose_timers) timers_open_file(myrank); /* Main simulation loop */ for (int j = 0; !engine_is_done(&e) && e.step - 1 != nsteps; j++) { /* Reset timers */ - timers_reset(timers_mask_all); + timers_reset_all(); /* Take a step. */ engine_step(&e); /* Print the timers. */ - if (with_verbose_timers) { - printf("timers: "); - for (int k = 0; k < timer_count; k++) - printf("%.3f\t", clocks_from_ticks(timers[k])); - printf("\n"); - timers_reset(0xFFFFFFFFllu); - } + if (with_verbose_timers) timers_print(e.step); #ifdef SWIFT_DEBUG_TASKS /* Dump the task data using the given frequency. */ @@ -735,6 +734,7 @@ int main(int argc, char *argv[]) { #endif /* Clean everything */ + if (with_verbose_timers) timers_close_file(); engine_clean(&e); free(params); diff --git a/src/Makefile.am b/src/Makefile.am index 1da7a0d955e488c0b96ee209080b5438356a36bc..7bec5327f4759fcf7d3e1af9d041677ffbc7ab55 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -16,7 +16,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the debug flag to the whole thing -AM_CFLAGS = -DTIMER $(HDF5_CPPFLAGS) +AM_CFLAGS = $(HDF5_CPPFLAGS) # Assign a "safe" version number AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0 @@ -46,7 +46,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ hydro_properties.h riemann.h threadpool.h cooling.h cooling_struct.h sourceterms.h \ sourceterms_struct.h statistics.h memswap.h cache.h runner_doiact_vec.h profiler.h \ dump.h logger.h active.h timeline.h xmf.h gravity_properties.h gravity_derivatives.h \ - vector_power.h collectgroup.h hydro_space.h sort_part.h + gravity_softened_derivatives.h vector_power.h collectgroup.h hydro_space.h sort_part.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ diff --git a/src/atomic.h b/src/atomic.h index be24f96e5a9d2e955132f0d6d34bdfa58bc1649c..4f180c643a304a801a9d70940fd12f9f193941e1 100644 --- a/src/atomic.h +++ b/src/atomic.h @@ -19,6 +19,9 @@ #ifndef SWIFT_ATOMIC_H #define SWIFT_ATOMIC_H +/* Config parameters. */ +#include "../config.h" + /* Includes. */ #include "inline.h" diff --git a/src/cache.h b/src/cache.h index 3ae092f269a8584ea34f15edbfb76bd92e698e30..638127b19a6a7063a474510e8b1471a45e8997d3 100644 --- a/src/cache.h +++ b/src/cache.h @@ -34,6 +34,7 @@ #define C2_CACHE_SIZE (NUM_VEC_PROC * VEC_SIZE * 6) + (NUM_VEC_PROC * VEC_SIZE) #define C2_CACHE_ALIGN sizeof(float) * VEC_SIZE +#ifdef WITH_VECTORIZATION /* Cache struct to hold a local copy of a cells' particle * properties required for density/force calculations.*/ struct cache { @@ -178,10 +179,10 @@ __attribute__((always_inline)) INLINE void cache_read_particles( #if defined(GADGET2_SPH) - /* Shift the particles positions to a local frame so single precision can be - * used instead of double precision. */ +/* Shift the particles positions to a local frame so single precision can be + * used instead of double precision. */ #if defined(WITH_VECTORIZATION) && defined(__ICC) -#pragma simd +#pragma vector aligned #endif for (int i = 0; i < ci->count; i++) { ci_cache->x[i] = ci->parts[i].x[0] - ci->loc[0]; @@ -380,7 +381,7 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( * used instead of double precision. Also shift the cell ci, particles positions * due to BCs but leave cell cj. */ #if defined(WITH_VECTORIZATION) && defined(__ICC) -#pragma simd +#pragma vector aligned #endif for (int i = first_pi_align; i < ci->count; i++) { /* Make sure ci_cache is filled from the first element. */ @@ -397,14 +398,24 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( ci_cache->vz[ci_cache_idx] = ci->parts[idx].v[2]; } - /* Pad cache with fake particles that exist outside the cell so will not interact.*/ - float fake_pix = 2.0f * ci_cache->x[ci->count - 1]; + /* Pad cache with fake particles that exist outside the cell so will not + * interact.*/ + float fake_pix = 2.0f * ci->parts[sort_i[ci->count - 1].i].x[0]; for (int i = ci->count - first_pi_align; - i < ci->count - first_pi_align + VEC_SIZE; i++) + i < ci->count - first_pi_align + VEC_SIZE; i++) { ci_cache->x[i] = fake_pix; + ci_cache->y[i] = 1.f; + ci_cache->z[i] = 1.f; + ci_cache->h[i] = 1.f; + + ci_cache->m[i] = 1.f; + ci_cache->vx[i] = 1.f; + ci_cache->vy[i] = 1.f; + ci_cache->vz[i] = 1.f; + } #if defined(WITH_VECTORIZATION) && defined(__ICC) -#pragma simd +#pragma vector aligned #endif for (int i = 0; i <= last_pj_align; i++) { idx = sort_j[i].i; @@ -419,10 +430,20 @@ __attribute__((always_inline)) INLINE void cache_read_two_partial_cells_sorted( cj_cache->vz[i] = cj->parts[idx].v[2]; } - /* Pad cache with fake particles that exist outside the cell so will not interact.*/ - float fake_pjx = 2.0f * cj_cache->x[last_pj_align]; - for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) + /* Pad cache with fake particles that exist outside the cell so will not + * interact.*/ + float fake_pjx = 2.0f * cj->parts[sort_j[cj->count - 1].i].x[0]; + for (int i = last_pj_align + 1; i < last_pj_align + 1 + VEC_SIZE; i++) { cj_cache->x[i] = fake_pjx; + cj_cache->y[i] = 1.f; + cj_cache->z[i] = 1.f; + cj_cache->h[i] = 1.f; + + cj_cache->m[i] = 1.f; + cj_cache->vx[i] = 1.f; + cj_cache->vy[i] = 1.f; + cj_cache->vz[i] = 1.f; + } } /* @brief Clean the memory allocated by a #cache object. @@ -443,4 +464,6 @@ static INLINE void cache_clean(struct cache *c) { } } +#endif /* WITH_VECTORIZATION */ + #endif /* SWIFT_CACHE_H */ diff --git a/src/cell.c b/src/cell.c index 97ad8e051f0d484c5fc9d12de02898ac0c0cfe2f..aa5f3687ae02f1b88ffc7d351e42d4dfc1ff1468 100644 --- a/src/cell.c +++ b/src/cell.c @@ -129,6 +129,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { temp->depth = c->depth + 1; temp->split = 0; temp->dx_max = 0.f; + temp->dx_max_sort = 0.f; temp->nodeID = c->nodeID; temp->parent = c; c->progeny[k] = temp; @@ -1103,33 +1104,93 @@ void cell_reset_task_counters(struct cell *c) { } /** - * @brief Checks whether the cells are direct neighbours ot not. Both cells have - * to be of the same size + * @brief Recursively construct all the multipoles in a cell hierarchy. * - * @param ci First #cell. - * @param cj Second #cell. - * - * @todo Deal with periodicity. + * @param c The #cell. + * @param ti_current The current integer time. */ -int cell_are_neighbours(const struct cell *restrict ci, - const struct cell *restrict cj) { +void cell_make_multipoles(struct cell *c, integertime_t ti_current) { -#ifdef SWIFT_DEBUG_CHECKS - if (ci->width[0] != cj->width[0]) error("Cells of different size !"); -#endif + /* Reset everything */ + gravity_reset(c->multipole); - /* Maximum allowed distance */ - const double min_dist = - 1.2 * ci->width[0]; /* 1.2 accounts for rounding errors */ + if (c->split) { - /* (Manhattan) Distance between the cells */ - for (int k = 0; k < 3; k++) { - const double center_i = ci->loc[k]; - const double center_j = cj->loc[k]; - if (fabs(center_i - center_j) > min_dist) return 0; + /* Compute CoM of all progenies */ + double CoM[3] = {0., 0., 0.}; + double mass = 0.; + + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + const struct gravity_tensors *m = c->progeny[k]->multipole; + CoM[0] += m->CoM[0] * m->m_pole.M_000; + CoM[1] += m->CoM[1] * m->m_pole.M_000; + CoM[2] += m->CoM[2] * m->m_pole.M_000; + mass += m->m_pole.M_000; + } + } + c->multipole->CoM[0] = CoM[0] / mass; + c->multipole->CoM[1] = CoM[1] / mass; + c->multipole->CoM[2] = CoM[2] / mass; + + /* Now shift progeny multipoles and add them up */ + struct multipole temp; + double r_max = 0.; + for (int k = 0; k < 8; ++k) { + if (c->progeny[k] != NULL) { + const struct cell *cp = c->progeny[k]; + const struct multipole *m = &cp->multipole->m_pole; + + /* Contribution to multipole */ + gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM); + gravity_multipole_add(&c->multipole->m_pole, &temp); + + /* Upper limit of max CoM<->gpart distance */ + const double dx = c->multipole->CoM[0] - cp->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] - cp->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] - cp->multipole->CoM[2]; + const double r2 = dx * dx + dy * dy + dz * dz; + r_max = max(r_max, cp->multipole->r_max + sqrt(r2)); + } + } + /* Alternative upper limit of max CoM<->gpart distance */ + const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] / 2. + ? c->multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] / 2. + ? c->multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] / 2. + ? c->multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + + /* Take minimum of both limits */ + c->multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + + } else { + + if (c->gcount > 0) { + gravity_P2M(c->multipole, c->gparts, c->gcount); + const double dx = c->multipole->CoM[0] > c->loc[0] + c->width[0] / 2. + ? c->multipole->CoM[0] - c->loc[0] + : c->loc[0] + c->width[0] - c->multipole->CoM[0]; + const double dy = c->multipole->CoM[1] > c->loc[1] + c->width[1] / 2. + ? c->multipole->CoM[1] - c->loc[1] + : c->loc[1] + c->width[1] - c->multipole->CoM[1]; + const double dz = c->multipole->CoM[2] > c->loc[2] + c->width[2] / 2. + ? c->multipole->CoM[2] - c->loc[2] + : c->loc[2] + c->width[2] - c->multipole->CoM[2]; + c->multipole->r_max = sqrt(dx * dx + dy * dy + dz * dz); + } else { + gravity_multipole_init(&c->multipole->m_pole); + c->multipole->CoM[0] = c->loc[0] + c->width[0] / 2.; + c->multipole->CoM[1] = c->loc[1] + c->width[1] / 2.; + c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; + c->multipole->r_max = 0.; + } } - return 1; + c->ti_old_multipole = ti_current; } /** @@ -1145,6 +1206,8 @@ void cell_check_multipole(struct cell *c, void *data) { struct gravity_tensors ma; const double tolerance = 1e-3; /* Relative */ + return; + /* First recurse */ if (c->split) for (int k = 0; k < 8; k++) @@ -1244,28 +1307,44 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { /* Un-skip the density tasks involved with this cell. */ for (struct link *l = c->density; l != NULL; l = l->next) { struct task *t = l->t; - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; scheduler_activate(s, t); /* Set the correct sorting flags */ if (t->type == task_type_pair) { + if (ci->dx_max_sort > space_maxreldx * ci->dmin) { + for (struct cell *finger = ci; finger != NULL; finger = finger->parent) + finger->sorted = 0; + } + if (cj->dx_max_sort > space_maxreldx * cj->dmin) { + for (struct cell *finger = cj; finger != NULL; finger = finger->parent) + finger->sorted = 0; + } if (!(ci->sorted & (1 << t->flags))) { - atomic_or(&ci->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(ci->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, ci->sorts); + if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift); } if (!(cj->sorted & (1 << t->flags))) { - atomic_or(&cj->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(cj->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, cj->sorts); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); } } - /* Check whether there was too much particle motion */ + /* Only interested in pair interactions as of here. */ if (t->type == task_type_pair || t->type == task_type_sub_pair) { - if (t->tight && - (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || - ci->dx_max > space_maxreldx * ci->h_max || - cj->dx_max > space_maxreldx * cj->h_max)) + + /* Check whether there was too much particle motion, i.e. the + cell neighbour conditions were violated. */ + if (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin) rebuild = 1; #ifdef WITH_MPI @@ -1287,10 +1366,12 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (cj->super->drift) - scheduler_activate(s, cj->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift); if (cell_is_active(cj, e)) { for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; @@ -1323,10 +1404,12 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (ci->super->drift) - scheduler_activate(s, ci->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -1341,6 +1424,14 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); } + } else if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); + } +#else + if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); } #endif } @@ -1355,7 +1446,6 @@ int cell_unskip_tasks(struct cell *c, struct scheduler *s) { scheduler_activate(s, l->t); if (c->extra_ghost != NULL) scheduler_activate(s, c->extra_ghost); if (c->ghost != NULL) scheduler_activate(s, c->ghost); - if (c->init != NULL) scheduler_activate(s, c->init); if (c->init_grav != NULL) scheduler_activate(s, c->init_grav); if (c->drift != NULL) scheduler_activate(s, c->drift); if (c->kick1 != NULL) scheduler_activate(s, c->kick1); @@ -1409,7 +1499,9 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { /* Drift from the last time the cell was drifted to the current time */ const double dt = (ti_current - ti_old) * timeBase; - float dx_max = 0.f, dx2_max = 0.f, cell_h_max = 0.f; + float dx_max = 0.f, dx2_max = 0.f; + float dx_max_sort = 0.0f, dx2_max_sort = 0.f; + float cell_h_max = 0.f; /* Check that we are actually going to move forward. */ if (ti_current < ti_old) error("Attempt to drift to the past"); @@ -1421,8 +1513,13 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { struct cell *cp = c->progeny[k]; + + /* Collect */ cell_drift_particles(cp, e); + + /* Update */ dx_max = max(dx_max, cp->dx_max); + dx_max_sort = max(dx_max_sort, cp->dx_max_sort); cell_h_max = max(cell_h_max, cp->h_max); } @@ -1443,6 +1540,11 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { gp->x_diff[1] * gp->x_diff[1] + gp->x_diff[2] * gp->x_diff[2]; dx2_max = max(dx2_max, dx2); + + /* Init gravity force fields. */ + if (gpart_is_active(gp, e)) { + gravity_init_gpart(gp); + } } /* Loop over all the gas particles in the cell */ @@ -1464,9 +1566,18 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { xp->x_diff[1] * xp->x_diff[1] + xp->x_diff[2] * xp->x_diff[2]; dx2_max = max(dx2_max, dx2); + const float dx2_sort = xp->x_diff_sort[0] * xp->x_diff_sort[0] + + xp->x_diff_sort[1] * xp->x_diff_sort[1] + + xp->x_diff_sort[2] * xp->x_diff_sort[2]; + dx2_max_sort = max(dx2_max_sort, dx2_sort); /* Maximal smoothing length */ cell_h_max = max(cell_h_max, p->h); + + /* Get ready for a density calculation */ + if (part_is_active(p, e)) { + hydro_init_part(p, &e->s->hs); + } } /* Loop over all the star particles in the cell */ @@ -1484,16 +1595,19 @@ void cell_drift_particles(struct cell *c, const struct engine *e) { /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); + dx_max_sort = sqrtf(dx2_max_sort); } else { cell_h_max = c->h_max; dx_max = c->dx_max; + dx_max_sort = c->dx_max_sort; } /* Store the values */ c->h_max = cell_h_max; c->dx_max = dx_max; + c->dx_max_sort = dx_max_sort; /* Update the time of the last drift */ c->ti_old = ti_current; diff --git a/src/cell.h b/src/cell.h index b3bc7363f59ba6f9b117bfa690088ab9de40a28e..c89e70960e84bb027f5e99a3cb362f2e0722b9bd 100644 --- a/src/cell.h +++ b/src/cell.h @@ -148,9 +148,6 @@ struct cell { /*! Linked list of the tasks computing this cell's gravity forces. */ struct link *grav; - /*! The particle initialistation task */ - struct task *init; - /*! The multipole initialistation task */ struct task *init_grav; @@ -239,6 +236,9 @@ struct cell { /*! Last (integer) time the cell's particle was drifted forward in time. */ integertime_t ti_old; + /*! Last (integer) time the cell's sort arrays were updated. */ + integertime_t ti_sort; + /*! Last (integer) time the cell's multipole was drifted forward in time. */ integertime_t ti_old_multipole; @@ -248,6 +248,9 @@ struct cell { /*! Maximum particle movement in this cell since last construction. */ float dx_max; + /*! Maximum particle movement in this cell since the last sort. */ + float dx_max_sort; + /*! Nr of #part in this cell. */ int count; @@ -351,8 +354,7 @@ int cell_link_gparts(struct cell *c, struct gpart *gparts); int cell_link_sparts(struct cell *c, struct spart *sparts); void cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell *c, void *data); -int cell_are_neighbours(const struct cell *restrict ci, - const struct cell *restrict cj); +void cell_make_multipoles(struct cell *c, integertime_t ti_current); void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); void cell_check_particle_drift_point(struct cell *c, void *data); diff --git a/src/drift.h b/src/drift.h index 9957d829fd28e033df9db325186195a3b396738c..d9b79f7f0549d85b6f05e8ce4a394aaa5b2a4d8d 100644 --- a/src/drift.h +++ b/src/drift.h @@ -101,10 +101,12 @@ __attribute__((always_inline)) INLINE static void drift_part( /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, dt); - /* Compute offset since last cell construction */ - xp->x_diff[0] -= xp->v_full[0] * dt; - xp->x_diff[1] -= xp->v_full[1] * dt; - xp->x_diff[2] -= xp->v_full[2] * dt; + /* Compute offsets since last cell construction */ + for (int k = 0; k < 3; k++) { + const float dx = xp->v_full[k] * dt; + xp->x_diff[k] -= dx; + xp->x_diff_sort[k] -= dx; + } } /** diff --git a/src/engine.c b/src/engine.c index 0fa2844e26e1e574fdda2d8d7292c685db4b4cf6..ee4838399924573a765466290b2fb7b1d37bcf87 100644 --- a/src/engine.c +++ b/src/engine.c @@ -144,47 +144,37 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { /* Local tasks only... */ if (c->nodeID == e->nodeID) { - /* Add the init task. */ - c->init = scheduler_addtask(s, task_type_init, task_subtype_none, 0, 0, c, - NULL, 0); - /* Add the two half kicks */ c->kick1 = scheduler_addtask(s, task_type_kick1, task_subtype_none, 0, 0, - c, NULL, 0); + c, NULL); c->kick2 = scheduler_addtask(s, task_type_kick2, task_subtype_none, 0, 0, - c, NULL, 0); + c, NULL); /* Add the time-step calculation task and its dependency */ c->timestep = scheduler_addtask(s, task_type_timestep, task_subtype_none, - 0, 0, c, NULL, 0); + 0, 0, c, NULL); scheduler_addunlock(s, c->kick2, c->timestep); scheduler_addunlock(s, c->timestep, c->kick1); - /* Add the drift task and its dependencies. */ - c->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, 0, - c, NULL, 0); - - scheduler_addunlock(s, c->drift, c->init); - if (is_self_gravity) { /* Initialisation of the multipoles */ c->init_grav = scheduler_addtask(s, task_type_init_grav, - task_subtype_none, 0, 0, c, NULL, 0); + task_subtype_none, 0, 0, c, NULL); /* Gravity non-neighbouring pm calculations */ c->grav_long_range = scheduler_addtask( - s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL, 0); + s, task_type_grav_long_range, task_subtype_none, 0, 0, c, NULL); /* Gravity top-level periodic calculation */ - c->grav_top_level = scheduler_addtask( - s, task_type_grav_top_level, task_subtype_none, 0, 0, c, NULL, 0); + c->grav_top_level = scheduler_addtask(s, task_type_grav_top_level, + task_subtype_none, 0, 0, c, NULL); /* Gravity recursive down-pass */ c->grav_down = scheduler_addtask(s, task_type_grav_down, - task_subtype_none, 0, 0, c, NULL, 0); + task_subtype_none, 0, 0, c, NULL); scheduler_addunlock(s, c->init_grav, c->grav_long_range); scheduler_addunlock(s, c->init_grav, c->grav_top_level); @@ -196,19 +186,19 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { /* Generate the ghost task. */ if (is_hydro) c->ghost = scheduler_addtask(s, task_type_ghost, task_subtype_none, 0, - 0, c, NULL, 0); + 0, c, NULL); #ifdef EXTRA_HYDRO_LOOP /* Generate the extra ghost task. */ if (is_hydro) c->extra_ghost = scheduler_addtask(s, task_type_extra_ghost, - task_subtype_none, 0, 0, c, NULL, 0); + task_subtype_none, 0, 0, c, NULL); #endif /* Cooling task */ if (is_with_cooling) { c->cooling = scheduler_addtask(s, task_type_cooling, task_subtype_none, - 0, 0, c, NULL, 0); + 0, 0, c, NULL); scheduler_addunlock(s, c->cooling, c->kick2); } @@ -216,7 +206,7 @@ void engine_make_hierarchical_tasks(struct engine *e, struct cell *c) { /* add source terms */ if (is_with_sourceterms) { c->sourceterms = scheduler_addtask(s, task_type_sourceterms, - task_subtype_none, 0, 0, c, NULL, 0); + task_subtype_none, 0, 0, c, NULL); } } @@ -1020,19 +1010,15 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, /* Create the tasks and their dependencies? */ if (t_xv == NULL) { - if (ci->super->drift == NULL) - ci->super->drift = scheduler_addtask( - s, task_type_drift, task_subtype_none, 0, 0, ci->super, NULL, 0); - t_xv = scheduler_addtask(s, task_type_send, task_subtype_xv, 4 * ci->tag, - 0, ci, cj, 0); + 0, ci, cj); t_rho = scheduler_addtask(s, task_type_send, task_subtype_rho, - 4 * ci->tag + 1, 0, ci, cj, 0); + 4 * ci->tag + 1, 0, ci, cj); t_ti = scheduler_addtask(s, task_type_send, task_subtype_tend, - 4 * ci->tag + 2, 0, ci, cj, 0); + 4 * ci->tag + 2, 0, ci, cj); #ifdef EXTRA_HYDRO_LOOP t_gradient = scheduler_addtask(s, task_type_send, task_subtype_gradient, - 4 * ci->tag + 3, 0, ci, cj, 0); + 4 * ci->tag + 3, 0, ci, cj); #endif #ifdef EXTRA_HYDRO_LOOP @@ -1063,7 +1049,10 @@ void engine_addtasks_send(struct engine *e, struct cell *ci, struct cell *cj, #endif /* Drift before you send */ - scheduler_addunlock(s, ci->super->drift, t_xv); + if (ci->drift == NULL) + ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, 0, + 0, ci, NULL); + scheduler_addunlock(s, ci->drift, t_xv); /* The super-cell's timestep task should unlock the send_ti task. */ scheduler_addunlock(s, ci->super->timestep, t_ti); @@ -1114,14 +1103,14 @@ void engine_addtasks_recv(struct engine *e, struct cell *c, struct task *t_xv, /* Create the tasks. */ t_xv = scheduler_addtask(s, task_type_recv, task_subtype_xv, 4 * c->tag, 0, - c, NULL, 0); + c, NULL); t_rho = scheduler_addtask(s, task_type_recv, task_subtype_rho, - 4 * c->tag + 1, 0, c, NULL, 0); + 4 * c->tag + 1, 0, c, NULL); t_ti = scheduler_addtask(s, task_type_recv, task_subtype_tend, - 4 * c->tag + 2, 0, c, NULL, 0); + 4 * c->tag + 2, 0, c, NULL); #ifdef EXTRA_HYDRO_LOOP t_gradient = scheduler_addtask(s, task_type_recv, task_subtype_gradient, - 4 * c->tag + 3, 0, c, NULL, 0); + 4 * c->tag + 3, 0, c, NULL); #endif } c->recv_xv = t_xv; @@ -1677,8 +1666,7 @@ void engine_make_self_gravity_tasks(struct engine *e) { if (ci->nodeID != nodeID) continue; /* If the cells is local build a self-interaction */ - scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL, - 0); + scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL); /* Loop over every other cell */ for (int cjd = cid + 1; cjd < nr_cells; ++cjd) { @@ -1693,9 +1681,9 @@ void engine_make_self_gravity_tasks(struct engine *e) { /* Are the cells to close for a MM interaction ? */ if (!gravity_multipole_accept(ci->multipole, cj->multipole, - theta_crit_inv)) + theta_crit_inv, 1)) scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci, - cj, 1); + cj); } } } @@ -1720,7 +1708,7 @@ void engine_make_external_gravity_tasks(struct engine *e) { /* If the cell is local, build a self-interaction */ scheduler_addtask(sched, task_type_self, task_subtype_external_grav, 0, 0, - ci, NULL, 0); + ci, NULL); } } @@ -1758,7 +1746,7 @@ void engine_make_hydroloop_tasks(struct engine *e) { /* If the cells is local build a self-interaction */ if (ci->nodeID == nodeID) scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0, - ci, NULL, 0); + ci, NULL); /* Now loop over all the neighbours of this cell */ for (int ii = -1; ii < 2; ii++) { @@ -1787,7 +1775,7 @@ void engine_make_hydroloop_tasks(struct engine *e) { const int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))]; scheduler_addtask(sched, task_type_pair, task_subtype_density, - sid, 0, ci, cj, 1); + sid, 0, ci, cj); } } } @@ -1816,15 +1804,22 @@ void engine_count_and_link_tasks(struct engine *e) { struct cell *const ci = t->ci; struct cell *const cj = t->cj; - /* Link sort tasks together. */ - if (t->type == task_type_sort && ci->split) - for (int j = 0; j < 8; j++) - if (ci->progeny[j] != NULL && ci->progeny[j]->sorts != NULL) { - scheduler_addunlock(sched, ci->progeny[j]->sorts, t); - } + /* Link sort tasks to the next-higher sort task. */ + if (t->type == task_type_sort) { + struct cell *finger = t->ci->parent; + while (finger != NULL && finger->sorts == NULL) finger = finger->parent; + if (finger != NULL) scheduler_addunlock(sched, t, finger->sorts); + } + + /* Link drift tasks to the next-higher drift task. */ + else if (t->type == task_type_drift) { + struct cell *finger = ci->parent; + while (finger != NULL && finger->drift == NULL) finger = finger->parent; + if (finger != NULL) scheduler_addunlock(sched, t, finger->drift); + } /* Link self tasks to cells. */ - if (t->type == task_type_self) { + else if (t->type == task_type_self) { atomic_inc(&ci->nr_tasks); if (t->subtype == task_subtype_density) { engine_addlink(e, &ci->density, t); @@ -1895,7 +1890,6 @@ static inline void engine_make_self_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> gravity --> grav_down --> kick */ - scheduler_addunlock(sched, c->super->init, gravity); scheduler_addunlock(sched, c->super->init_grav, gravity); scheduler_addunlock(sched, gravity, c->super->grav_down); } @@ -1912,7 +1906,7 @@ static inline void engine_make_external_gravity_dependencies( struct scheduler *sched, struct task *gravity, struct cell *c) { /* init --> external gravity --> kick */ - scheduler_addunlock(sched, c->super->init, gravity); + scheduler_addunlock(sched, c->drift, gravity); scheduler_addunlock(sched, gravity, c->super->kick2); } @@ -2008,9 +2002,8 @@ static inline void engine_make_hydro_loops_dependencies( struct scheduler *sched, struct task *density, struct task *gradient, struct task *force, struct cell *c, int with_cooling) { - /* init --> density loop --> ghost --> gradient loop --> extra_ghost */ + /* density loop --> ghost --> gradient loop --> extra_ghost */ /* extra_ghost --> force loop */ - scheduler_addunlock(sched, c->super->init, density); scheduler_addunlock(sched, density, c->super->ghost); scheduler_addunlock(sched, c->super->ghost, gradient); scheduler_addunlock(sched, gradient, c->super->extra_ghost); @@ -2041,8 +2034,7 @@ static inline void engine_make_hydro_loops_dependencies(struct scheduler *sched, struct task *force, struct cell *c, int with_cooling) { - /* init --> density loop --> ghost --> force loop */ - scheduler_addunlock(sched, c->super->init, density); + /* density loop --> ghost --> force loop */ scheduler_addunlock(sched, density, c->super->ghost); scheduler_addunlock(sched, c->super->ghost, force); @@ -2078,15 +2070,23 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { for (int ind = 0; ind < nr_tasks; ind++) { struct task *t = &sched->tasks[ind]; + /* Sort tasks depend on the drift of the cell. */ + if (t->type == task_type_sort && t->ci->nodeID == engine_rank) { + scheduler_addunlock(sched, t->ci->drift, t); + } + /* Self-interaction? */ - if (t->type == task_type_self && t->subtype == task_subtype_density) { + else if (t->type == task_type_self && t->subtype == task_subtype_density) { + + /* Make all density tasks depend on the drift. */ + scheduler_addunlock(sched, t->ci->drift, t); #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ struct task *t2 = scheduler_addtask( - sched, task_type_self, task_subtype_gradient, 0, 0, t->ci, NULL, 0); + sched, task_type_self, task_subtype_gradient, 0, 0, t->ci, NULL); struct task *t3 = scheduler_addtask( - sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0); + sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); /* Add the link between the new loops and the cell */ engine_addlink(e, &t->ci->gradient, t2); @@ -2100,7 +2100,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Start by constructing the task for the second hydro loop */ struct task *t2 = scheduler_addtask( - sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0); + sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL); /* Add the link between the new loop and the cell */ engine_addlink(e, &t->ci->force, t2); @@ -2113,12 +2113,18 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Otherwise, pair interaction? */ else if (t->type == task_type_pair && t->subtype == task_subtype_density) { + /* Make all density tasks depend on the drift. */ + if (t->ci->nodeID == engine_rank) + scheduler_addunlock(sched, t->ci->drift, t); + if (t->cj->nodeID == engine_rank) + scheduler_addunlock(sched, t->cj->drift, t); + #ifdef EXTRA_HYDRO_LOOP /* Start by constructing the task for the second and third hydro loop */ struct task *t2 = scheduler_addtask( - sched, task_type_pair, task_subtype_gradient, 0, 0, t->ci, t->cj, 0); + sched, task_type_pair, task_subtype_gradient, 0, 0, t->ci, t->cj); struct task *t3 = scheduler_addtask( - sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0); + sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ engine_addlink(e, &t->ci->gradient, t2); @@ -2141,7 +2147,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Start by constructing the task for the second hydro loop */ struct task *t2 = scheduler_addtask( - sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0); + sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ engine_addlink(e, &t->ci->force, t2); @@ -2169,10 +2175,10 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Start by constructing the task for the second and third hydro loop */ struct task *t2 = scheduler_addtask(sched, task_type_sub_self, task_subtype_gradient, - t->flags, 0, t->ci, t->cj, 0); + t->flags, 0, t->ci, t->cj); struct task *t3 = scheduler_addtask(sched, task_type_sub_self, task_subtype_force, - t->flags, 0, t->ci, t->cj, 0); + t->flags, 0, t->ci, t->cj); /* Add the link between the new loop and the cell */ engine_addlink(e, &t->ci->gradient, t2); @@ -2189,7 +2195,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Start by constructing the task for the second hydro loop */ struct task *t2 = scheduler_addtask(sched, task_type_sub_self, task_subtype_force, - t->flags, 0, t->ci, t->cj, 0); + t->flags, 0, t->ci, t->cj); /* Add the link between the new loop and the cell */ engine_addlink(e, &t->ci->force, t2); @@ -2211,10 +2217,10 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Start by constructing the task for the second and third hydro loop */ struct task *t2 = scheduler_addtask(sched, task_type_sub_pair, task_subtype_gradient, - t->flags, 0, t->ci, t->cj, 0); + t->flags, 0, t->ci, t->cj); struct task *t3 = scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, - t->flags, 0, t->ci, t->cj, 0); + t->flags, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ engine_addlink(e, &t->ci->gradient, t2); @@ -2237,7 +2243,7 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { /* Start by constructing the task for the second hydro loop */ struct task *t2 = scheduler_addtask(sched, task_type_sub_pair, task_subtype_force, - t->flags, 0, t->ci, t->cj, 0); + t->flags, 0, t->ci, t->cj); /* Add the link between the new loop and both cells */ engine_addlink(e, &t->ci->force, t2); @@ -2282,7 +2288,7 @@ void engine_make_gravityrecursive_tasks(struct engine *e) { /* struct task *down = NULL; */ /* /\* scheduler_addtask(sched, task_type_grav_down, * task_subtype_none, 0, 0, *\/ */ - /* /\* &cells[k], NULL, 0); *\/ */ + /* /\* &cells[k], NULL); *\/ */ /* /\* Push tasks down the cell hierarchy. *\/ */ /* engine_addtasks_grav(e, &cells[k], up, down); */ @@ -2290,6 +2296,21 @@ void engine_make_gravityrecursive_tasks(struct engine *e) { /* } */ } +void engine_check_sort_tasks(struct engine *e, struct cell *c) { + + /* Find the parent sort task, if any, and copy its flags. */ + if (c->sorts != NULL) { + struct cell *parent = c->parent; + while (parent != NULL && parent->sorts == NULL) parent = parent->parent; + if (parent != NULL) c->sorts->flags |= parent->sorts->flags; + } + + /* Recurse? */ + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) engine_check_sort_tasks(e, c->progeny[k]); +} + /** * @brief Fill the #space's task list. * @@ -2350,10 +2371,13 @@ void engine_maketasks(struct engine *e) { * pointers. */ for (int k = 0; k < nr_cells; k++) cell_set_super(&cells[k], NULL); - /* Append hierarchical tasks to each cells */ + /* Append hierarchical tasks to each cell. */ for (int k = 0; k < nr_cells; k++) engine_make_hierarchical_tasks(e, &cells[k]); + /* Append hierarchical tasks to each cell. */ + for (int k = 0; k < nr_cells; k++) engine_check_sort_tasks(e, &cells[k]); + /* Run through the tasks and make force tasks for each density task. Each force task depends on the cell ghosts and unlocks the kick task of its super-cell. */ @@ -2437,15 +2461,8 @@ void engine_marktasks_mapper(void *map_data, int num_elements, else if (t->type == task_type_pair || t->type == task_type_sub_pair) { /* Local pointers. */ - const struct cell *ci = t->ci; - const struct cell *cj = t->cj; - - /* Too much particle movement? */ - if (t->tight && - (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin || - ci->dx_max > space_maxreldx * ci->h_max || - cj->dx_max > space_maxreldx * cj->h_max)) - *rebuild_space = 1; + struct cell *ci = t->ci; + struct cell *cj = t->cj; /* Set this task's skip, otherwise nothing to do. */ if (cell_is_active(t->ci, e) || cell_is_active(t->cj, e)) @@ -2456,20 +2473,41 @@ void engine_marktasks_mapper(void *map_data, int num_elements, /* If this is not a density task, we don't have to do any of the below. */ if (t->subtype != task_subtype_density) continue; - /* Set the sort flags. */ + /* Too much particle movement? */ + if (max(ci->h_max, cj->h_max) + ci->dx_max + cj->dx_max > cj->dmin) + *rebuild_space = 1; + + /* Set the correct sorting flags */ if (t->type == task_type_pair) { + if (ci->dx_max_sort > space_maxreldx * ci->dmin) { + for (struct cell *finger = ci; finger != NULL; + finger = finger->parent) + finger->sorted = 0; + } + if (cj->dx_max_sort > space_maxreldx * cj->dmin) { + for (struct cell *finger = cj; finger != NULL; + finger = finger->parent) + finger->sorted = 0; + } if (!(ci->sorted & (1 << t->flags))) { - atomic_or(&ci->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(ci->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, ci->sorts); + if (ci->nodeID == engine_rank) scheduler_activate(s, ci->drift); } if (!(cj->sorted & (1 << t->flags))) { - atomic_or(&cj->sorts->flags, (1 << t->flags)); +#ifdef SWIFT_DEBUG_CHECKS + if (!(cj->sorts->flags & (1 << t->flags))) + error("bad flags in sort task."); +#endif scheduler_activate(s, cj->sorts); + if (cj->nodeID == engine_rank) scheduler_activate(s, cj->drift); } } #ifdef WITH_MPI - /* Activate the send/recv flags. */ if (ci->nodeID != engine_rank) { @@ -2488,10 +2526,12 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (cj->super->drift) - scheduler_activate(s, cj->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, cj->drift); if (cell_is_active(cj, e)) { for (l = cj->send_rho; l != NULL && l->t->cj->nodeID != ci->nodeID; @@ -2524,10 +2564,12 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_xv task."); scheduler_activate(s, l->t); - if (ci->super->drift) - scheduler_activate(s, ci->super->drift); + /* Drift both cells, the foreign one at the level which it is sent. */ + if (l->t->ci->drift) + scheduler_activate(s, l->t->ci->drift); else error("Drift task missing !"); + if (t->type == task_type_pair) scheduler_activate(s, ci->drift); if (cell_is_active(ci, e)) { for (l = ci->send_rho; l != NULL && l->t->cj->nodeID != cj->nodeID; @@ -2542,15 +2584,22 @@ void engine_marktasks_mapper(void *map_data, int num_elements, if (l == NULL) error("Missing link to send_ti task."); scheduler_activate(s, l->t); } - } + } else if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); + } +#else + if (t->type == task_type_pair) { + scheduler_activate(s, ci->drift); + scheduler_activate(s, cj->drift); + } #endif } - /* Kick/Drift/Init? */ + /* Kick/Drift? */ else if (t->type == task_type_kick1 || t->type == task_type_kick2 || - t->type == task_type_drift || t->type == task_type_init || - t->type == task_type_init_grav) { + t->type == task_type_drift || t->type == task_type_init_grav) { if (cell_is_active(t->ci, e)) scheduler_activate(s, t); } @@ -2691,7 +2740,8 @@ void engine_rebuild(struct engine *e) { */ void engine_prepare(struct engine *e) { - TIMER_TIC; + TIMER_TIC2; + const ticks tic = getticks(); #ifdef SWIFT_DEBUG_CHECKS if (e->forcerepart || e->forcerebuild) { @@ -2718,7 +2768,7 @@ void engine_prepare(struct engine *e) { } e->tasks_age += 1; - TIMER_TOC(timer_prepare); + TIMER_TOC2(timer_prepare); if (e->verbose) message("took %.3f %s (including unskip and reweight).", @@ -3093,6 +3143,11 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); + /* Init the particle hydro data (by hand). */ + for (size_t k = 0; k < s->nr_parts; k++) + hydro_init_part(&s->parts[k], &e->s->hs); + for (size_t k = 0; k < s->nr_gparts; k++) gravity_init_gpart(&s->gparts[k]); + /* Now, launch the calculation */ TIMER_TIC; engine_launch(e, e->nr_threads); @@ -3138,6 +3193,11 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* No drift this time */ engine_skip_drift(e); + /* Init the particle hydro data (by hand). */ + for (size_t k = 0; k < s->nr_parts; k++) + hydro_init_part(&s->parts[k], &e->s->hs); + for (size_t k = 0; k < s->nr_gparts; k++) gravity_init_gpart(&s->gparts[k]); + /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -3225,6 +3285,15 @@ void engine_step(struct engine *e) { /* Are we drifting everything (a la Gadget/GIZMO) ? */ if (e->policy & engine_policy_drift_all) engine_drift_all(e); + /* Are we reconstructing the multipoles or drifting them ?*/ + if (e->policy & engine_policy_self_gravity) { + + if (e->policy & engine_policy_reconstruct_mpoles) + engine_reconstruct_multipoles(e); + else + engine_drift_top_multipoles(e); + } + /* Print the number of active tasks ? */ if (e->verbose) engine_print_task_counts(e); @@ -3248,9 +3317,6 @@ void engine_step(struct engine *e) { gravity_exact_force_compute(e->s, e); #endif - /* Do we need to drift the top-level multipoles ? */ - if (e->policy & engine_policy_self_gravity) engine_drift_top_multipoles(e); - /* Start all the tasks. */ TIMER_TIC; engine_launch(e, e->nr_threads); @@ -3262,9 +3328,8 @@ void engine_step(struct engine *e) { gravity_exact_force_check(e->s, e, 1e-1); #endif - /* Let's trigger a rebuild every-so-often for good measure */ // MATTHIEU - // improve - if (!(e->policy & engine_policy_hydro) && + /* Let's trigger a rebuild every-so-often for good measure */ + if (!(e->policy & engine_policy_hydro) && // MATTHIEU improve this (e->policy & engine_policy_self_gravity) && e->step % 20 == 0) e->forcerebuild = 1; @@ -3447,6 +3512,39 @@ void engine_drift_top_multipoles(struct engine *e) { clocks_getunit()); } +void engine_do_reconstruct_multipoles_mapper(void *map_data, int num_elements, + void *extra_data) { + + struct engine *e = (struct engine *)extra_data; + struct cell *cells = (struct cell *)map_data; + + for (int ind = 0; ind < num_elements; ind++) { + struct cell *c = &cells[ind]; + if (c != NULL && c->nodeID == e->nodeID) { + + /* Construct the multipoles in this cell hierarchy */ + cell_make_multipoles(c, e->ti_current); + } + } +} + +/** + * @brief Reconstruct all the multipoles at all the levels in the tree. + * + * @param e The #engine. + */ +void engine_reconstruct_multipoles(struct engine *e) { + + const ticks tic = getticks(); + + threadpool_map(&e->threadpool, engine_do_reconstruct_multipoles_mapper, + e->s->cells_top, e->s->nr_cells, sizeof(struct cell), 10, e); + + if (e->verbose) + message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + clocks_getunit()); +} + /** * @brief Create and fill the proxies. * @@ -4166,11 +4264,13 @@ void engine_init(struct engine *e, struct space *s, e->runners[k].qid = k * nr_queues / e->nr_threads; } +#ifdef WITH_VECTORIZATION /* Allocate particle caches. */ e->runners[k].ci_cache.count = 0; e->runners[k].cj_cache.count = 0; cache_init(&e->runners[k].ci_cache, CACHE_SIZE); cache_init(&e->runners[k].cj_cache, CACHE_SIZE); +#endif if (verbose) { if (with_aff) @@ -4257,8 +4357,10 @@ void engine_compute_next_snapshot_time(struct engine *e) { */ void engine_clean(struct engine *e) { +#ifdef WITH_VECTORIZATION for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].ci_cache); for (int i = 0; i < e->nr_threads; ++i) cache_clean(&e->runners[i].cj_cache); +#endif free(e->runners); free(e->snapshotUnits); free(e->links); diff --git a/src/engine.h b/src/engine.h index 22ee122bb082895131584942bde50509952b98ff..e62b12332d3ac1b985b8f6d7181ea66824ec4f13 100644 --- a/src/engine.h +++ b/src/engine.h @@ -66,9 +66,10 @@ enum engine_policy { engine_policy_external_gravity = (1 << 9), engine_policy_cosmology = (1 << 10), engine_policy_drift_all = (1 << 11), - engine_policy_cooling = (1 << 12), - engine_policy_sourceterms = (1 << 13), - engine_policy_stars = (1 << 14) + engine_policy_reconstruct_mpoles = (1 << 12), + engine_policy_cooling = (1 << 13), + engine_policy_sourceterms = (1 << 14), + engine_policy_stars = (1 << 15) }; extern const char *engine_policy_names[]; @@ -256,6 +257,7 @@ void engine_compute_next_snapshot_time(struct engine *e); void engine_unskip(struct engine *e); void engine_drift_all(struct engine *e); void engine_drift_top_multipoles(struct engine *e); +void engine_reconstruct_multipoles(struct engine *e); void engine_dump_snapshot(struct engine *e); void engine_init(struct engine *e, struct space *s, const struct swift_params *params, int nr_nodes, int nodeID, diff --git a/src/gravity.c b/src/gravity.c index 405e52cc116416c27ec236340a0358443e255384..97b2955b32e1513c3d86d1d1f4da2169130feb77 100644 --- a/src/gravity.c +++ b/src/gravity.c @@ -32,6 +32,13 @@ #include "error.h" #include "version.h" +struct exact_force_data { + const struct engine *e; + const struct space *s; + int counter_global; + double const_G; +}; + /** * @brief Checks whether the file containing the exact accelerations for * the current choice of parameters already exists. @@ -83,32 +90,23 @@ int gravity_exact_force_file_exits(const struct engine *e) { } /** - * @brief Run a brute-force gravity calculation for a subset of particles. - * - * All gpart with ID modulo SWIFT_GRAVITY_FORCE_CHECKS will get their forces - * computed. - * - * @param s The #space to use. - * @param e The #engine (to access the current time). + * @brief Mapper function for the exact gravity calculation. */ -void gravity_exact_force_compute(struct space *s, const struct engine *e) { - +void gravity_exact_force_compute_mapper(void *map_data, int nr_gparts, + void *extra_data) { #ifdef SWIFT_GRAVITY_FORCE_CHECKS - const ticks tic = getticks(); - const double const_G = e->physical_constants->const_newton_G; + /* Unpack the data */ + struct gpart *restrict gparts = (struct gpart *)map_data; + struct exact_force_data *data = (struct exact_force_data *)extra_data; + const struct space *s = data->s; + const struct engine *e = data->e; + const double const_G = data->const_G; int counter = 0; - /* Let's start by checking whether we already computed these forces */ - if (gravity_exact_force_file_exits(e)) { - message("Exact accelerations already computed. Skipping calculation."); - return; - } - - /* No matching file present ? Do it then */ - for (size_t i = 0; i < s->nr_gparts; ++i) { + for (int i = 0; i < nr_gparts; ++i) { - struct gpart *gpi = &s->gparts[i]; + struct gpart *gpi = &gparts[i]; /* Is the particle active and part of the subset to be tested ? */ if (gpi->id_or_neg_offset % SWIFT_GRAVITY_FORCE_CHECKS == 0 && @@ -118,13 +116,13 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { double a_grav[3] = {0., 0., 0.}; /* Interact it with all other particles in the space.*/ - for (size_t j = 0; j < s->nr_gparts; ++j) { - - /* No self interaction */ - if (i == j) continue; + for (int j = 0; j < (int)s->nr_gparts; ++j) { struct gpart *gpj = &s->gparts[j]; + /* No self interaction */ + if (gpi == gpj) continue; + /* Compute the pairwise distance. */ const double dx[3] = {gpi->x[0] - gpj->x[0], // x gpi->x[1] - gpj->x[1], // y @@ -173,9 +171,47 @@ void gravity_exact_force_compute(struct space *s, const struct engine *e) { counter++; } } + atomic_add(&data->counter_global, counter); - message("Computed exact gravity for %d gparts (took %.3f %s). ", counter, - clocks_from_ticks(getticks() - tic), clocks_getunit()); +#else + error("Gravity checking function called without the corresponding flag."); +#endif +} + +/** + * @brief Run a brute-force gravity calculation for a subset of particles. + * + * All gpart with ID modulo SWIFT_GRAVITY_FORCE_CHECKS will get their forces + * computed. + * + * @param s The #space to use. + * @param e The #engine (to access the current time). + */ +void gravity_exact_force_compute(struct space *s, const struct engine *e) { + +#ifdef SWIFT_GRAVITY_FORCE_CHECKS + + const ticks tic = getticks(); + + /* Let's start by checking whether we already computed these forces */ + if (gravity_exact_force_file_exits(e)) { + message("Exact accelerations already computed. Skipping calculation."); + return; + } + + /* No matching file present ? Do it then */ + struct exact_force_data data; + data.e = e; + data.s = s; + data.counter_global = 0; + data.const_G = e->physical_constants->const_newton_G; + + threadpool_map(&s->e->threadpool, gravity_exact_force_compute_mapper, + s->gparts, s->nr_gparts, sizeof(struct gpart), 1000, &data); + + message("Computed exact gravity for %d gparts (took %.3f %s). ", + data.counter_global, clocks_from_ticks(getticks() - tic), + clocks_getunit()); #else error("Gravity checking function called without the corresponding flag."); diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index e3487ecd23e01b83e711dc07a6c97fd5ecf6a50d..1eee6e678288a209b669c46f7c87fbb5c399b6c7 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -24,6 +24,39 @@ #include "gravity_properties.h" #include "minmax.h" +/** + * @brief Returns the mass of a particle + * + * @param gp The particle of interest + */ +__attribute__((always_inline)) INLINE static float gravity_get_mass( + const struct gpart* restrict gp) { + + return gp->mass; +} + +/** + * @brief Returns the softening of a particle + * + * @param gp The particle of interest + */ +__attribute__((always_inline)) INLINE static float gravity_get_softening( + const struct gpart* restrict gp) { + + return gp->epsilon; +} + +/** + * @brief Returns the potential of a particle + * + * @param gp The particle of interest + */ +__attribute__((always_inline)) INLINE static float gravity_get_potential( + const struct gpart* restrict gp) { + + return gp->potential; +} + /** * @brief Computes the gravity time-step of a given particle due to self-gravity * @@ -42,8 +75,10 @@ gravity_compute_timestep_self(const struct gpart* const gp, const float ac_inv = (ac2 > 0.f) ? 1.f / sqrtf(ac2) : FLT_MAX; - /* Note that 0.714285714 = 2. (from Gadget) / 2.8 (Plummer softening) */ - const float dt = sqrtf(0.714285714f * grav_props->eta * gp->epsilon * ac_inv); + const float epsilon = gravity_get_softening(gp); + + /* Note that 0.66666667 = 2. (from Gadget) / 3. (Plummer softening) */ + const float dt = sqrtf(0.66666667f * grav_props->eta * epsilon * ac_inv); return dt; } @@ -63,6 +98,7 @@ __attribute__((always_inline)) INLINE static void gravity_init_gpart( gp->a_grav[0] = 0.f; gp->a_grav[1] = 0.f; gp->a_grav[2] = 0.f; + gp->potential = 0.f; #ifdef SWIFT_DEBUG_CHECKS gp->num_interacted = 0; @@ -130,8 +166,8 @@ __attribute__((always_inline)) INLINE static void gravity_first_init_gpart( __attribute__((always_inline)) INLINE static void gravity_init_softening( struct gpart* gp, const struct gravity_props* grav_props) { - /* Note 2.8 is the Plummer-equivalent correction */ - gp->epsilon = 2.8f * grav_props->epsilon; + /* Note 3 is the Plummer-equivalent correction */ + gp->epsilon = grav_props->epsilon; } #endif /* SWIFT_DEFAULT_GRAVITY_H */ diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index bb1307a1a2fc1dcd71202e3426c99f3e30c0de9a..33c444e24e4bc9681dc3138298b5fd9301d084d3 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -41,6 +41,9 @@ struct gpart { /* Particle mass. */ float mass; + /* Gravitational potential */ + float potential; + /* Softening length */ float epsilon; diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h index 337ae67f4620677e7a3dfdd1b0c527eb016504e2..8c8379f74f5fc67d3671f0154b2aeacbc35ea9f1 100644 --- a/src/gravity_derivatives.h +++ b/src/gravity_derivatives.h @@ -21,14 +21,14 @@ /** * @file gravity_derivatives.h - * @brief Derivatives (up to 3rd order) of the gravitational potential. + * @brief Derivatives (up to 5th order) of the gravitational potential. * * We use the notation of Dehnen, Computational Astrophysics and Cosmology, * 1, 1, pp. 24 (2014), arXiv:1405.2255 */ -/* Some standard headers. */ -#include <math.h> +/* Config parameters. */ +#include "../config.h" /* Local headers. */ #include "inline.h" diff --git a/src/gravity_properties.c b/src/gravity_properties.c index f52029fa1543ad1f8d0121c8c4e6d362227f4c53..7b9b8cd7c35f8fa9b21ff34ce2589b5d45ce8393 100644 --- a/src/gravity_properties.c +++ b/src/gravity_properties.c @@ -52,23 +52,25 @@ void gravity_props_init(struct gravity_props *p, p->theta_crit_inv = 1. / p->theta_crit; /* Softening lengths */ - p->epsilon = parser_get_param_double(params, "Gravity:epsilon"); + p->epsilon = 3. * parser_get_param_double(params, "Gravity:epsilon"); p->epsilon2 = p->epsilon * p->epsilon; p->epsilon_inv = 1. / p->epsilon; } void gravity_props_print(const struct gravity_props *p) { - message("Self-gravity scheme: FMM-MM"); + message("Self-gravity scheme: FMM-MM with m-poles of order %d", + SELF_GRAVITY_MULTIPOLE_ORDER); message("Self-gravity time integration: eta=%.4f", p->eta); message("Self-gravity opening angle: theta=%.4f", p->theta_crit); - message("Self-gravity softening: epsilon=%.4f", p->epsilon); + message("Self-gravity softening: epsilon=%.4f (Plummer equivalent: %.4f)", + p->epsilon, p->epsilon / 3.); if (p->a_smooth != gravity_props_default_a_smooth) - message("Self-gravity smoothing-scale: a_smooth=%f", p->a_smooth); + message("Self-gravity MM smoothing-scale: a_smooth=%f", p->a_smooth); if (p->r_cut != gravity_props_default_r_cut) message("Self-gravity MM cut-off: r_cut=%f", p->r_cut); @@ -80,7 +82,10 @@ void gravity_props_print_snapshot(hid_t h_grpgrav, io_write_attribute_f(h_grpgrav, "Time integration eta", p->eta); io_write_attribute_f(h_grpgrav, "Softening length", p->epsilon); + io_write_attribute_f(h_grpgrav, "Softening length (Plummer equivalent)", + p->epsilon / 3.); io_write_attribute_f(h_grpgrav, "Opening angle", p->theta_crit); + io_write_attribute_d(h_grpgrav, "MM order", SELF_GRAVITY_MULTIPOLE_ORDER); io_write_attribute_f(h_grpgrav, "MM a_smooth", p->a_smooth); io_write_attribute_f(h_grpgrav, "MM r_cut", p->r_cut); } diff --git a/src/gravity_softened_derivatives.h b/src/gravity_softened_derivatives.h new file mode 100644 index 0000000000000000000000000000000000000000..3f92476dab5940765b112708a867d940d4d5e6e9 --- /dev/null +++ b/src/gravity_softened_derivatives.h @@ -0,0 +1,443 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H +#define SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H + +/** + * @file gravity_softened_derivatives.h + * @brief Derivatives of the softened gravitational potential. + * + * We use the notation of Dehnen, Computational Astrophysics and Cosmology, + * 1, 1, pp. 24 (2014), arXiv:1405.2255 + */ + +/* Config parameters. */ +#include "../config.h" + +/* Local headers. */ +#include "inline.h" +#include "kernel_gravity.h" + +/*************************/ +/* 0th order derivatives */ +/*************************/ + +/** + * @brief \f$ \phi(r_x, r_y, r_z, h) \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_000( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return eps_inv * D_soft_0(u); +} + +/*************************/ +/* 1st order derivatives */ +/*************************/ + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z, h)}{\partial r_x} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_100( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return -r_x * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z, h)}{\partial r_x} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_010( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return -r_y * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial\phi(r_x, r_y, r_z, h)}{\partial r_x} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_001( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + return -r_z * eps_inv * eps_inv * eps_inv * D_soft_1(u); +} + +/*************************/ +/* 2nd order derivatives */ +/*************************/ + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_x^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_200( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv3 = eps_inv * eps_inv2; + const double eps_inv5 = eps_inv3 * eps_inv2; + return r_x * r_x * eps_inv5 * D_soft_2(u) - eps_inv3 * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_y^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_020( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv3 = eps_inv * eps_inv2; + const double eps_inv5 = eps_inv3 * eps_inv2; + return r_y * r_y * eps_inv5 * D_soft_2(u) - eps_inv3 * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_z^2} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_002( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv3 = eps_inv * eps_inv2; + const double eps_inv5 = eps_inv3 * eps_inv2; + return r_z * r_z * eps_inv5 * D_soft_2(u) - eps_inv3 * D_soft_1(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_x\partial r_y} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_110( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + return r_x * r_y * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_x\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_101( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + return r_x * r_z * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^2\phi(r_x, r_y, r_z, h)}{\partial r_y\partial r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_011( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + return r_y * r_z * eps_inv5 * D_soft_2(u); +} + +/*************************/ +/* 3rd order derivatives */ +/*************************/ + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_x^3} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_300( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_x * r_x * r_x * eps_inv7 * D_soft_3(u) + + 3. * r_x * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_y^3} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_030( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_y * r_y * r_y * eps_inv7 * D_soft_3(u) + + 3. * r_y * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_z^3} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_003( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_z * r_z * r_z * eps_inv7 * D_soft_3(u) + + 3. * r_z * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_x^2\partial + * r_y} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_210( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_x * r_x * r_y * eps_inv7 * D_soft_3(u) + + r_y * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_x^2\partial + * r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_201( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_x * r_x * r_z * eps_inv7 * D_soft_3(u) + + r_z * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_x\partial + * r_y^2} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_120( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_x * r_y * r_y * eps_inv7 * D_soft_3(u) + + r_x * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_y^2\partial + * r_z} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_021( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_y * r_y * r_z * eps_inv7 * D_soft_3(u) + + r_z * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_x\partial + * r_z^2} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_102( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_x * r_z * r_z * eps_inv7 * D_soft_3(u) + + r_x * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_y\partial + * r_z^2} + * \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_012( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv5 = eps_inv2 * eps_inv2 * eps_inv; + const double eps_inv7 = eps_inv5 * eps_inv2; + return -r_y * r_z * r_z * eps_inv7 * D_soft_3(u) + + r_y * eps_inv5 * D_soft_2(u); +} + +/** + * @brief \f$ \frac{\partial^3\phi(r_x, r_y, r_z, h)}{\partial r_z\partial + * r_y\partial r_z} \f$. + * + * @param r_x x-coordinate of the distance vector (\f$ r_x \f$). + * @param r_y y-coordinate of the distance vector (\f$ r_y \f$). + * @param r_z z-coordinate of the distance vector (\f$ r_z \f$). + * @param r Norm of the distance vector (\f$ |r| \f$). + * @param eps_inv Inverse of the softening length (\f$ 1/h \f$). + */ +__attribute__((always_inline)) INLINE static double D_soft_111( + double r_x, double r_y, double r_z, double r, double eps_inv) { + + const double u = r * eps_inv; + const double eps_inv2 = eps_inv * eps_inv; + const double eps_inv4 = eps_inv2 * eps_inv2; + const double eps_inv7 = eps_inv4 * eps_inv2 * eps_inv; + return -r_x * r_y * r_z * eps_inv7 * D_soft_3(u); +} + +#endif /* SWIFT_GRAVITY_SOFTENED_DERIVATIVE_H */ diff --git a/src/hydro/Default/hydro_part.h b/src/hydro/Default/hydro_part.h index 332eecb27fb65a6b4da48cbb595450a432c44615..8b14c45614e4c09c48056c9398ed4bfe3ed90e38 100644 --- a/src/hydro/Default/hydro_part.h +++ b/src/hydro/Default/hydro_part.h @@ -27,6 +27,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index 8a2571e6535ff988577d7d2c6e6c7baa98194aca..585b9394dd3ad70dfd9a518bdee0b134d46e5584 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -151,7 +151,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_density( for (k = 0; k < 3; k++) dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); #else - error("Unknown vector size.") + error("Unknown vector size."); #endif /* Get the radius and inverse radius. */ @@ -318,7 +318,7 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, for (k = 0; k < 3; k++) dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); #else - error("Unknown vector size.") + error("Unknown vector size."); #endif /* Get the radius and inverse radius. */ @@ -381,12 +381,14 @@ runner_iact_nonsym_vec_density(float *R2, float *Dx, float *Hi, float *Hj, * (non-symmetric vectorized version). */ __attribute__((always_inline)) INLINE static void -runner_iact_nonsym_1_vec_density( - vector *r2, vector *dx, vector *dy, vector *dz, vector hi_inv, vector vix, - vector viy, vector viz, float *Vjx, float *Vjy, float *Vjz, float *Mj, - vector *rhoSum, vector *rho_dhSum, vector *wcountSum, vector *wcount_dhSum, - vector *div_vSum, vector *curlvxSum, vector *curlvySum, vector *curlvzSum, - vector mask, int knlMask) { +runner_iact_nonsym_1_vec_density(vector *r2, vector *dx, vector *dy, vector *dz, + vector hi_inv, vector vix, vector viy, + vector viz, float *Vjx, float *Vjy, float *Vjz, + float *Mj, vector *rhoSum, vector *rho_dhSum, + vector *wcountSum, vector *wcount_dhSum, + vector *div_vSum, vector *curlvxSum, + vector *curlvySum, vector *curlvzSum, + vector mask, int knlMask) { vector r, ri, xi, wi, wi_dx; vector mj; @@ -843,7 +845,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_force( vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, pj[3]->force.balsara); #else - error("Unknown vector size.") + error("Unknown vector size."); #endif /* Get the radius and inverse radius. */ @@ -1122,7 +1124,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_vec_force( vec_set(pj[0]->force.balsara, pj[1]->force.balsara, pj[2]->force.balsara, pj[3]->force.balsara); #else - error("Unknown vector size.") + error("Unknown vector size."); #endif /* Get the radius and inverse radius. */ diff --git a/src/hydro/Gadget2/hydro_part.h b/src/hydro/Gadget2/hydro_part.h index 69ae79666e1db4e4f405c653cfc533606989a73a..571aaf39ed2509d95e9f68c34ab8e6c09ded3fbb 100644 --- a/src/hydro/Gadget2/hydro_part.h +++ b/src/hydro/Gadget2/hydro_part.h @@ -39,6 +39,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /* Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Gizmo/hydro_part.h b/src/hydro/Gizmo/hydro_part.h index 29cb6b00190f69e03c3d038ee485519969a8c6e9..d552a3f7e86031311098293845f1aa11270c417f 100644 --- a/src/hydro/Gizmo/hydro_part.h +++ b/src/hydro/Gizmo/hydro_part.h @@ -27,6 +27,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /* Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Minimal/hydro_part.h b/src/hydro/Minimal/hydro_part.h index dabae1a546d66f61db4f9796c21b71817ca20aac..e9289c099a8a4ee698b016036e4e3d4dad481768 100644 --- a/src/hydro/Minimal/hydro_part.h +++ b/src/hydro/Minimal/hydro_part.h @@ -46,6 +46,9 @@ struct xpart { /*! Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /*! Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/PressureEntropy/hydro_part.h b/src/hydro/PressureEntropy/hydro_part.h index b6e496918fa0e7989a8bddcfc5e8ea6b332c338e..6cb6fe87393e376dc189150286079faa9f41cb68 100644 --- a/src/hydro/PressureEntropy/hydro_part.h +++ b/src/hydro/PressureEntropy/hydro_part.h @@ -38,6 +38,9 @@ struct xpart { /*! Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /*! Velocity at the last full step. */ float v_full[3]; diff --git a/src/hydro/Shadowswift/hydro_part.h b/src/hydro/Shadowswift/hydro_part.h index 43237d9c80a5dfa5bed2fe409281b4f89b6aa172..e25400e905b893d13ffb552da42d3fbf96d71fde 100644 --- a/src/hydro/Shadowswift/hydro_part.h +++ b/src/hydro/Shadowswift/hydro_part.h @@ -28,6 +28,9 @@ struct xpart { /* Offset between current position and position at last tree rebuild. */ float x_diff[3]; + /*! Offset between the current position and position at the last sort. */ + float x_diff_sort[3]; + /* Velocity at the last full step. */ float v_full[3]; diff --git a/src/inline.h b/src/inline.h index c4dd9d59becabf54895b60cf3ac7ba809ac150c5..538f6f520c9d6187dc02f1cbd5d59480aac7bdb2 100644 --- a/src/inline.h +++ b/src/inline.h @@ -20,6 +20,9 @@ #ifndef SWIFT_INLINE_H #define SWIFT_INLINE_H +/* Config parameters. */ +#include "../config.h" + /** * @brief Defines inline */ diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index 4a7588d312c381ef60fb97c43f8fadb1e03dfead..5a9e839b63422a3f18c80caf9d891dd6f8be5da6 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -23,113 +23,94 @@ #include <math.h> /* Includes. */ -#include "const.h" #include "inline.h" #include "minmax.h" -#include "vector.h" - -/* The gravity kernel is defined as a degree 6 polynomial in the distance - r. The resulting value should be post-multiplied with r^-3, resulting - in a polynomial with terms ranging from r^-3 to r^3, which are - sufficient to model both the direct potential as well as the splines - near the origin. - As in the hydro case, the 1/h^3 needs to be multiplied in afterwards */ - -/* Coefficients for the kernel. */ -#define kernel_grav_name "Gadget-2 softening kernel" -#define kernel_grav_degree 6 /* Degree of the polynomial */ -#define kernel_grav_ivals 2 /* Number of branches */ -static const float - kernel_grav_coeffs[(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] - __attribute__((aligned(16))) = {32.f, - -38.4f, - 0.f, - 10.66666667f, - 0.f, - 0.f, - 0.f, /* 0 < u < 0.5 */ - -10.66666667f, - 38.4f, - -48.f, - 21.3333333f, - 0.f, - 0.f, - -0.066666667f, /* 0.5 < u < 1 */ - 0.f, - 0.f, - 0.f, - 0.f, - 0.f, - 0.f, - 0.f}; /* 1 < u */ /** * @brief Computes the gravity softening function. * + * This functions assumes 0 < u < 1. + * * @param u The ratio of the distance to the softening length $u = x/h$. * @param W (return) The value of the kernel function $W(x,h)$. */ __attribute__((always_inline)) INLINE static void kernel_grav_eval( float u, float *const W) { - /* Pick the correct branch of the kernel */ - const int ind = (int)min(u * (float)kernel_grav_ivals, kernel_grav_ivals); - const float *const coeffs = - &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)]; - - /* First two terms of the polynomial ... */ - float w = coeffs[0] * u + coeffs[1]; - - /* ... and the rest of them */ - for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k]; - - /* Return everything */ - *W = w / (u * u * u); + /* W(u) = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */ + *W = 21.f * u - 90.f; + *W = *W * u + 140.f; + *W = *W * u - 84.f; + *W = *W * u; + *W = *W * u + 14.f; } #ifdef SWIFT_GRAVITY_FORCE_CHECKS +/** + * @brief Computes the gravity softening function in double precision. + * + * This functions assumes 0 < u < 1. + * + * @param u The ratio of the distance to the softening length $u = x/h$. + * @param W (return) The value of the kernel function $W(x,h)$. + */ __attribute__((always_inline)) INLINE static void kernel_grav_eval_double( double u, double *const W) { - static const double kernel_grav_coeffs_double[(kernel_grav_degree + 1) * - (kernel_grav_ivals + 1)] - __attribute__((aligned(16))) = {32., - -38.4, - 0., - 10.66666667, - 0., - 0., - 0., /* 0 < u < 0.5 */ - -10.66666667, - 38.4, - -48., - 21.3333333, - 0., - 0., - -0.066666667, /* 0.5 < u < 1 */ - 0., - 0., - 0., - 0., - 0., - 0., - 0.}; /* 1 < u */ - - /* Pick the correct branch of the kernel */ - const int ind = (int)min(u * (double)kernel_grav_ivals, kernel_grav_ivals); - const double *const coeffs = - &kernel_grav_coeffs_double[ind * (kernel_grav_degree + 1)]; - - /* First two terms of the polynomial ... */ - double w = coeffs[0] * u + coeffs[1]; - - /* ... and the rest of them */ - for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k]; - - /* Return everything */ - *W = w / (u * u * u); + /* W(u) = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */ + *W = 21. * u - 90.; + *W = *W * u + 140.; + *W = *W * u - 84.; + *W = *W * u; + *W = *W * u + 14.; } #endif /* SWIFT_GRAVITY_FORCE_CHECKS */ +/************************************************/ +/* Derivatives of softening kernel used for FMM */ +/************************************************/ + +__attribute__((always_inline)) INLINE static double D_soft_0(double u) { + + /* phi(u) = -3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3 */ + double phi = -3. * u + 15.; + phi = phi * u - 28.; + phi = phi * u + 21.; + phi = phi * u; + phi = phi * u - 7.; + phi = phi * u; + phi = phi * u + 3.; + + return phi; +} + +__attribute__((always_inline)) INLINE static double D_soft_1(double u) { + + /* phi'(u)/u = 21u^5 - 90u^4 + 140u^3 - 84u^2 + 14 */ + double phi = 21. * u - 90.; + phi = phi * u + 140.; + phi = phi * u - 84.; + phi = phi * u; + phi = phi * u + 14.; + + return phi; +} + +__attribute__((always_inline)) INLINE static double D_soft_2(double u) { + + /* (phi'(u)/u)'/u = -105u^3 + 360u^2 - 420u + 168 */ + double phi = -105. * u + 360.; + phi = phi * u - 420.; + phi = phi * u + 168.; + + return phi; +} + +__attribute__((always_inline)) INLINE static double D_soft_3(double u) { + + /* ((phi'(u)/u)'/u)'/u = 315u - 720 + 420/u */ + return 315. * u - 720. + 420. / u; +} + #endif /* SWIFT_KERNEL_GRAVITY_H */ diff --git a/src/kernel_hydro.h b/src/kernel_hydro.h index 585655940cf6f0587fd7dd1efae1bf546476b1da..f634a59d7ee769951e6560d46a92053c144cc766 100644 --- a/src/kernel_hydro.h +++ b/src/kernel_hydro.h @@ -327,7 +327,8 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx( const float *const coeffs = &kernel_coeffs[ind * (kernel_degree + 1)]; /* First two terms of the polynomial ... */ - float dw_dx = ((float)kernel_degree * coeffs[0] * x) + (float)(kernel_degree - 1) * coeffs[1]; + float dw_dx = ((float)kernel_degree * coeffs[0] * x) + + (float)(kernel_degree - 1) * coeffs[1]; /* ... and the rest of them */ for (int k = 2; k < kernel_degree; k++) { @@ -396,7 +397,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_vec( dw_dx->v * kernel_constant_vec.v * kernel_gamma_inv_dim_plus_one_vec.v; } -/* Define constant vectors for the Wendland C2 and Cubic Spline kernel coefficients. */ +/* Define constant vectors for the Wendland C2 and Cubic Spline kernel + * coefficients. */ #ifdef WENDLAND_C2_KERNEL static const vector wendland_const_c0 = FILL_VEC(4.f); static const vector wendland_const_c1 = FILL_VEC(-15.f); @@ -469,37 +471,39 @@ __attribute__((always_inline)) INLINE static void kernel_deval_1_vec( vector mask_reg1, mask_reg2; /* Form a mask for each part of the kernel. */ - mask_reg1.v = vec_cmp_lt(x.v,cond.v); /* 0 < x < 0.5 */ - mask_reg2.v = vec_cmp_gte(x.v,cond.v); /* 0.5 < x < 1 */; + mask_reg1.v = vec_cmp_lt(x.v, cond.v); /* 0 < x < 0.5 */ + mask_reg2.v = vec_cmp_gte(x.v, cond.v); /* 0.5 < x < 1 */ + ; - /* Work out w for both regions of the kernel and combine the results together using masks. */ + /* Work out w for both regions of the kernel and combine the results together + * using masks. */ /* Init the iteration for Horner's scheme. */ w->v = vec_fma(cubic_1_const_c0.v, x.v, cubic_1_const_c1.v); w2.v = vec_fma(cubic_2_const_c0.v, x.v, cubic_2_const_c1.v); dw_dx->v = cubic_1_const_c0.v; dw_dx2.v = cubic_2_const_c0.v; - + /* Calculate the polynomial interleaving vector operations. */ dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); dw_dx2.v = vec_fma(dw_dx2.v, x.v, w2.v); w->v = vec_mul(x.v, w->v); /* cubic_1_const_c2 is zero. */ w2.v = vec_fma(x.v, w2.v, cubic_2_const_c2.v); - + dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); dw_dx2.v = vec_fma(dw_dx2.v, x.v, w2.v); w->v = vec_fma(x.v, w->v, cubic_1_const_c3.v); w2.v = vec_fma(x.v, w2.v, cubic_2_const_c3.v); - + /* Mask out unneeded values. */ - w->v = vec_and(w->v,mask_reg1.v); - w2.v = vec_and(w2.v,mask_reg2.v); - dw_dx->v = vec_and(dw_dx->v,mask_reg1.v); - dw_dx2.v = vec_and(dw_dx2.v,mask_reg2.v); + w->v = vec_and(w->v, mask_reg1.v); + w2.v = vec_and(w2.v, mask_reg2.v); + dw_dx->v = vec_and(dw_dx->v, mask_reg1.v); + dw_dx2.v = vec_and(dw_dx2.v, mask_reg2.v); /* Added both w and w2 together to form complete result. */ - w->v = vec_add(w->v,w2.v); - dw_dx->v = vec_add(dw_dx->v,dw_dx2.v); + w->v = vec_add(w->v, w2.v); + dw_dx->v = vec_add(dw_dx->v, dw_dx2.v); #else #error #endif @@ -556,7 +560,7 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v); - w->v = vec_mul(x.v, w->v); /* wendland_const_c4 is zero. */ + w->v = vec_mul(x.v, w->v); /* wendland_const_c4 is zero. */ w2->v = vec_mul(x2.v, w2->v); /* wendland_const_c4 is zero. */ dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); @@ -579,12 +583,15 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( vector mask_reg1, mask_reg2, mask_reg1_v2, mask_reg2_v2; /* Form a mask for each part of the kernel. */ - mask_reg1.v = vec_cmp_lt(x.v,cond.v); /* 0 < x < 0.5 */ - mask_reg1_v2.v = vec_cmp_lt(x2.v,cond.v); /* 0 < x < 0.5 */ - mask_reg2.v = vec_cmp_gte(x.v,cond.v); /* 0.5 < x < 1 */; - mask_reg2_v2.v = vec_cmp_gte(x2.v,cond.v); /* 0.5 < x < 1 */; + mask_reg1.v = vec_cmp_lt(x.v, cond.v); /* 0 < x < 0.5 */ + mask_reg1_v2.v = vec_cmp_lt(x2.v, cond.v); /* 0 < x < 0.5 */ + mask_reg2.v = vec_cmp_gte(x.v, cond.v); /* 0.5 < x < 1 */ + ; + mask_reg2_v2.v = vec_cmp_gte(x2.v, cond.v); /* 0.5 < x < 1 */ + ; - /* Work out w for both regions of the kernel and combine the results together using masks. */ + /* Work out w for both regions of the kernel and combine the results together + * using masks. */ /* Init the iteration for Horner's scheme. */ w->v = vec_fma(cubic_1_const_c0.v, x.v, cubic_1_const_c1.v); @@ -595,13 +602,13 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( dw_dx2->v = cubic_1_const_c0.v; dw_dx_2.v = cubic_2_const_c0.v; dw_dx2_2.v = cubic_2_const_c0.v; - + /* Calculate the polynomial interleaving vector operations. */ dw_dx->v = vec_fma(dw_dx->v, x.v, w->v); dw_dx2->v = vec_fma(dw_dx2->v, x2.v, w2->v); dw_dx_2.v = vec_fma(dw_dx_2.v, x.v, w_2.v); dw_dx2_2.v = vec_fma(dw_dx2_2.v, x2.v, w2_2.v); - w->v = vec_mul(x.v, w->v); /* cubic_1_const_c2 is zero. */ + w->v = vec_mul(x.v, w->v); /* cubic_1_const_c2 is zero. */ w2->v = vec_mul(x2.v, w2->v); /* cubic_1_const_c2 is zero. */ w_2.v = vec_fma(x.v, w_2.v, cubic_2_const_c2.v); w2_2.v = vec_fma(x2.v, w2_2.v, cubic_2_const_c2.v); @@ -616,20 +623,20 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( w2_2.v = vec_fma(x2.v, w2_2.v, cubic_2_const_c3.v); /* Mask out unneeded values. */ - w->v = vec_and(w->v,mask_reg1.v); - w2->v = vec_and(w2->v,mask_reg1_v2.v); - w_2.v = vec_and(w_2.v,mask_reg2.v); - w2_2.v = vec_and(w2_2.v,mask_reg2_v2.v); - dw_dx->v = vec_and(dw_dx->v,mask_reg1.v); - dw_dx2->v = vec_and(dw_dx2->v,mask_reg1_v2.v); - dw_dx_2.v = vec_and(dw_dx_2.v,mask_reg2.v); - dw_dx2_2.v = vec_and(dw_dx2_2.v,mask_reg2_v2.v); + w->v = vec_and(w->v, mask_reg1.v); + w2->v = vec_and(w2->v, mask_reg1_v2.v); + w_2.v = vec_and(w_2.v, mask_reg2.v); + w2_2.v = vec_and(w2_2.v, mask_reg2_v2.v); + dw_dx->v = vec_and(dw_dx->v, mask_reg1.v); + dw_dx2->v = vec_and(dw_dx2->v, mask_reg1_v2.v); + dw_dx_2.v = vec_and(dw_dx_2.v, mask_reg2.v); + dw_dx2_2.v = vec_and(dw_dx2_2.v, mask_reg2_v2.v); /* Added both w and w2 together to form complete result. */ - w->v = vec_add(w->v,w_2.v); - w2->v = vec_add(w2->v,w2_2.v); - dw_dx->v = vec_add(dw_dx->v,dw_dx_2.v); - dw_dx2->v = vec_add(dw_dx2->v,dw_dx2_2.v); + w->v = vec_add(w->v, w_2.v); + w2->v = vec_add(w2->v, w2_2.v); + dw_dx->v = vec_add(dw_dx->v, dw_dx_2.v); + dw_dx2->v = vec_add(dw_dx2->v, dw_dx2_2.v); /* Return everything */ w->v = w->v * kernel_constant_vec.v * kernel_gamma_inv_dim_vec.v; @@ -651,8 +658,8 @@ __attribute__((always_inline)) INLINE static void kernel_deval_2_vec( * @param u The ratio of the distance to the smoothing length $u = x/h$. * @param w (return) The value of the kernel function $W(x,h)$. */ -__attribute__((always_inline)) INLINE static void kernel_eval_W_vec( - vector *u, vector *w) { +__attribute__((always_inline)) INLINE static void kernel_eval_W_vec(vector *u, + vector *w) { /* Go to the range [0,1[ from [0,H[ */ vector x; @@ -672,28 +679,30 @@ __attribute__((always_inline)) INLINE static void kernel_eval_W_vec( vector mask1, mask2; /* Form a mask for each part of the kernel. */ - mask1.v = vec_cmp_lt(x.v,cond.v); /* 0 < x < 0.5 */ - mask2.v = vec_cmp_gte(x.v,cond.v); /* 0.5 < x < 1 */; + mask1.v = vec_cmp_lt(x.v, cond.v); /* 0 < x < 0.5 */ + mask2.v = vec_cmp_gte(x.v, cond.v); /* 0.5 < x < 1 */ + ; - /* Work out w for both regions of the kernel and combine the results together using masks. */ + /* Work out w for both regions of the kernel and combine the results together + * using masks. */ /* Init the iteration for Horner's scheme. */ w->v = vec_fma(cubic_1_const_c0.v, x.v, cubic_1_const_c1.v); w2.v = vec_fma(cubic_2_const_c0.v, x.v, cubic_2_const_c1.v); - + /* Calculate the polynomial interleaving vector operations. */ w->v = vec_mul(x.v, w->v); /* cubic_1_const_c2 is zero */ w2.v = vec_fma(x.v, w2.v, cubic_2_const_c2.v); - + w->v = vec_fma(x.v, w->v, cubic_1_const_c3.v); w2.v = vec_fma(x.v, w2.v, cubic_2_const_c3.v); - + /* Mask out unneeded values. */ - w->v = vec_and(w->v,mask1.v); - w2.v = vec_and(w2.v,mask2.v); + w->v = vec_and(w->v, mask1.v); + w2.v = vec_and(w2.v, mask2.v); /* Added both w and w2 together to form complete result. */ - w->v = vec_add(w->v,w2.v); + w->v = vec_add(w->v, w2.v); #else #error #endif @@ -721,39 +730,41 @@ __attribute__((always_inline)) INLINE static void kernel_eval_dWdx_vec( #ifdef WENDLAND_C2_KERNEL /* Init the iteration for Horner's scheme. */ - dw_dx->v = vec_fma(wendland_dwdx_const_c0.v,x.v,wendland_dwdx_const_c1.v); + dw_dx->v = vec_fma(wendland_dwdx_const_c0.v, x.v, wendland_dwdx_const_c1.v); /* Calculate the polynomial interleaving vector operations */ - dw_dx->v = vec_fma(dw_dx->v,x.v,wendland_dwdx_const_c2.v); + dw_dx->v = vec_fma(dw_dx->v, x.v, wendland_dwdx_const_c2.v); - dw_dx->v = vec_fma(dw_dx->v,x.v,wendland_dwdx_const_c3.v); + dw_dx->v = vec_fma(dw_dx->v, x.v, wendland_dwdx_const_c3.v); - dw_dx->v = vec_mul(dw_dx->v,x.v); + dw_dx->v = vec_mul(dw_dx->v, x.v); #elif defined(CUBIC_SPLINE_KERNEL) vector dw_dx2; vector mask1, mask2; /* Form a mask for each part of the kernel. */ - mask1.v = vec_cmp_lt(x.v,cond.v); /* 0 < x < 0.5 */ - mask2.v = vec_cmp_gte(x.v,cond.v); /* 0.5 < x < 1 */; + mask1.v = vec_cmp_lt(x.v, cond.v); /* 0 < x < 0.5 */ + mask2.v = vec_cmp_gte(x.v, cond.v); /* 0.5 < x < 1 */ + ; - /* Work out w for both regions of the kernel and combine the results together using masks. */ + /* Work out w for both regions of the kernel and combine the results together + * using masks. */ /* Init the iteration for Horner's scheme. */ - dw_dx->v = vec_fma(cubic_1_dwdx_const_c0.v,x.v,cubic_1_dwdx_const_c1.v); - dw_dx2.v = vec_fma(cubic_2_dwdx_const_c0.v,x.v,cubic_2_dwdx_const_c1.v); - + dw_dx->v = vec_fma(cubic_1_dwdx_const_c0.v, x.v, cubic_1_dwdx_const_c1.v); + dw_dx2.v = vec_fma(cubic_2_dwdx_const_c0.v, x.v, cubic_2_dwdx_const_c1.v); + /* Calculate the polynomial interleaving vector operations. */ - dw_dx->v = vec_mul(dw_dx->v,x.v); /* cubic_1_dwdx_const_c2 is zero. */ - dw_dx2.v = vec_fma(dw_dx2.v,x.v,cubic_2_dwdx_const_c2.v); - + dw_dx->v = vec_mul(dw_dx->v, x.v); /* cubic_1_dwdx_const_c2 is zero. */ + dw_dx2.v = vec_fma(dw_dx2.v, x.v, cubic_2_dwdx_const_c2.v); + /* Mask out unneeded values. */ - dw_dx->v = vec_and(dw_dx->v,mask1.v); - dw_dx2.v = vec_and(dw_dx2.v,mask2.v); + dw_dx->v = vec_and(dw_dx->v, mask1.v); + dw_dx2.v = vec_and(dw_dx2.v, mask2.v); /* Added both dwdx and dwdx2 together to form complete result. */ - dw_dx->v = vec_add(dw_dx->v,dw_dx2.v); + dw_dx->v = vec_add(dw_dx->v, dw_dx2.v); #else #error #endif diff --git a/src/multipole.h b/src/multipole.h index a633554490fe6226f8fe42befb7cb4eaf1e5135f..b9d49dcf0fc3b605849f7b058aef14843b73517d 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -34,6 +34,7 @@ #include "error.h" #include "gravity_derivatives.h" #include "gravity_properties.h" +#include "gravity_softened_derivatives.h" #include "inline.h" #include "kernel_gravity.h" #include "minmax.h" @@ -176,9 +177,15 @@ struct gravity_tensors { /*! Centre of mass of the matter dsitribution */ double CoM[3]; + /*! Centre of mass of the matter dsitribution at the last rebuild */ + double CoM_rebuild[3]; + /*! Upper limit of the CoM<->gpart distance */ double r_max; + /*! Upper limit of the CoM<->gpart distance at the last rebuild */ + double r_max_rebuild; + /*! Multipole mass */ struct multipole m_pole; @@ -1232,12 +1239,10 @@ INLINE static void gravity_P2M(struct gravity_tensors *m, * @param m_b The #multipole to shift. * @param pos_a The position to which m_b will be shifted. * @param pos_b The current postion of the multipole to shift. - * @param periodic Is the calculation periodic ? */ INLINE static void gravity_M2M(struct multipole *m_a, const struct multipole *m_b, - const double pos_a[3], const double pos_b[3], - int periodic) { + const double pos_a[3], const double pos_b[3]) { /* Shift bulk velocity */ m_a->vel[0] = m_b->vel[0]; m_a->vel[1] = m_b->vel[1]; @@ -1501,6 +1506,9 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, const struct gravity_props *props, int periodic) { + /* Recover some constants */ + const double eps2 = props->epsilon2; + /* Compute distance vector */ const double dx = periodic ? box_wrap(pos_b[0] - pos_a[0], 0., 1.) : pos_b[0] - pos_a[0]; @@ -1519,7 +1527,7 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, #endif /* Un-softened case */ - if (r2 > props->epsilon2) { + if (r2 > eps2) { /* 0th order term */ l_b->F_000 += m_a->M_000 * D_000(dx, dy, dz, r_inv); @@ -1618,7 +1626,7 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, m_a->M_010 * D_021(dx, dy, dz, r_inv) + m_a->M_001 * D_012(dx, dy, dz, r_inv); - /* 3rd order multipole term (addition to rank 2)*/ + /* 3rd order multipole term (addition to rank 3)*/ l_b->F_300 += m_a->M_000 * D_300(dx, dy, dz, r_inv); l_b->F_030 += m_a->M_000 * D_030(dx, dy, dz, r_inv); l_b->F_003 += m_a->M_000 * D_003(dx, dy, dz, r_inv); @@ -2041,7 +2049,120 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, /* Softened case */ } else { - message("Un-supported softened M2L interaction."); + + const double eps_inv = props->epsilon_inv; + const double r = r2 * r_inv; + + /* 0th order term */ + l_b->F_000 += m_a->M_000 * D_soft_000(dx, dy, dz, r, eps_inv); + +#if SELF_GRAVITY_MULTIPOLE_ORDER > 0 + + /* 1st order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_100 * D_soft_100(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_010(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_001(dx, dy, dz, r, eps_inv); + + /* 1st order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_000 * D_soft_100(dx, dy, dz, r, eps_inv); + l_b->F_010 += m_a->M_000 * D_soft_010(dx, dy, dz, r, eps_inv); + l_b->F_001 += m_a->M_000 * D_soft_001(dx, dy, dz, r, eps_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 1 + + /* 2nd order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_200 * D_soft_200(dx, dy, dz, r, eps_inv) + + m_a->M_020 * D_soft_020(dx, dy, dz, r, eps_inv) + + m_a->M_002 * D_soft_002(dx, dy, dz, r, eps_inv); + l_b->F_000 += m_a->M_110 * D_soft_110(dx, dy, dz, r, eps_inv) + + m_a->M_101 * D_soft_101(dx, dy, dz, r, eps_inv) + + m_a->M_011 * D_soft_011(dx, dy, dz, r, eps_inv); + + /* 2nd order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_100 * D_soft_200(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_110(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_101(dx, dy, dz, r, eps_inv); + l_b->F_010 += m_a->M_100 * D_soft_110(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_020(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_011(dx, dy, dz, r, eps_inv); + l_b->F_001 += m_a->M_100 * D_soft_101(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_011(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_002(dx, dy, dz, r, eps_inv); + + /* 2nd order multipole term (addition to rank 2)*/ + l_b->F_200 += m_a->M_000 * D_soft_200(dx, dy, dz, r, eps_inv); + l_b->F_020 += m_a->M_000 * D_soft_020(dx, dy, dz, r, eps_inv); + l_b->F_002 += m_a->M_000 * D_soft_002(dx, dy, dz, r, eps_inv); + l_b->F_110 += m_a->M_000 * D_soft_110(dx, dy, dz, r, eps_inv); + l_b->F_101 += m_a->M_000 * D_soft_101(dx, dy, dz, r, eps_inv); + l_b->F_011 += m_a->M_000 * D_soft_011(dx, dy, dz, r, eps_inv); +#endif +#if SELF_GRAVITY_MULTIPOLE_ORDER > 2 + + /* 3rd order multipole term (addition to rank 0)*/ + l_b->F_000 += m_a->M_300 * D_soft_300(dx, dy, dz, r, eps_inv) + + m_a->M_030 * D_soft_030(dx, dy, dz, r, eps_inv) + + m_a->M_003 * D_soft_003(dx, dy, dz, r, eps_inv); + l_b->F_000 += m_a->M_210 * D_soft_210(dx, dy, dz, r, eps_inv) + + m_a->M_201 * D_soft_201(dx, dy, dz, r, eps_inv) + + m_a->M_120 * D_soft_120(dx, dy, dz, r, eps_inv); + l_b->F_000 += m_a->M_021 * D_soft_021(dx, dy, dz, r, eps_inv) + + m_a->M_102 * D_soft_102(dx, dy, dz, r, eps_inv) + + m_a->M_012 * D_soft_012(dx, dy, dz, r, eps_inv); + l_b->F_000 += m_a->M_111 * D_soft_111(dx, dy, dz, r, eps_inv); + + /* 3rd order multipole term (addition to rank 1)*/ + l_b->F_100 += m_a->M_200 * D_soft_300(dx, dy, dz, r, eps_inv) + + m_a->M_020 * D_soft_120(dx, dy, dz, r, eps_inv) + + m_a->M_002 * D_soft_102(dx, dy, dz, r, eps_inv); + l_b->F_100 += m_a->M_110 * D_soft_210(dx, dy, dz, r, eps_inv) + + m_a->M_101 * D_soft_201(dx, dy, dz, r, eps_inv) + + m_a->M_011 * D_soft_111(dx, dy, dz, r, eps_inv); + l_b->F_010 += m_a->M_200 * D_soft_210(dx, dy, dz, r, eps_inv) + + m_a->M_020 * D_soft_030(dx, dy, dz, r, eps_inv) + + m_a->M_002 * D_soft_012(dx, dy, dz, r, eps_inv); + l_b->F_010 += m_a->M_110 * D_soft_120(dx, dy, dz, r, eps_inv) + + m_a->M_101 * D_soft_111(dx, dy, dz, r, eps_inv) + + m_a->M_011 * D_soft_021(dx, dy, dz, r, eps_inv); + l_b->F_001 += m_a->M_200 * D_soft_201(dx, dy, dz, r, eps_inv) + + m_a->M_020 * D_soft_021(dx, dy, dz, r, eps_inv) + + m_a->M_002 * D_soft_003(dx, dy, dz, r, eps_inv); + l_b->F_001 += m_a->M_110 * D_soft_111(dx, dy, dz, r, eps_inv) + + m_a->M_101 * D_soft_102(dx, dy, dz, r, eps_inv) + + m_a->M_011 * D_soft_012(dx, dy, dz, r, eps_inv); + + /* 3rd order multipole term (addition to rank 2)*/ + l_b->F_200 += m_a->M_100 * D_soft_300(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_210(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_201(dx, dy, dz, r, eps_inv); + l_b->F_020 += m_a->M_100 * D_soft_120(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_030(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_021(dx, dy, dz, r, eps_inv); + l_b->F_002 += m_a->M_100 * D_soft_102(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_012(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_003(dx, dy, dz, r, eps_inv); + l_b->F_110 += m_a->M_100 * D_soft_210(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_120(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_111(dx, dy, dz, r, eps_inv); + l_b->F_101 += m_a->M_100 * D_soft_201(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_111(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_102(dx, dy, dz, r, eps_inv); + l_b->F_011 += m_a->M_100 * D_soft_111(dx, dy, dz, r, eps_inv) + + m_a->M_010 * D_soft_021(dx, dy, dz, r, eps_inv) + + m_a->M_001 * D_soft_012(dx, dy, dz, r, eps_inv); + + /* 3rd order multipole term (addition to rank 3)*/ + l_b->F_300 += m_a->M_000 * D_soft_300(dx, dy, dz, r, eps_inv); + l_b->F_030 += m_a->M_000 * D_soft_030(dx, dy, dz, r, eps_inv); + l_b->F_003 += m_a->M_000 * D_soft_003(dx, dy, dz, r, eps_inv); + l_b->F_210 += m_a->M_000 * D_soft_210(dx, dy, dz, r, eps_inv); + l_b->F_201 += m_a->M_000 * D_soft_201(dx, dy, dz, r, eps_inv); + l_b->F_120 += m_a->M_000 * D_soft_120(dx, dy, dz, r, eps_inv); + l_b->F_021 += m_a->M_000 * D_soft_021(dx, dy, dz, r, eps_inv); + l_b->F_102 += m_a->M_000 * D_soft_102(dx, dy, dz, r, eps_inv); + l_b->F_012 += m_a->M_000 * D_soft_012(dx, dy, dz, r, eps_inv); + l_b->F_111 += m_a->M_000 * D_soft_111(dx, dy, dz, r, eps_inv); +#endif } } @@ -2521,20 +2642,29 @@ INLINE static void gravity_L2P(const struct grav_tensor *lb, * @param ma The #multipole of the first #cell. * @param mb The #multipole of the second #cell. * @param theta_crit_inv The inverse of the critical opening angle. + * @param rebuild Are we using the current value of CoM or the ones from + * the last rebuild ? */ __attribute__((always_inline)) INLINE static int gravity_multipole_accept( const struct gravity_tensors *ma, const struct gravity_tensors *mb, - double theta_crit_inv) { + double theta_crit_inv, int rebuild) { - const double r_crit_a = ma->r_max * theta_crit_inv; - const double r_crit_b = mb->r_max * theta_crit_inv; + const double r_crit_a = + (rebuild ? ma->r_max_rebuild : ma->r_max) * theta_crit_inv; + const double r_crit_b = + (rebuild ? mb->r_max_rebuild : mb->r_max) * theta_crit_inv; - const double dx = ma->CoM[0] - mb->CoM[0]; - const double dy = ma->CoM[1] - mb->CoM[1]; - const double dz = ma->CoM[2] - mb->CoM[2]; + const double dx = rebuild ? ma->CoM_rebuild[0] - mb->CoM_rebuild[0] + : ma->CoM[0] - mb->CoM[0]; + const double dy = rebuild ? ma->CoM_rebuild[1] - mb->CoM_rebuild[1] + : ma->CoM[1] - mb->CoM[1]; + const double dz = rebuild ? ma->CoM_rebuild[2] - mb->CoM_rebuild[2] + : ma->CoM[2] - mb->CoM[2]; const double r2 = dx * dx + dy * dy + dz * dz; + // MATTHIEU: Make this mass-dependent ? + /* Multipole acceptance criterion (Dehnen 2002, eq.10) */ return (r2 > (r_crit_a + r_crit_b) * (r_crit_a + r_crit_b)); } diff --git a/src/partition.c b/src/partition.c index 49dbf883e0dea3f00c93ca33ec8cb0248bbfbfaa..499efab263a9031b0116f073af8cebd5fef0c2eb 100644 --- a/src/partition.c +++ b/src/partition.c @@ -524,7 +524,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, t->type != task_type_sub_self && t->type != task_type_sub_self && t->type != task_type_ghost && t->type != task_type_kick1 && t->type != task_type_kick2 && t->type != task_type_timestep && - t->type != task_type_drift && t->type != task_type_init) + t->type != task_type_drift) continue; /* Get the task weight. */ diff --git a/src/profiler.c b/src/profiler.c index ad8338eebfd130d4088f9fd9d4fcc9856c8cc731..62ba881a15a32de63dba3cce95feab79d595aa84 100644 --- a/src/profiler.c +++ b/src/profiler.c @@ -31,6 +31,46 @@ #include "hydro.h" #include "version.h" +/* Array to store the list of file names. Order must match profiler_types + * enumerator and profiler_func_names. */ +const char *profiler_file_names[profiler_length] = {"enginecollecttimesteps", + "enginedrift", + "enginerebuild", + "schedulerreweight", + "schedulerclearwaits", + "schedulerrewait", + "schedulerenqueue", + "engineprintstats", + "enginelaunch", + "spacerebuild", + "enginemaketasks", + "enginemarktasks", + "spaceregrid", + "spacepartssort", + "spacesplit", + "spacegetcellid", + "spacecountparts"}; + +/* Array to store the list of function names. Order must match profiler_types + * enumerator and profiler_file_names. */ +const char *profiler_func_names[profiler_length] = {"engine_collect_timesteps", + "engine_drift", + "engine_rebuild", + "scheduler_reweight", + "scheduler_clear_waits", + "scheduler_rewait", + "scheduler_enqueue", + "engine_print_stats", + "engine_launch", + "space_rebuild", + "engine_maketasks", + "engine_marktasks", + "space_regrid", + "space_parts_sort", + "space_split", + "space_get_cell_id", + "space_count_parts"}; + /** * @brief Resets all timers. * @@ -38,24 +78,8 @@ * function timers. */ void profiler_reset_timers(struct profiler *profiler) { - - profiler->collect_timesteps_time = 0; - profiler->drift_time = 0; - profiler->rebuild_time = 0; - profiler->reweight_time = 0; - profiler->clear_waits_time = 0; - profiler->re_wait_time = 0; - profiler->enqueue_time = 0; - profiler->stats_time = 0; - profiler->launch_time = 0; - profiler->space_rebuild_time = 0; - profiler->engine_maketasks_time = 0; - profiler->engine_marktasks_time = 0; - profiler->space_regrid_time = 0; - profiler->space_parts_sort_time = 0; - profiler->space_split_time = 0; - profiler->space_parts_get_cell_id_time = 0; - profiler->space_count_parts_time = 0; + /* Iterate over times array and reset values. */ + for (int i = 0; i < profiler_length; i++) profiler->times[i] = 0; } /** @@ -66,8 +90,9 @@ void profiler_reset_timers(struct profiler *profiler) { * @param functionName name of function that is being timed. * @param file (return) pointer used to open output file. */ -void profiler_write_timing_info_header(const struct engine *e, char *fileName, - char *functionName, FILE **file) { +void profiler_write_timing_info_header(const struct engine *e, + const char *fileName, + const char *functionName, FILE **file) { /* Create the file name in the format: "fileName_(no. of threads)" */ char fullFileName[200] = ""; @@ -82,13 +107,14 @@ void profiler_write_timing_info_header(const struct engine *e, char *fileName, "Number of threads: %d\n# Number of MPI ranks: %d\n# Hydrodynamic " "scheme: %s\n# Hydrodynamic kernel: %s\n# No. of neighbours: %.2f " "+/- %.2f\n# Eta: %f\n" - "# %6s %14s %14s %10s %10s %16s [%s]\n", + "# %6s %14s %14s %10s %10s %10s %16s [%s]\n", hostname(), functionName, git_revision(), compiler_name(), compiler_version(), e->nr_threads, e->nr_nodes, SPH_IMPLEMENTATION, kernel_name, e->hydro_properties->target_neighbours, e->hydro_properties->delta_neighbours, e->hydro_properties->eta_neighbours, "Step", "Time", "Time-step", - "Updates", "g-Updates", "Wall-clock time", clocks_getunit()); + "Updates", "g-Updates", "s-Updates", "Wall-clock time", + clocks_getunit()); fflush(*file); } @@ -103,44 +129,11 @@ void profiler_write_timing_info_header(const struct engine *e, char *fileName, */ void profiler_write_all_timing_info_headers(const struct engine *e, struct profiler *profiler) { - - profiler_write_timing_info_header(e, "enginecollecttimesteps", - "engine_collect_timesteps", - &profiler->file_engine_collect_timesteps); - profiler_write_timing_info_header(e, "enginedrift", "engine_drift", - &profiler->file_engine_drift); - profiler_write_timing_info_header(e, "enginerebuild", "engine_rebuild", - &profiler->file_engine_rebuild); - profiler_write_timing_info_header(e, "schedulerreweight", - "scheduler_reweight", - &profiler->file_scheduler_reweight); - profiler_write_timing_info_header(e, "schedulerclearwaits", - "scheduler_clear_waits", - &profiler->file_scheduler_clear_waits); - profiler_write_timing_info_header(e, "schedulerrewait", "scheduler_rewait", - &profiler->file_scheduler_re_wait); - profiler_write_timing_info_header(e, "schedulerenqueue", "scheduler_enqueue", - &profiler->file_scheduler_enqueue); - profiler_write_timing_info_header(e, "engineprintstats", "engine_print_stats", - &profiler->file_engine_stats); - profiler_write_timing_info_header(e, "enginelaunch", "engine_launch", - &profiler->file_engine_launch); - profiler_write_timing_info_header(e, "spacerebuild", "space_rebuild", - &profiler->file_space_rebuild); - profiler_write_timing_info_header(e, "enginemaketasks", "engine_maketasks", - &profiler->file_engine_maketasks); - profiler_write_timing_info_header(e, "enginemarktasks", "engine_marktasks", - &profiler->file_engine_marktasks); - profiler_write_timing_info_header(e, "spaceregrid", "space_regrid", - &profiler->file_space_regrid); - profiler_write_timing_info_header(e, "spacepartssort", "space_parts_sort", - &profiler->file_space_parts_sort); - profiler_write_timing_info_header(e, "spacesplit", "space_split", - &profiler->file_space_split); - profiler_write_timing_info_header(e, "spacegetcellid", "space_get_cell_id", - &profiler->file_space_parts_get_cell_id); - profiler_write_timing_info_header(e, "spacecountparts", "space_count_parts", - &profiler->file_space_count_parts); + /* Iterate over files array and write file headers. */ + for (int i = 0; i < profiler_length; i++) { + profiler_write_timing_info_header( + e, profiler_file_names[i], profiler_func_names[i], &profiler->files[i]); + } } /** @@ -153,8 +146,9 @@ void profiler_write_all_timing_info_headers(const struct engine *e, void profiler_write_timing_info(const struct engine *e, ticks time, FILE *file) { - fprintf(file, " %6d %14e %14e %10zu %10zu %21.3f\n", e->step, e->time, - e->timeStep, e->updates, e->g_updates, clocks_from_ticks(time)); + fprintf(file, " %6d %14e %14e %10zu %10zu %10zu %21.3f\n", e->step, e->time, + e->timeStep, e->updates, e->g_updates, e->s_updates, + clocks_from_ticks(time)); fflush(file); } @@ -169,39 +163,10 @@ void profiler_write_timing_info(const struct engine *e, ticks time, void profiler_write_all_timing_info(const struct engine *e, struct profiler *profiler) { - profiler_write_timing_info(e, profiler->drift_time, - profiler->file_engine_drift); - profiler_write_timing_info(e, profiler->rebuild_time, - profiler->file_engine_rebuild); - profiler_write_timing_info(e, profiler->reweight_time, - profiler->file_scheduler_reweight); - profiler_write_timing_info(e, profiler->clear_waits_time, - profiler->file_scheduler_clear_waits); - profiler_write_timing_info(e, profiler->re_wait_time, - profiler->file_scheduler_re_wait); - profiler_write_timing_info(e, profiler->enqueue_time, - profiler->file_scheduler_enqueue); - profiler_write_timing_info(e, profiler->stats_time, - profiler->file_engine_stats); - profiler_write_timing_info(e, profiler->launch_time, - profiler->file_engine_launch); - profiler_write_timing_info(e, profiler->space_rebuild_time, - profiler->file_space_rebuild); - profiler_write_timing_info(e, profiler->engine_maketasks_time, - profiler->file_engine_maketasks); - profiler_write_timing_info(e, profiler->engine_marktasks_time, - profiler->file_engine_marktasks); - profiler_write_timing_info(e, profiler->space_regrid_time, - profiler->file_space_regrid); - profiler_write_timing_info(e, profiler->space_parts_sort_time, - profiler->file_space_parts_sort); - profiler_write_timing_info(e, profiler->space_split_time, - profiler->file_space_split); - profiler_write_timing_info(e, profiler->space_parts_get_cell_id_time, - profiler->file_space_parts_get_cell_id); - profiler_write_timing_info(e, profiler->space_count_parts_time, - profiler->file_space_count_parts); - + /* Iterate over times array and print timing info to files. */ + for (int i = 0; i < profiler_length; i++) { + profiler_write_timing_info(e, profiler->times[i], profiler->files[i]); + } /* Reset timers. */ profiler_reset_timers(profiler); } @@ -215,20 +180,6 @@ void profiler_write_all_timing_info(const struct engine *e, */ void profiler_close_files(struct profiler *profiler) { - fclose(profiler->file_engine_drift); - fclose(profiler->file_engine_rebuild); - fclose(profiler->file_scheduler_reweight); - fclose(profiler->file_scheduler_clear_waits); - fclose(profiler->file_scheduler_re_wait); - fclose(profiler->file_scheduler_enqueue); - fclose(profiler->file_engine_stats); - fclose(profiler->file_engine_launch); - fclose(profiler->file_space_rebuild); - fclose(profiler->file_engine_maketasks); - fclose(profiler->file_engine_marktasks); - fclose(profiler->file_space_regrid); - fclose(profiler->file_space_parts_sort); - fclose(profiler->file_space_split); - fclose(profiler->file_space_parts_get_cell_id); - fclose(profiler->file_space_count_parts); + /* Iterate over files array and close files. */ + for (int i = 0; i < profiler_length; i++) fclose(profiler->files[i]); } diff --git a/src/profiler.h b/src/profiler.h index b00bc986ece8b78282b11ce317a6746ecba5a50f..911d4747912d64dd46d2cf59369670ff26a92012 100644 --- a/src/profiler.h +++ b/src/profiler.h @@ -25,46 +25,33 @@ /* Local includes */ #include "engine.h" +/* Enumerator to be used as an index into the timers and files array. To add an + * extra timer extend this list, before the profiler_length value.*/ +enum profiler_types { + profiler_engine_collect_timesteps = 0, + profiler_engine_drift, + profiler_engine_rebuild, + profiler_scheduler_reweight, + profiler_scheduler_clear_waits, + profiler_scheduler_re_wait, + profiler_scheduler_enqueue, + profiler_engine_stats, + profiler_engine_launch, + profiler_space_rebuild, + profiler_engine_maketasks, + profiler_engine_marktasks, + profiler_space_regrid, + profiler_space_parts_sort, + profiler_space_split, + profiler_space_parts_get_cell_id, + profiler_space_count_parts, + profiler_length +}; + /* Profiler that holds file pointers and time taken in functions. */ struct profiler { - - /* File pointers for timing info. */ - FILE *file_engine_collect_timesteps; - FILE *file_engine_drift; - FILE *file_engine_rebuild; - FILE *file_scheduler_reweight; - FILE *file_scheduler_clear_waits; - FILE *file_scheduler_re_wait; - FILE *file_scheduler_enqueue; - FILE *file_engine_stats; - FILE *file_engine_launch; - FILE *file_space_rebuild; - FILE *file_engine_maketasks; - FILE *file_engine_marktasks; - FILE *file_space_regrid; - FILE *file_space_parts_sort; - FILE *file_space_split; - FILE *file_space_parts_get_cell_id; - FILE *file_space_count_parts; - - /* Time taken in functions. */ - ticks collect_timesteps_time; - ticks drift_time; - ticks rebuild_time; - ticks reweight_time; - ticks clear_waits_time; - ticks re_wait_time; - ticks enqueue_time; - ticks stats_time; - ticks launch_time; - ticks space_rebuild_time; - ticks engine_maketasks_time; - ticks engine_marktasks_time; - ticks space_regrid_time; - ticks space_parts_sort_time; - ticks space_split_time; - ticks space_parts_get_cell_id_time; - ticks space_count_parts_time; + FILE *files[profiler_length]; + ticks times[profiler_length]; }; /* Function prototypes. */ diff --git a/src/runner.c b/src/runner.c index 99b399b044f5afdc1729e58ca9d36c7d016ccd70..b4dbe7b377255b27520b611e0ca9e24289b38ba7 100644 --- a/src/runner.c +++ b/src/runner.c @@ -289,6 +289,26 @@ void runner_do_sort_ascending(struct entry *sort, int N) { } } +/** + * @brief Recursively checks that the flags are consistent in a cell hierarchy. + * + * Debugging function. + * + * @param c The #cell to check. + * @param flags The sorting flags to check. + */ +void runner_check_sorts(struct cell *c, int flags) { + +#ifdef SWIFT_DEBUG_CHECKS + if (flags & ~c->sorted) error("Inconsistent sort flags (downward)!"); + if (c->split) + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_check_sorts(c->progeny[k], c->sorted); +#else + error("Calling debugging code without debugging flag activated."); +#endif +} + /** * @brief Sort the particles in the given cell along all cardinal directions. * @@ -303,16 +323,37 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { struct entry *finger; struct entry *fingers[8]; struct part *parts = c->parts; + struct xpart *xparts = c->xparts; struct entry *sort; - int j, k, count = c->count; - int i, ind, off[8], inds[8], temp_i, missing; + const int count = c->count; float buff[8]; - double px[3]; - TIMER_TIC + TIMER_TIC; + + /* Check that the particles have been moved to the current time */ + if (!cell_is_drifted(c, r->e)) error("Sorting un-drifted cell"); - /* Clean-up the flags, i.e. filter out what's already been sorted. */ - flags &= ~c->sorted; +#ifdef SWIFT_DEBUG_CHECKS + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts(c, c->sorted); + + /* Make sure the sort flags are consistent (upard). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->sorted & ~c->sorted) error("Inconsistent sort flags (upward)."); + } +#endif + + /* Clean-up the flags, i.e. filter out what's already been sorted, but + only if the sorts are recent. */ + if (c->ti_sort == r->e->ti_current) { + /* Ignore dimensions that have been sorted in this timestep. */ + // flags &= ~c->sorted; + } else { + /* Clean old (stale) sorts. */ + flags |= c->sorted; + c->sorted = 0; + } if (flags == 0) return; /* start by allocating the entry arrays. */ @@ -329,27 +370,35 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { if (c->split) { /* Fill in the gaps within the progeny. */ - for (k = 0; k < 8; k++) { - if (c->progeny[k] == NULL) continue; - missing = flags & ~c->progeny[k]->sorted; - if (missing) runner_do_sort(r, c->progeny[k], missing, 0); + float dx_max_sort = 0.0f; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) { + if (flags & ~c->progeny[k]->sorted || + c->progeny[k]->dx_max_sort > c->dmin * space_maxreldx) + runner_do_sort(r, c->progeny[k], flags, 0); + dx_max_sort = max(dx_max_sort, c->progeny[k]->dx_max_sort); + } } + c->dx_max_sort = dx_max_sort; /* Loop over the 13 different sort arrays. */ - for (j = 0; j < 13; j++) { + for (int j = 0; j < 13; j++) { /* Has this sort array been flagged? */ if (!(flags & (1 << j))) continue; /* Init the particle index offsets. */ - for (off[0] = 0, k = 1; k < 8; k++) + int off[8]; + off[0] = 0; + for (int k = 1; k < 8; k++) if (c->progeny[k - 1] != NULL) off[k] = off[k - 1] + c->progeny[k - 1]->count; else off[k] = off[k - 1]; /* Init the entries and indices. */ - for (k = 0; k < 8; k++) { + int inds[8]; + for (int k = 0; k < 8; k++) { inds[k] = k; if (c->progeny[k] != NULL && c->progeny[k]->count > 0) { fingers[k] = &c->progeny[k]->sort[j * (c->progeny[k]->count + 1)]; @@ -360,17 +409,17 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { } /* Sort the buffer. */ - for (i = 0; i < 7; i++) - for (k = i + 1; k < 8; k++) + for (int i = 0; i < 7; i++) + for (int k = i + 1; k < 8; k++) if (buff[inds[k]] < buff[inds[i]]) { - temp_i = inds[i]; + int temp_i = inds[i]; inds[i] = inds[k]; inds[k] = temp_i; } /* For each entry in the new sort list. */ finger = &sort[j * (count + 1)]; - for (ind = 0; ind < count; ind++) { + for (int ind = 0; ind < count; ind++) { /* Copy the minimum into the new sort array. */ finger[ind].d = buff[inds[0]]; @@ -381,8 +430,8 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { buff[inds[0]] = fingers[inds[0]]->d; /* Find the smallest entry. */ - for (k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { - temp_i = inds[k - 1]; + for (int k = 1; k < 8 && buff[inds[k]] < buff[inds[k - 1]]; k++) { + int temp_i = inds[k - 1]; inds[k - 1] = inds[k]; inds[k] = temp_i; } @@ -403,12 +452,19 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { /* Otherwise, just sort. */ else { + /* Reset the sort distance if we are in a local cell */ + if (xparts != NULL) { + for (int k = 0; k < count; k++) { + xparts[k].x_diff_sort[0] = 0.0f; + xparts[k].x_diff_sort[1] = 0.0f; + xparts[k].x_diff_sort[2] = 0.0f; + } + } + /* Fill the sort array. */ - for (k = 0; k < count; k++) { - px[0] = parts[k].x[0]; - px[1] = parts[k].x[1]; - px[2] = parts[k].x[2]; - for (j = 0; j < 13; j++) + for (int k = 0; k < count; k++) { + const double px[3] = {parts[k].x[0], parts[k].x[1], parts[k].x[2]}; + for (int j = 0; j < 13; j++) if (flags & (1 << j)) { sort[j * (count + 1) + k].i = k; sort[j * (count + 1) + k].d = px[0] * runner_shift[j][0] + @@ -418,26 +474,47 @@ void runner_do_sort(struct runner *r, struct cell *c, int flags, int clock) { } /* Add the sentinel and sort. */ - for (j = 0; j < 13; j++) + for (int j = 0; j < 13; j++) if (flags & (1 << j)) { sort[j * (count + 1) + count].d = FLT_MAX; sort[j * (count + 1) + count].i = 0; runner_do_sort_ascending(&sort[j * (count + 1)], count); c->sorted |= (1 << j); } + + /* Finally, clear the dx_max_sort field of this cell. */ + c->dx_max_sort = 0.f; + + /* If this was not just an update, invalidate the sorts above this one. */ + if (c->ti_sort < r->e->ti_current) + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) + finger->sorted = 0; } + /* Update the sort timer. */ + c->ti_sort = r->e->ti_current; + #ifdef SWIFT_DEBUG_CHECKS /* Verify the sorting. */ - for (j = 0; j < 13; j++) { + for (int j = 0; j < 13; j++) { if (!(flags & (1 << j))) continue; finger = &sort[j * (count + 1)]; - for (k = 1; k < count; k++) { + for (int k = 1; k < count; k++) { if (finger[k].d < finger[k - 1].d) error("Sorting failed, ascending array."); if (finger[k].i >= count) error("Sorting failed, indices borked."); } } + + /* Make sure the sort flags are consistent (downward). */ + runner_check_sorts(c, flags); + + /* Make sure the sort flags are consistent (upward). */ + for (struct cell *finger = c->parent; finger != NULL; + finger = finger->parent) { + if (finger->sorted & ~c->sorted) error("Inconsistent sort flags."); + } #endif if (clock) TIMER_TOC(timer_dosort); @@ -480,63 +557,6 @@ void runner_do_init_grav(struct runner *r, struct cell *c, int timer) { if (timer) TIMER_TOC(timer_init_grav); } -/** - * @brief Initialize the particles before the density calculation. - * - * @param r The runner thread. - * @param c The cell. - * @param timer 1 if the time is to be recorded. - */ -void runner_do_init(struct runner *r, struct cell *c, int timer) { - - struct part *restrict parts = c->parts; - struct gpart *restrict gparts = c->gparts; - const int count = c->count; - const int gcount = c->gcount; - const struct engine *e = r->e; - const struct space *s = e->s; - - TIMER_TIC; - - /* Anything to do here? */ - if (!cell_is_active(c, e)) return; - - /* Recurse? */ - if (c->split) { - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_do_init(r, c->progeny[k], 0); - } else { - - /* Loop over the parts in this cell. */ - for (int i = 0; i < count; i++) { - - /* Get a direct pointer on the part. */ - struct part *restrict p = &parts[i]; - - if (part_is_active(p, e)) { - - /* Get ready for a density calculation */ - hydro_init_part(p, &s->hs); - } - } - - /* Loop over the gparts in this cell. */ - for (int i = 0; i < gcount; i++) { - - /* Get a direct pointer on the part. */ - struct gpart *restrict gp = &gparts[i]; - - if (gpart_is_active(gp, e)) { - - /* Get ready for a density calculation */ - gravity_init_gpart(gp); - } - } - } - - if (timer) TIMER_TOC(timer_init); -} - /** * @brief Intermediate task after the gradient loop that does final operations * on the gradient quantities and optionally slope limits the gradients @@ -757,9 +777,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { #ifdef SWIFT_DEBUG_CHECKS if (count) { - message("Smoothing length failed to converge on %i particles.", count); - - error("Aborting...."); + error("Smoothing length failed to converge on %i particles.", count); } #else if (count) @@ -784,11 +802,8 @@ static void runner_do_unskip(struct cell *c, struct engine *e) { /* Ignore empty cells. */ if (c->count == 0 && c->gcount == 0) return; - /* Unskip any active tasks. */ - if (cell_is_active(c, e)) { - const int forcerebuild = cell_unskip_tasks(c, &e->sched); - if (forcerebuild) atomic_inc(&e->forcerebuild); - } + /* Skip inactive cells. */ + if (!cell_is_active(c, e)) return; /* Recurse */ if (c->split) { @@ -799,6 +814,10 @@ static void runner_do_unskip(struct cell *c, struct engine *e) { } } } + + /* Unskip any active tasks. */ + const int forcerebuild = cell_unskip_tasks(c, &e->sched); + if (forcerebuild) atomic_inc(&e->forcerebuild); } /** @@ -1116,6 +1135,14 @@ void runner_do_timestep(struct runner *r, struct cell *c, int timer) { TIMER_TIC; + /* Anything to do here? */ + if (!cell_is_active(c, e)) { + c->updated = 0; + c->g_updated = 0; + c->s_updated = 0; + return; + } + int updated = 0, g_updated = 0, s_updated = 0; integertime_t ti_end_min = max_nr_timesteps, ti_end_max = 0, ti_beg_max = 0; @@ -1356,9 +1383,8 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { if (part_is_active(p, e)) { - /* First, finish the force loop */ + /* Finish the force loop */ hydro_end_force(p); - if (p->gpart != NULL) gravity_end_force(p->gpart, const_G); } } @@ -1368,25 +1394,44 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { /* Get a handle on the gpart. */ struct gpart *restrict gp = &gparts[k]; - if (gp->type == swift_type_dark_matter) { + if (gpart_is_active(gp, e)) { - if (gpart_is_active(gp, e)) gravity_end_force(gp, const_G); - } + /* Finish the force calculation */ + gravity_end_force(gp, const_G); + +#ifdef SWIFT_NO_GRAVITY_BELOW_ID + /* Cancel gravity forces of these particles */ + if ((gp->type == swift_type_dark_matter && + gp->id_or_neg_offset < SWIFT_NO_GRAVITY_BELOW_ID) || + (gp->type == swift_type_gas && + parts[-gp->id_or_neg_offset].id < SWIFT_NO_GRAVITY_BELOW_ID) || + (gp->type == swift_type_star && + sparts[-gp->id_or_neg_offset].id < SWIFT_NO_GRAVITY_BELOW_ID)) { + + /* Don't move ! */ + gp->a_grav[0] = 0.f; + gp->a_grav[1] = 0.f; + gp->a_grav[2] = 0.f; + } +#endif #ifdef SWIFT_DEBUG_CHECKS - if (e->policy & engine_policy_self_gravity && gpart_is_active(gp, e)) { + if (e->policy & engine_policy_self_gravity) { - /* Check that this gpart has interacted with all the other particles - * (via direct or multipoles) in the box */ - gp->num_interacted++; - if (gp->num_interacted != (long long)e->s->nr_gparts) - error( - "g-particle (id=%lld, type=%d) did not interact gravitationally " - "with all other gparts gp->num_interacted=%lld, total_gparts=%zd", - gp->id_or_neg_offset, gp->type, gp->num_interacted, - e->s->nr_gparts); - } + /* Check that this gpart has interacted with all the other + * particles (via direct or multipoles) in the box */ + gp->num_interacted++; + if (gp->num_interacted != (long long)e->s->nr_gparts) + error( + "g-particle (id=%lld, type=%d) did not interact " + "gravitationally " + "with all other gparts gp->num_interacted=%lld, " + "total_gparts=%zd", + gp->id_or_neg_offset, gp->type, gp->num_interacted, + e->s->nr_gparts); + } #endif + } } /* Loop over the star particles in this cell. */ @@ -1396,9 +1441,8 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { struct spart *restrict sp = &sparts[k]; if (spart_is_active(sp, e)) { - /* First, finish the force loop */ + /* Finish the force loop */ star_end_force(sp); - gravity_end_force(sp->gpart, const_G); } } } @@ -1411,9 +1455,11 @@ void runner_do_end_force(struct runner *r, struct cell *c, int timer) { * * @param r The runner thread. * @param c The cell. + * @param clear_sorts Should we clear the sort flag and hence trigger a sort ? * @param timer Are we timing this ? */ -void runner_do_recv_part(struct runner *r, struct cell *c, int timer) { +void runner_do_recv_part(struct runner *r, struct cell *c, int clear_sorts, + int timer) { #ifdef WITH_MPI @@ -1429,6 +1475,9 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int timer) { timebin_t time_bin_max = 0; float h_max = 0.f; + /* Clear this cell's sorted mask. */ + if (clear_sorts) c->sorted = 0; + /* If this cell is a leaf, collect the particle data. */ if (!c->split) { @@ -1454,7 +1503,7 @@ void runner_do_recv_part(struct runner *r, struct cell *c, int timer) { else { for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) { - runner_do_recv_part(r, c->progeny[k], 0); + runner_do_recv_part(r, c->progeny[k], clear_sorts, 0); ti_end_min = min(ti_end_min, c->progeny[k]->ti_end_min); ti_end_max = max(ti_end_max, c->progeny[k]->ti_end_max); h_max = max(h_max, c->progeny[k]->h_max); @@ -1686,7 +1735,7 @@ void *runner_main(void *data) { if (!cell_is_active(ci, e) && t->type != task_type_sort && t->type != task_type_send && t->type != task_type_recv && - t->type != task_type_kick1) + t->type != task_type_kick1 && t->type != task_type_drift) error( "Task (type='%s/%s') should have been skipped ti_current=%lld " "c->ti_end_min=%lld", @@ -1806,9 +1855,6 @@ void *runner_main(void *data) { case task_type_init_grav: runner_do_init_grav(r, ci, 1); break; - case task_type_init: - runner_do_init(r, ci, 1); - break; case task_type_ghost: runner_do_ghost(r, ci, 1); break; @@ -1841,9 +1887,10 @@ void *runner_main(void *data) { if (t->subtype == task_subtype_tend) { cell_unpack_ti_ends(ci, t->buff); free(t->buff); - } else if (t->subtype == task_subtype_xv || - t->subtype == task_subtype_rho) { - runner_do_recv_part(r, ci, 1); + } else if (t->subtype == task_subtype_xv) { + runner_do_recv_part(r, ci, 1, 1); + } else if (t->subtype == task_subtype_rho) { + runner_do_recv_part(r, ci, 1, 1); } else if (t->subtype == task_subtype_gpart) { runner_do_recv_gpart(r, ci, 1); } else if (t->subtype == task_subtype_spart) { diff --git a/src/runner.h b/src/runner.h index 49f7cd88a0b345cac1b29b7fd38cf59b21268012..5cb08cc14222c074b229c9bdf96ac47a072cd34e 100644 --- a/src/runner.h +++ b/src/runner.h @@ -49,11 +49,13 @@ struct runner { /*! The engine owing this runner. */ struct engine *e; +#ifdef WITH_VECTORIZATION /*! The particle cache of cell ci. */ struct cache ci_cache; /*! The particle cache of cell cj. */ struct cache cj_cache; +#endif }; /* Function prototypes. */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 37147d32a901ed97ede1825ee232c41aa4f41fc6..839e492956140bd2b633d1e7c99c2b2a43f89629 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -35,12 +35,6 @@ #define _DOPAIR2(f) PASTE(runner_dopair2, f) #define DOPAIR2 _DOPAIR2(FUNCTION) -#define _DOPAIR1_NOSORT(f) PASTE(runner_dopair1_nosort, f) -#define DOPAIR1_NOSORT _DOPAIR1_NOSORT(FUNCTION) - -#define _DOPAIR2_NOSORT(f) PASTE(runner_dopair2_nosort, f) -#define DOPAIR2_NOSORT _DOPAIR2_NOSORT(FUNCTION) - #define _DOPAIR_SUBSET(f) PASTE(runner_dopair_subset, f) #define DOPAIR_SUBSET _DOPAIR_SUBSET(FUNCTION) @@ -50,11 +44,14 @@ #define _DOPAIR_SUBSET_NAIVE(f) PASTE(runner_dopair_subset_naive, f) #define DOPAIR_SUBSET_NAIVE _DOPAIR_SUBSET_NAIVE(FUNCTION) -#define _DOPAIR_NAIVE(f) PASTE(runner_dopair_naive, f) -#define DOPAIR_NAIVE _DOPAIR_NAIVE(FUNCTION) +#define _DOPAIR1_NAIVE(f) PASTE(runner_dopair1_naive, f) +#define DOPAIR1_NAIVE _DOPAIR1_NAIVE(FUNCTION) -#define _DOSELF_NAIVE(f) PASTE(runner_doself_naive, f) -#define DOSELF_NAIVE _DOSELF_NAIVE(FUNCTION) +#define _DOPAIR2_NAIVE(f) PASTE(runner_dopair2_naive, f) +#define DOPAIR2_NAIVE _DOPAIR2_NAIVE(FUNCTION) + +#define _DOSELF2_NAIVE(f) PASTE(runner_doself2_naive, f) +#define DOSELF2_NAIVE _DOSELF2_NAIVE(FUNCTION) #define _DOSELF1(f) PASTE(runner_doself1, f) #define DOSELF1 _DOSELF1(FUNCTION) @@ -110,21 +107,164 @@ #define _TIMER_DOPAIR_SUBSET(f) PASTE(timer_dopair_subset, f) #define TIMER_DOPAIR_SUBSET _TIMER_DOPAIR_SUBSET(FUNCTION) -#include "runner_doiact_nosort.h" +/** + * @brief Compute the interactions between a cell pair (non-symmetric case). + * + * Inefficient version using a brute-force algorithm. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The second #cell. + */ +void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { + + const struct engine *e = r->e; + +#ifndef SWIFT_DEBUG_CHECKS + error("Don't use in actual runs ! Slow code !"); +#endif + +#ifdef WITH_VECTORIZATION + int icount = 0; + float r2q[VEC_SIZE] __attribute__((aligned(16))); + float hiq[VEC_SIZE] __attribute__((aligned(16))); + float hjq[VEC_SIZE] __attribute__((aligned(16))); + float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); + struct part *piq[VEC_SIZE], *pjq[VEC_SIZE]; +#endif + TIMER_TIC; + + /* Anything to do here? */ + if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + + const int count_i = ci->count; + const int count_j = cj->count; + struct part *restrict parts_i = ci->parts; + struct part *restrict parts_j = cj->parts; + + /* Get the relative distance between the pairs, wrapping. */ + double shift[3] = {0.0, 0.0, 0.0}; + for (int k = 0; k < 3; k++) { + if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) + shift[k] = e->s->dim[k]; + else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) + shift[k] = -e->s->dim[k]; + } + + /* Loop over the parts in ci. */ + for (int pid = 0; pid < count_i; pid++) { + + /* Get a hold of the ith part in ci. */ + struct part *restrict pi = &parts_i[pid]; + const float hi = pi->h; + + double pix[3]; + for (int k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + const float hig2 = hi * hi * kernel_gamma2; + + /* Loop over the parts in cj. */ + for (int pjd = 0; pjd < count_j; pjd++) { + + /* Get a pointer to the jth particle. */ + struct part *restrict pj = &parts_j[pjd]; + + /* Compute the pairwise distance. */ + float r2 = 0.0f; + float dx[3]; + for (int k = 0; k < 3; k++) { + dx[k] = pix[k] - pj->x[k]; + r2 += dx[k] * dx[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + +#ifndef WITH_VECTORIZATION + + IACT_NONSYM(r2, dx, hi, pj->h, pi, pj); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = dx[0]; + dxq[3 * icount + 1] = dx[1]; + dxq[3 * icount + 2] = dx[2]; + hiq[icount] = hi; + hjq[icount] = pj->h; + piq[icount] = pi; + pjq[icount] = pj; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); + icount = 0; + } + +#endif + } + if (r2 < pj->h * pj->h * kernel_gamma2) { + +#ifndef WITH_VECTORIZATION + + for (int k = 0; k < 3; k++) dx[k] = -dx[k]; + IACT_NONSYM(r2, dx, pj->h, hi, pj, pi); + +#else + + /* Add this interaction to the queue. */ + r2q[icount] = r2; + dxq[3 * icount + 0] = -dx[0]; + dxq[3 * icount + 1] = -dx[1]; + dxq[3 * icount + 2] = -dx[2]; + hiq[icount] = pj->h; + hjq[icount] = hi; + piq[icount] = pj; + pjq[icount] = pi; + icount += 1; + + /* Flush? */ + if (icount == VEC_SIZE) { + IACT_NONSYM_VEC(r2q, dxq, hiq, hjq, piq, pjq); + icount = 0; + } + +#endif + } + + } /* loop over the parts in cj. */ + + } /* loop over the parts in ci. */ + +#ifdef WITH_VECTORIZATION + /* Pick up any leftovers. */ + if (icount > 0) + for (int k = 0; k < icount; k++) + IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); +#endif + + TIMER_TOC(TIMER_DOPAIR); +} /** - * @brief Compute the interactions between a cell pair. + * @brief Compute the interactions between a cell pair (symmetric case). + * + * Inefficient version using a brute-force algorithm. * * @param r The #runner. * @param ci The first #cell. * @param cj The second #cell. */ -void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { +void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, + struct cell *restrict cj) { const struct engine *e = r->e; +#ifndef SWIFT_DEBUG_CHECKS error("Don't use in actual runs ! Slow code !"); +#endif #ifdef WITH_OLD_VECTORIZATION int icount = 0; @@ -221,11 +361,21 @@ void DOPAIR_NAIVE(struct runner *r, struct cell *restrict ci, TIMER_TOC(TIMER_DOPAIR); } -void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { +/** + * @brief Compute the interactions within a cell (symmetric case). + * + * Inefficient version using a brute-force algorithm. + * + * @param r The #runner. + * @param c The #cell. + */ +void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { const struct engine *e = r->e; +#ifndef SWIFT_DEBUG_CHECKS error("Don't use in actual runs ! Slow code !"); +#endif #ifdef WITH_OLD_VECTORIZATION int icount = 0; @@ -314,6 +464,8 @@ void DOSELF_NAIVE(struct runner *r, struct cell *restrict c) { * @brief Compute the interactions between a cell pair, but only for the * given indices in ci. * + * Version using a brute-force algorithm. + * * @param r The #runner. * @param ci The first #cell. * @param parts_i The #part to interact with @c cj. @@ -325,9 +477,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, struct part *restrict parts_i, int *restrict ind, int count, struct cell *restrict cj) { - struct engine *e = r->e; - - error("Don't use in actual runs ! Slow code !"); + const struct engine *e = r->e; #ifdef WITH_OLD_VECTORIZATION int icount = 0; @@ -362,6 +512,12 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, const float hi = pi->h; const float hig2 = hi * hi * kernel_gamma2; +#ifdef SWIFT_DEBUG_CHECKS + if (!part_is_active(pi, e)) + error("Trying to correct smoothing length of inactive particle !"); + +#endif + /* Loop over the parts in cj. */ for (int pjd = 0; pjd < count_j; pjd++) { @@ -376,6 +532,14 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, r2 += dx[k] * dx[k]; } +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (pi->ti_drift != e->ti_current) + error("Particle pi not drifted to current time"); + if (pj->ti_drift != e->ti_current) + error("Particle pj not drifted to current time"); +#endif + /* Hit or miss? */ if (r2 < hig2) { @@ -416,7 +580,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif - TIMER_TOC(timer_dopair_subset); + TIMER_TOC(timer_dopair_subset_naive); } /** @@ -436,13 +600,6 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, struct engine *e = r->e; -#ifdef WITH_MPI - if (ci->nodeID != cj->nodeID) { - DOPAIR_SUBSET_NOSORT(r, ci, parts_i, ind, count, cj); - return; - } -#endif - #ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -478,11 +635,15 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, sid = sortlistID[sid]; /* Have the cells been sorted? */ - if (!(cj->sorted & (1 << sid))) error("Trying to interact unsorted cells."); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort > space_maxreldx * cj->dmin) { + DOPAIR_SUBSET_NAIVE(r, ci, parts_i, ind, count, cj); + return; + } /* Pick-out the sorted lists. */ const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; - const float dxj = cj->dx_max; + const float dxj = cj->dx_max_sort; /* Parts are on the left? */ if (!flipped) { @@ -714,7 +875,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, IACT_NONSYM(r2q[k], &dxq[3 * k], hiq[k], hjq[k], piq[k], pjq[k]); #endif - TIMER_TOC(timer_dopair_subset); + TIMER_TOC(timer_doself_subset); } /** @@ -728,13 +889,6 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct engine *restrict e = r->e; -#ifdef WITH_MPI - if (ci->nodeID != cj->nodeID) { - DOPAIR1_NOSORT(r, ci, cj); - return; - } -#endif - #ifdef WITH_OLD_VECTORIZATION int icount = 0; float r2q[VEC_SIZE] __attribute__((aligned(16))); @@ -749,16 +903,18 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + error("Interacting undrifted cells."); /* Get the sort ID. */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) - error("Trying to interact unsorted cells."); + if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) + runner_do_sort(r, cj, (1 << sid), 1); /* Get the cutoff shift. */ double rshift = 0.0; @@ -768,6 +924,29 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)]; const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the dx_max_sort values in the cell are indeed an upper + bound on particle movement. */ + for (int pid = 0; pid < ci->count; pid++) { + const struct part *p = &ci->parts[sort_i[pid].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > + 1.0e-6 * max(fabsf(d), ci->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } + for (int pjd = 0; pjd < cj->count; pjd++) { + const struct part *p = &cj->parts[sort_j[pjd].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > + 1.0e-6 * max(fabsf(d), cj->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } +#endif /* SWIFT_DEBUG_CHECKS */ + /* Get some other useful values. */ const double hi_max = ci->h_max * kernel_gamma - rshift; const double hj_max = cj->h_max * kernel_gamma; @@ -777,7 +956,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj) { struct part *restrict parts_j = cj->parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const float dx_max = (ci->dx_max + cj->dx_max); + const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); if (cell_is_active(ci, e)) { @@ -948,13 +1127,6 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct engine *restrict e = r->e; -#ifdef WITH_MPI - if (ci->nodeID != cj->nodeID) { - DOPAIR2_NOSORT(r, ci, cj); - return; - } -#endif - #ifdef WITH_OLD_VECTORIZATION int icount1 = 0; float r2q1[VEC_SIZE] __attribute__((aligned(16))); @@ -975,16 +1147,18 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e)) error("Cell ci not drifted"); - if (!cell_is_drifted(cj, e)) error("Cell cj not drifted"); + if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + error("Interacting undrifted cells."); /* Get the shift ID. */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) - error("Trying to interact unsorted cells."); + if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) + runner_do_sort(r, cj, (1 << sid), 1); /* Get the cutoff shift. */ double rshift = 0.0; @@ -994,6 +1168,29 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)]; struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the dx_max_sort values in the cell are indeed an upper + bound on particle movement. */ + for (int pid = 0; pid < ci->count; pid++) { + const struct part *p = &ci->parts[sort_i[pid].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > + 1.0e-6 * max(fabsf(d), ci->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } + for (int pjd = 0; pjd < cj->count; pjd++) { + const struct part *p = &cj->parts[sort_j[pjd].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > + 1.0e-6 * max(fabsf(d), cj->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } +#endif /* SWIFT_DEBUG_CHECKS */ + /* Get some other useful values. */ const double hi_max = ci->h_max * kernel_gamma - rshift; const double hj_max = cj->h_max * kernel_gamma; @@ -1003,7 +1200,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { struct part *restrict parts_j = cj->parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const double dx_max = (ci->dx_max + cj->dx_max); + const double dx_max = (ci->dx_max_sort + cj->dx_max_sort); /* Collect the number of parts left and right below dt. */ int countdt_i = 0, countdt_j = 0; @@ -1400,7 +1597,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); + if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); struct part *restrict parts = c->parts; const int count = c->count; @@ -1876,7 +2073,8 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Recurse? */ if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + + cj->dx_max_sort < h / 2) { /* Different types of flags. */ @@ -2080,9 +2278,17 @@ void DOSUB_PAIR1(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Otherwise, compute the pair directly. */ else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort > ci->dmin * space_maxreldx) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort > cj->dmin * space_maxreldx) + runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. */ #if (DOPAIR1 == runner_dopair1_density) && defined(WITH_VECTORIZATION) && \ @@ -2110,10 +2316,6 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { /* Should we even bother? */ if (ci->count == 0 || !cell_is_active(ci, r->e)) return; -#ifdef SWIFT_DEBUG_CHECKS - cell_is_drifted(ci, r->e); -#endif - /* Recurse? */ if (ci->split) { @@ -2129,6 +2331,10 @@ void DOSUB_SELF1(struct runner *r, struct cell *ci, int gettimer) { /* Otherwise, compute self-interaction. */ else { + + /* Drift the cell to the current timestep if needed. */ + if (!cell_is_drifted(ci, r->e)) cell_drift_particles(ci, r->e); + #if (DOSELF1 == runner_doself1_density) && defined(WITH_VECTORIZATION) && \ defined(GADGET2_SPH) runner_doself1_density_vec(r, ci); @@ -2174,7 +2380,8 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Recurse? */ if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + + cj->dx_max_sort < h / 2) { /* Different types of flags. */ @@ -2378,9 +2585,17 @@ void DOSUB_PAIR2(struct runner *r, struct cell *ci, struct cell *cj, int sid, /* Otherwise, compute the pair directly. */ else if (cell_is_active(ci, e) || cell_is_active(cj, e)) { + /* Make sure both cells are drifted to the current timestep. */ + if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + /* Do any of the cells need to be sorted first? */ - if (!(ci->sorted & (1 << sid))) runner_do_sort(r, ci, (1 << sid), 1); - if (!(cj->sorted & (1 << sid))) runner_do_sort(r, cj, (1 << sid), 1); + if (!(ci->sorted & (1 << sid)) || + ci->dx_max_sort > ci->dmin * space_maxreldx) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || + cj->dx_max_sort > cj->dmin * space_maxreldx) + runner_do_sort(r, cj, (1 << sid), 1); /* Compute the interactions. */ DOPAIR2(r, ci, cj); @@ -2436,15 +2651,6 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, struct cell *sub = NULL; for (int k = 0; k < 8; k++) if (ci->progeny[k] != NULL) { - // if ( parts[ ind[ 0 ] ].x[0] >= ci->progeny[k]->loc[0] && - // parts[ ind[ 0 ] ].x[0] <= ci->progeny[k]->loc[0] + - // ci->progeny[k]->width[0] && - // parts[ ind[ 0 ] ].x[1] >= ci->progeny[k]->loc[1] && - // parts[ ind[ 0 ] ].x[1] <= ci->progeny[k]->loc[1] + - // ci->progeny[k]->width[1] && - // parts[ ind[ 0 ] ].x[2] >= ci->progeny[k]->loc[2] && - // parts[ ind[ 0 ] ].x[2] <= ci->progeny[k]->loc[2] + - // ci->progeny[k]->width[2] ) { if (&parts[ind[0]] >= &ci->progeny[k]->parts[0] && &parts[ind[0]] < &ci->progeny[k]->parts[ci->progeny[k]->count]) { sub = ci->progeny[k]; @@ -2480,7 +2686,8 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, /* Recurse? */ if (ci->split && cj->split && - max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max + cj->dx_max < + max(ci->h_max, cj->h_max) * kernel_gamma + ci->dx_max_sort + + cj->dx_max_sort < h / 2) { /* Get the type of pair if not specified explicitly. */ @@ -3010,59 +3217,13 @@ void DOSUB_SUBSET(struct runner *r, struct cell *ci, struct part *parts, : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1); new_sid = sortlistID[new_sid]; - /* Do any of the cells need to be sorted first? */ - if (!(cj->sorted & (1 << new_sid))) - runner_do_sort(r, cj, (1 << new_sid), 1); + /* Do any of the cells need to be drifted first? */ + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); - /* Compute the interactions. */ DOPAIR_SUBSET(r, ci, parts, ind, count, cj); } } /* otherwise, pair interaction. */ - if (gettimer) TIMER_TOC(TIMER_DOSUB_PAIR); -} - -/** - * @brief Determine which version of DOPAIR1 needs to be called depending on MPI, vectorisation and orientation of the cells or whether DOPAIR1 needs to be called at all. - * - * @param r #runner - * @param ci #cell ci - * @param cj #cell cj - * - */ -void DOPAIR1_BRANCH(struct runner *r, struct cell *ci, struct cell *cj) { - - const struct engine *restrict e = r->e; - -#ifdef WITH_MPI - if (ci->nodeID != cj->nodeID) { - DOPAIR1_NOSORT(r, ci, cj); - return; - } -#endif - - /* Anything to do here? */ - if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - - /* Drift cells that are not drifted. */ - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); - - /* Get the sort ID. */ - double shift[3] = {0.0, 0.0, 0.0}; - const int sid = space_getsid(e->s, &ci, &cj, shift); - - /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) - error("Trying to interact unsorted cells."); - -#if defined(WITH_VECTORIZATION) && defined(GADGET2_SPH) && (DOPAIR1_BRANCH == runner_dopair1_density_branch) - if(!sort_is_corner(sid)) - runner_dopair1_density_vec(r, ci, cj); - else - DOPAIR1(r, ci, cj); -#else - DOPAIR1(r, ci, cj); -#endif + if (gettimer) TIMER_TOC(timer_dosub_subset); } diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 065e99dba8ee08a6a4fb91245a9d53ffdc7caccc..13a55344d773e7fba000d680eae9866dffdd88e1 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -43,6 +43,10 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { TIMER_TIC; +#ifdef SWIFT_DEBUG_CHECKS + if (c->ti_old_multipole != e->ti_current) error("c->multipole not drifted."); +#endif + if (c->split) { /* Node case */ /* Add the field-tensor to all the 8 progenitors */ @@ -53,6 +57,11 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { /* Do we have a progenitor with any active g-particles ? */ if (cp != NULL && cell_is_active(cp, e)) { +#ifdef SWIFT_DEBUG_CHECKS + if (cp->ti_old_multipole != e->ti_current) + error("cp->multipole not drifted."); +#endif + /* Shift the field tensor */ gravity_L2L(&temp, &c->multipole->pot, cp->multipole->CoM, c->multipole->CoM, 0 * periodic); @@ -74,8 +83,16 @@ void runner_do_grav_down(struct runner *r, struct cell *c, int timer) { struct gpart *gp = &gparts[i]; /* Update if active */ - if (gpart_is_active(gp, e)) + if (gpart_is_active(gp, e)) { + +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gp->ti_drift != e->ti_current) + error("gpart not drifted to current time"); +#endif + gravity_L2P(&c->multipole->pot, c->multipole->CoM, gp); + } } } @@ -102,14 +119,17 @@ void runner_dopair_grav_mm(const struct runner *r, struct cell *restrict ci, TIMER_TIC; + /* Anything to do here? */ + if (!cell_is_active(ci, e)) return; + #ifdef SWIFT_DEBUG_CHECKS if (ci == cj) error("Interacting a cell with itself using M2L"); if (multi_j->M_000 == 0.f) error("Multipole does not seem to have been set."); -#endif - /* Anything to do here? */ - if (!cell_is_active(ci, e)) return; + if (ci->ti_old_multipole != e->ti_current) + error("ci->multipole not drifted."); +#endif /* Do we need to drift the multipole ? */ if (cj->ti_old_multipole != e->ti_current) cell_drift_multipole(cj, e); @@ -161,6 +181,10 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; + /* Let's start by drifting things */ + if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); + if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + #if ICHECK > 0 for (int pid = 0; pid < gcount_i; pid++) { @@ -188,60 +212,80 @@ void runner_dopair_grav_pp(struct runner *r, struct cell *ci, struct cell *cj) { /* MATTHIEU: Should we use local DP accumulators ? */ /* Loop over all particles in ci... */ - for (int pid = 0; pid < gcount_i; pid++) { + if (cell_is_active(ci, e)) { + for (int pid = 0; pid < gcount_i; pid++) { - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpi = &gparts_i[pid]; + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gpi = &gparts_i[pid]; - if (!gpart_is_active(gpi, e)) continue; + if (!gpart_is_active(gpi, e)) continue; - /* Loop over every particle in the other cell. */ - for (int pjd = 0; pjd < gcount_j; pjd++) { + /* Loop over every particle in the other cell. */ + for (int pjd = 0; pjd < gcount_j; pjd++) { - /* Get a hold of the jth part in cj. */ - const struct gpart *restrict gpj = &gparts_j[pjd]; + /* Get a hold of the jth part in cj. */ + const struct gpart *restrict gpj = &gparts_j[pjd]; - /* Compute the pairwise distance. */ - const float dx[3] = {gpi->x[0] - gpj->x[0], // x - gpi->x[1] - gpj->x[1], // y - gpi->x[2] - gpj->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* Compute the pairwise distance. */ + const float dx[3] = {gpi->x[0] - gpj->x[0], // x + gpi->x[1] - gpj->x[1], // y + gpi->x[2] - gpj->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Interact ! */ - runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gpi->ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (gpj->ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + + /* Interact ! */ + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj); #ifdef SWIFT_DEBUG_CHECKS - gpi->num_interacted++; + gpi->num_interacted++; #endif + } } } /* Loop over all particles in cj... */ - for (int pjd = 0; pjd < gcount_j; pjd++) { + if (cell_is_active(cj, e)) { + for (int pjd = 0; pjd < gcount_j; pjd++) { - /* Get a hold of the ith part in ci. */ - struct gpart *restrict gpj = &gparts_j[pjd]; + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gpj = &gparts_j[pjd]; - if (!gpart_is_active(gpj, e)) continue; + if (!gpart_is_active(gpj, e)) continue; - /* Loop over every particle in the other cell. */ - for (int pid = 0; pid < gcount_i; pid++) { + /* Loop over every particle in the other cell. */ + for (int pid = 0; pid < gcount_i; pid++) { - /* Get a hold of the ith part in ci. */ - const struct gpart *restrict gpi = &gparts_i[pid]; + /* Get a hold of the ith part in ci. */ + const struct gpart *restrict gpi = &gparts_i[pid]; - /* Compute the pairwise distance. */ - const float dx[3] = {gpj->x[0] - gpi->x[0], // x - gpj->x[1] - gpi->x[1], // y - gpj->x[2] - gpi->x[2]}; // z - const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + /* Compute the pairwise distance. */ + const float dx[3] = {gpj->x[0] - gpi->x[0], // x + gpj->x[1] - gpi->x[1], // y + gpj->x[2] - gpi->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Interact ! */ - runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi); +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gpi->ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (gpj->ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + + /* Interact ! */ + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi); #ifdef SWIFT_DEBUG_CHECKS - gpj->num_interacted++; + gpj->num_interacted++; #endif + } } } @@ -273,6 +317,9 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { /* Anything to do here? */ if (!cell_is_active(c, e)) return; + /* Do we need to start by drifting things ? */ + if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); + #if ICHECK > 0 for (int pid = 0; pid < gcount; pid++) { @@ -306,6 +353,14 @@ void runner_doself_grav_pp(struct runner *r, struct cell *c) { gpi->x[2] - gpj->x[2]}; // z const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that particles have been drifted to the current time */ + if (gpi->ti_drift != e->ti_current) + error("gpi not drifted to current time"); + if (gpj->ti_drift != e->ti_current) + error("gpj not drifted to current time"); +#endif + /* Interact ! */ if (gpart_is_active(gpi, e) && gpart_is_active(gpj, e)) { @@ -403,7 +458,8 @@ void runner_dopair_grav(struct runner *r, struct cell *ci, struct cell *cj, TIMER_TIC; /* Can we use M-M interactions ? */ - if (gravity_multipole_accept(ci->multipole, cj->multipole, theta_crit_inv)) { + if (gravity_multipole_accept(ci->multipole, cj->multipole, theta_crit_inv, + 0)) { /* MATTHIEU: make a symmetric M-M interaction function ! */ runner_dopair_grav_mm(r, ci, cj); runner_dopair_grav_mm(r, cj, ci); @@ -527,11 +583,6 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, } else { -#ifdef SWIFT_DEBUG_CHECKS - if (!cell_are_neighbours(ci, cj)) - error("Non-neighbouring cells in pair task !"); -#endif - runner_dopair_grav(r, ci, cj, 1); } } @@ -588,11 +639,20 @@ void runner_do_grav_long_range(struct runner *r, struct cell *ci, int timer) { if (ci == cj || cj->gcount == 0) continue; /* Check the multipole acceptance criterion */ - if (gravity_multipole_accept(ci->multipole, cj->multipole, - theta_crit_inv)) { + if (gravity_multipole_accept(ci->multipole, cj->multipole, theta_crit_inv, + 0)) { + /* Go for a (non-symmetric) M-M calculation */ runner_dopair_grav_mm(r, ci, cj); } + /* Is the criterion violated now but was OK at the last rebuild ? */ + else if (gravity_multipole_accept(ci->multipole, cj->multipole, + theta_crit_inv, 1)) { + + /* Alright, we have to take charge of that pair in a different way. */ + // MATTHIEU: We should actually open the tree-node here and recurse. + runner_dopair_grav_mm(r, ci, cj); + } } if (timer) TIMER_TOC(timer_dograv_long_range); diff --git a/src/runner_doiact_vec.c b/src/runner_doiact_vec.c index fd15fc9e775f7a05d45056c23c6567aa3ac9d0c4..c55a482cfb0244ccf78dadc4f98031b9f36f64e0 100644 --- a/src/runner_doiact_vec.c +++ b/src/runner_doiact_vec.c @@ -50,9 +50,6 @@ static const vector kernel_gamma2_vec = FILL_VEC(kernel_gamma2); // printf("]\n"); //} -#endif - -#ifdef WITH_VECTORIZATION /** * @brief Compute the vector remainder interactions from the secondary cache. * @@ -589,7 +586,8 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( int first_pi = 0, last_pj = cj->count - 1; - /* Find the first active particle in ci to interact with any particle in cj. */ + /* Find the first active particle in ci to interact with any particle in cj. + */ /* Populate max_di with distances. */ int active_id = ci->count - 1; for (int k = ci->count - 1; k >= 0; k--) { @@ -599,20 +597,19 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( max_di[k] = d; - /* If the particle is out of range set the index to + /* If the particle is out of range set the index to * the last active particle within range. */ if (d < dj_min) { first_pi = active_id; break; - } - else { - if(part_is_active(p,e)) active_id = k; + } else { + if (part_is_active(p, e)) active_id = k; } } /* Find the maximum distance of pi particles into cj.*/ - for (int k = first_pi; k < ci->count; k++) { - max_di[k] = fmaxf(max_di[k - 1],max_di[k]); + for (int k = first_pi + 1; k < ci->count; k++) { + max_di[k] = fmaxf(max_di[k - 1], max_di[k]); } /* Find the last particle in cj to interact with any particle in ci. */ @@ -622,17 +619,16 @@ __attribute__((always_inline)) INLINE static void populate_max_d_no_cache( p = &parts_j[sort_j[k].i]; h = p->h; d = sort_j[k].d - h * kernel_gamma - dx_max - rshift; - + max_dj[k] = d; - - /* If the particle is out of range set the index to + + /* If the particle is out of range set the index to * the last active particle within range. */ if (d > di_max) { last_pj = active_id; break; - } - else { - if(part_is_active(p,e)) active_id = k; + } else { + if (part_is_active(p, e)) active_id = k; } } @@ -672,7 +668,7 @@ __attribute__((always_inline)) INLINE void runner_doself1_density_vec( if (!cell_is_active(c, e)) return; - if (!cell_is_drifted(c, e)) cell_drift_particles(c, e); + if (!cell_is_drifted(c, e)) error("Interacting undrifted cell."); /* Get the particle cache from the runner and re-allocate * the cache if it is not big enough for the cell. */ @@ -1615,16 +1611,18 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Anything to do here? */ if (!cell_is_active(ci, e) && !cell_is_active(cj, e)) return; - if (!cell_is_drifted(ci, e)) cell_drift_particles(ci, e); - if (!cell_is_drifted(cj, e)) cell_drift_particles(cj, e); + if (!cell_is_drifted(ci, e) || !cell_is_drifted(cj, e)) + error("Interacting undrifted cells."); /* Get the sort ID. */ double shift[3] = {0.0, 0.0, 0.0}; const int sid = space_getsid(e->s, &ci, &cj, shift); /* Have the cells been sorted? */ - if (!(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid))) - error("Trying to interact unsorted cells."); + if (!(ci->sorted & (1 << sid)) || ci->dx_max_sort > space_maxreldx * ci->dmin) + runner_do_sort(r, ci, (1 << sid), 1); + if (!(cj->sorted & (1 << sid)) || cj->dx_max_sort > space_maxreldx * cj->dmin) + runner_do_sort(r, cj, (1 << sid), 1); /* Get the cutoff shift. */ double rshift = 0.0; @@ -1634,6 +1632,29 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, const struct entry *restrict sort_i = &ci->sort[sid * (ci->count + 1)]; const struct entry *restrict sort_j = &cj->sort[sid * (cj->count + 1)]; +#ifdef SWIFT_DEBUG_CHECKS + /* Check that the dx_max_sort values in the cell are indeed an upper + bound on particle movement. */ + for (int pid = 0; pid < ci->count; pid++) { + const struct part *p = &ci->parts[sort_i[pid].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_i[pid].d) - ci->dx_max_sort > + 1.0e-6 * max(fabsf(d), ci->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } + for (int pjd = 0; pjd < cj->count; pjd++) { + const struct part *p = &cj->parts[sort_j[pjd].i]; + const float d = p->x[0] * runner_shift[sid][0] + + p->x[1] * runner_shift[sid][1] + + p->x[2] * runner_shift[sid][2]; + if (fabsf(d - sort_j[pjd].d) - cj->dx_max_sort > + 1.0e-6 * max(fabsf(d), cj->dx_max_sort)) + error("particle shift diff exceeds dx_max_sort."); + } +#endif /* SWIFT_DEBUG_CHECKS */ + /* Get some other useful values. */ const int count_i = ci->count; const int count_j = cj->count; @@ -1643,31 +1664,31 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, struct part *restrict parts_j = cj->parts; const double di_max = sort_i[count_i - 1].d - rshift; const double dj_min = sort_j[0].d; - const float dx_max = (ci->dx_max + cj->dx_max); + const float dx_max = (ci->dx_max_sort + cj->dx_max_sort); /* Check if any particles are active and return if there are not. */ - int numActive = 0; - for (int pid = count_i - 1; - pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { + int numActive = 0; + for (int pid = count_i - 1; + pid >= 0 && sort_i[pid].d + hi_max + dx_max > dj_min; pid--) { struct part *restrict pi = &parts_i[sort_i[pid].i]; if (part_is_active(pi, e)) { numActive++; break; - } + } } - if(!numActive) { + if (!numActive) { for (int pjd = 0; pjd < count_j && sort_j[pjd].d - hj_max - dx_max < di_max; - pjd++) { + pjd++) { struct part *restrict pj = &parts_j[sort_j[pjd].i]; if (part_is_active(pj, e)) { numActive++; break; - } - } + } + } } - if(numActive == 0) return; + if (numActive == 0) return; /* Get both particle caches from the runner and re-allocate * them if they are not big enough for the cells. */ @@ -1692,7 +1713,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Also find the first pi that interacts with any particle in cj and the last * pj that interacts with any particle in ci. */ populate_max_d_no_cache(ci, cj, sort_i, sort_j, dx_max, rshift, max_di, - max_dj, &first_pi, &last_pj, e); + max_dj, &first_pi, &last_pj, e); /* Find the maximum index into cj that is required by a particle in ci. */ /* Find the maximum index into ci that is required by a particle in cj. */ @@ -1727,8 +1748,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, int first_pi_align = first_pi; int last_pj_align = last_pj; cache_read_two_partial_cells_sorted(ci, cj, ci_cache, cj_cache, sort_i, - sort_j, shift, &first_pi_align, - &last_pj_align, 1); + sort_j, shift, &first_pi_align, + &last_pj_align, 1); /* Get the number of particles read into the ci cache. */ int ci_cache_count = count_i - first_pi_align; @@ -1772,7 +1793,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Reset cumulative sums of update vectors. */ vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, - curlvySum, curlvzSum; + curlvySum, curlvzSum; /* Get the inverse of hi. */ vector v_hi_inv; @@ -1809,8 +1830,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, vector v_dx, v_dy, v_dz, v_r2; #ifdef SWIFT_DEBUG_CHECKS - if (cj_cache_idx%VEC_SIZE != 0 || cj_cache_idx < 0) { - error("Unaligned read!!! cj_cache_idx=%d",cj_cache_idx); + if (cj_cache_idx % VEC_SIZE != 0 || cj_cache_idx < 0) { + error("Unaligned read!!! cj_cache_idx=%d", cj_cache_idx); } #endif @@ -1848,7 +1869,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, #ifdef HAVE_AVX512_F knl_mask); #else - 0); + 0); #endif } /* loop over the parts in cj. */ @@ -1906,7 +1927,7 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Reset cumulative sums of update vectors. */ vector rhoSum, rho_dhSum, wcountSum, wcount_dhSum, div_vSum, curlvxSum, - curlvySum, curlvzSum; + curlvySum, curlvzSum; /* Get the inverse of hj. */ vector v_hj_inv; @@ -1929,17 +1950,18 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, /* Pad the exit iteration align so cache reads are aligned. */ int rem = exit_iteration_align % VEC_SIZE; - if ( exit_iteration_align < VEC_SIZE) { + if (exit_iteration_align < VEC_SIZE) { exit_iteration_align = 0; - } - else exit_iteration_align -= rem; + } else + exit_iteration_align -= rem; /* Loop over the parts in ci. */ - for (int ci_cache_idx = exit_iteration_align; ci_cache_idx < ci_cache_count; ci_cache_idx += VEC_SIZE) { + for (int ci_cache_idx = exit_iteration_align; + ci_cache_idx < ci_cache_count; ci_cache_idx += VEC_SIZE) { #ifdef SWIFT_DEBUG_CHECKS - if (ci_cache_idx%VEC_SIZE != 0 || ci_cache_idx < 0) { - error("Unaligned read!!! ci_cache_idx=%d",ci_cache_idx); + if (ci_cache_idx % VEC_SIZE != 0 || ci_cache_idx < 0) { + error("Unaligned read!!! ci_cache_idx=%d", ci_cache_idx); } #endif @@ -1979,12 +2001,13 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, #ifdef HAVE_AVX512_F knl_mask); #else - 0); + 0); #endif } /* loop over the parts in ci. */ - /* Perform horizontal adds on vector sums and store result in particle pj. */ + /* Perform horizontal adds on vector sums and store result in particle pj. + */ VEC_HADD(rhoSum, pj->rho); VEC_HADD(rho_dhSum, pj->density.rho_dh); VEC_HADD(wcountSum, pj->density.wcount); @@ -1995,9 +2018,8 @@ void runner_dopair1_density_vec(struct runner *r, struct cell *ci, VEC_HADD(curlvzSum, pj->density.rot_v[2]); } /* loop over the parts in cj. */ - - TIMER_TOC(timer_dopair_density); + TIMER_TOC(timer_dopair_density); } #endif /* WITH_VECTORIZATION */ diff --git a/src/scheduler.c b/src/scheduler.c index 62fce53762e0b3c84396dfb9c54190199b41753b..9cf1598f6d3bfd31b1120058d85b18edf44c427f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -65,6 +65,11 @@ void scheduler_clear_active(struct scheduler *s) { s->active_count = 0; } */ void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb) { +#ifdef SWIFT_DEBUG_CHECKS + if (ta == NULL) error("Unlocking task is NULL."); + if (tb == NULL) error("Unlocked task is NULL."); +#endif + /* Get an index at which to store this unlock. */ const int ind = atomic_inc(&s->nr_unlocks); @@ -136,8 +141,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { ((t->type == task_type_kick1) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_kick2) && t->ci->nodeID != s->nodeID) || ((t->type == task_type_drift) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID) || - ((t->type == task_type_init) && t->ci->nodeID != s->nodeID)) { + ((t->type == task_type_timestep) && t->ci->nodeID != s->nodeID)) { t->type = task_type_none; t->subtype = task_subtype_none; t->cj = NULL; @@ -169,6 +173,19 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* convert to a self-subtask. */ t->type = task_type_sub_self; + /* Make sure we have a drift task (MATTHIEU temp. fix for gravity) */ + if (t->subtype == task_subtype_grav || + t->subtype == task_subtype_external_grav) { + lock_lock(&ci->lock); + if (ci->drift == NULL) + ci->drift = scheduler_addtask(s, task_type_drift, + task_subtype_none, 0, 0, ci, NULL); + lock_unlock_blind(&ci->lock); + } + + /* Depend on local sorts on this cell. */ + if (ci->sorts != NULL) scheduler_addunlock(s, ci->sorts, t); + /* Otherwise, make tasks explicitly. */ } else { @@ -183,7 +200,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { if (ci->progeny[k] != NULL) scheduler_splittask( scheduler_addtask(s, task_type_self, t->subtype, 0, 0, - ci->progeny[k], NULL, 0), + ci->progeny[k], NULL), s); /* Make a task for each pair of progeny unless it's ext. gravity. */ @@ -196,12 +213,21 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, pts[j][k], 0, ci->progeny[j], - ci->progeny[k], 0), + ci->progeny[k]), s); } } } + /* Otherwise, make sure the self task has a drift task. */ + else { + lock_lock(&ci->lock); + if (ci->drift == NULL) + ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, + 0, 0, ci, NULL); + lock_unlock_blind(&ci->lock); + } + /* Pair interaction? */ } else if (t->type == task_type_pair && t->subtype != task_subtype_grav) { @@ -235,6 +261,10 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Make this task a sub task. */ t->type = task_type_sub_pair; + /* Depend on the sort tasks of both cells. */ + if (ci->sorts != NULL) scheduler_addunlock(s, ci->sorts, t); + if (cj->sorts != NULL) scheduler_addunlock(s, cj->sorts, t); + /* Otherwise, split it. */ } else { @@ -254,18 +284,17 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[6]; t->cj = cj->progeny[0]; t->flags = 1; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[7], cj->progeny[1], 1), + ci->progeny[7], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[6], cj->progeny[1], 1), + ci->progeny[6], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[7], cj->progeny[0], 1), + ci->progeny[7], cj->progeny[0]), s); break; @@ -273,25 +302,23 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[6]; t->cj = cj->progeny[1]; t->flags = 2; - t->tight = 1; break; case 3: /* ( 1 , 0 , 1 ) */ t->ci = ci->progeny[5]; t->cj = cj->progeny[0]; t->flags = 3; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[7], cj->progeny[2], 1), + ci->progeny[7], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[5], cj->progeny[2], 1), + ci->progeny[5], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[7], cj->progeny[0], 1), + ci->progeny[7], cj->progeny[0]), s); break; @@ -299,66 +326,65 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[4]; t->cj = cj->progeny[0]; t->flags = 4; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[5], cj->progeny[0], 1), + ci->progeny[5], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[6], cj->progeny[0], 1), + ci->progeny[6], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[7], cj->progeny[0], 1), + ci->progeny[7], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[4], cj->progeny[1], 1), + ci->progeny[4], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, - ci->progeny[5], cj->progeny[1], 1), + ci->progeny[5], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[6], cj->progeny[1], 1), + ci->progeny[6], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[7], cj->progeny[1], 1), + ci->progeny[7], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[4], cj->progeny[2], 1), + ci->progeny[4], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[5], cj->progeny[2], 1), + ci->progeny[5], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, - ci->progeny[6], cj->progeny[2], 1), + ci->progeny[6], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[7], cj->progeny[2], 1), + ci->progeny[7], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[4], cj->progeny[3], 1), + ci->progeny[4], cj->progeny[3]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[5], cj->progeny[3], 1), + ci->progeny[5], cj->progeny[3]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[6], cj->progeny[3], 1), + ci->progeny[6], cj->progeny[3]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 4, 0, - ci->progeny[7], cj->progeny[3], 1), + ci->progeny[7], cj->progeny[3]), s); break; @@ -366,18 +392,17 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[4]; t->cj = cj->progeny[1]; t->flags = 5; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[6], cj->progeny[3], 1), + ci->progeny[6], cj->progeny[3]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[4], cj->progeny[3], 1), + ci->progeny[4], cj->progeny[3]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[6], cj->progeny[1], 1), + ci->progeny[6], cj->progeny[1]), s); break; @@ -385,25 +410,23 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[5]; t->cj = cj->progeny[2]; t->flags = 6; - t->tight = 1; break; case 7: /* ( 1 , -1 , 0 ) */ t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 6; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[5], cj->progeny[2], 1), + ci->progeny[5], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[4], cj->progeny[2], 1), + ci->progeny[4], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[5], cj->progeny[3], 1), + ci->progeny[5], cj->progeny[3]), s); break; @@ -411,25 +434,23 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[4]; t->cj = cj->progeny[3]; t->flags = 8; - t->tight = 1; break; case 9: /* ( 0 , 1 , 1 ) */ t->ci = ci->progeny[3]; t->cj = cj->progeny[0]; t->flags = 9; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[7], cj->progeny[4], 1), + ci->progeny[7], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[3], cj->progeny[4], 1), + ci->progeny[3], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[7], cj->progeny[0], 1), + ci->progeny[7], cj->progeny[0]), s); break; @@ -437,66 +458,65 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[2]; t->cj = cj->progeny[0]; t->flags = 10; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[3], cj->progeny[0], 1), + ci->progeny[3], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[6], cj->progeny[0], 1), + ci->progeny[6], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[7], cj->progeny[0], 1), + ci->progeny[7], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[2], cj->progeny[1], 1), + ci->progeny[2], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, - ci->progeny[3], cj->progeny[1], 1), + ci->progeny[3], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[6], cj->progeny[1], 1), + ci->progeny[6], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 7, 0, - ci->progeny[7], cj->progeny[1], 1), + ci->progeny[7], cj->progeny[1]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[2], cj->progeny[4], 1), + ci->progeny[2], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[3], cj->progeny[4], 1), + ci->progeny[3], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, - ci->progeny[6], cj->progeny[4], 1), + ci->progeny[6], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[7], cj->progeny[4], 1), + ci->progeny[7], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[2], cj->progeny[5], 1), + ci->progeny[2], cj->progeny[5]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 1, 0, - ci->progeny[3], cj->progeny[5], 1), + ci->progeny[3], cj->progeny[5]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[6], cj->progeny[5], 1), + ci->progeny[6], cj->progeny[5]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 10, 0, - ci->progeny[7], cj->progeny[5], 1), + ci->progeny[7], cj->progeny[5]), s); break; @@ -504,18 +524,17 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[2]; t->cj = cj->progeny[1]; t->flags = 11; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[6], cj->progeny[5], 1), + ci->progeny[6], cj->progeny[5]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[2], cj->progeny[5], 1), + ci->progeny[2], cj->progeny[5]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[6], cj->progeny[1], 1), + ci->progeny[6], cj->progeny[1]), s); break; @@ -523,66 +542,65 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { t->ci = ci->progeny[1]; t->cj = cj->progeny[0]; t->flags = 12; - t->tight = 1; scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[3], cj->progeny[0], 1), + ci->progeny[3], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[5], cj->progeny[0], 1), + ci->progeny[5], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 2, 0, - ci->progeny[7], cj->progeny[0], 1), + ci->progeny[7], cj->progeny[0]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[1], cj->progeny[2], 1), + ci->progeny[1], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, - ci->progeny[3], cj->progeny[2], 1), + ci->progeny[3], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 8, 0, - ci->progeny[5], cj->progeny[2], 1), + ci->progeny[5], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 5, 0, - ci->progeny[7], cj->progeny[2], 1), + ci->progeny[7], cj->progeny[2]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[1], cj->progeny[4], 1), + ci->progeny[1], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 6, 0, - ci->progeny[3], cj->progeny[4], 1), + ci->progeny[3], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, - ci->progeny[5], cj->progeny[4], 1), + ci->progeny[5], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 11, 0, - ci->progeny[7], cj->progeny[4], 1), + ci->progeny[7], cj->progeny[4]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[1], cj->progeny[6], 1), + ci->progeny[1], cj->progeny[6]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 3, 0, - ci->progeny[3], cj->progeny[6], 1), + ci->progeny[3], cj->progeny[6]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 9, 0, - ci->progeny[5], cj->progeny[6], 1), + ci->progeny[5], cj->progeny[6]), s); scheduler_splittask( scheduler_addtask(s, task_type_pair, t->subtype, 12, 0, - ci->progeny[7], cj->progeny[6], 1), + ci->progeny[7], cj->progeny[6]), s); break; } /* switch(sid) */ @@ -604,7 +622,7 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { if (cj->progeny[k] != NULL) { struct task *tl = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, - ci->progeny[j], cj->progeny[k], 0); + ci->progeny[j], cj->progeny[k]); scheduler_splittask(tl, s); tl->flags = space_getsid(s->space, &t->ci, &t->cj, shift); } @@ -612,11 +630,14 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Otherwise, if not spilt, stitch-up the sorting. */ } else { - /* Create the sort for ci. */ + /* Create the drift and sort for ci. */ lock_lock(&ci->lock); + if (ci->drift == NULL && ci->nodeID == engine_rank) + ci->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, + 0, 0, ci, NULL); if (ci->sorts == NULL) ci->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, - 1 << sid, 0, ci, NULL, 0); + 1 << sid, 0, ci, NULL); else ci->sorts->flags |= (1 << sid); lock_unlock_blind(&ci->lock); @@ -624,9 +645,12 @@ static void scheduler_splittask(struct task *t, struct scheduler *s) { /* Create the sort for cj. */ lock_lock(&cj->lock); + if (cj->drift == NULL && cj->nodeID == engine_rank) + cj->drift = scheduler_addtask(s, task_type_drift, task_subtype_none, + 0, 0, cj, NULL); if (cj->sorts == NULL) cj->sorts = scheduler_addtask(s, task_type_sort, task_subtype_none, - 1 << sid, 0, cj, NULL, 0); + 1 << sid, 0, cj, NULL); else cj->sorts->flags |= (1 << sid); lock_unlock_blind(&cj->lock); @@ -690,11 +714,10 @@ void scheduler_splittasks(struct scheduler *s) { * @param wait The number of unsatisfied dependencies of this task. * @param ci The first cell to interact. * @param cj The second cell to interact. - * @param tight */ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, enum task_subtypes subtype, int flags, int wait, - struct cell *ci, struct cell *cj, int tight) { + struct cell *ci, struct cell *cj) { #ifdef SWIFT_DEBUG_CHECKS if (ci == NULL && cj != NULL) @@ -719,7 +742,6 @@ struct task *scheduler_addtask(struct scheduler *s, enum task_types type, t->ci = ci; t->cj = cj; t->skip = 1; /* Mark tasks as skip by default. */ - t->tight = tight; t->implicit = 0; t->weight = 0; t->rank = 0; @@ -854,13 +876,13 @@ void scheduler_ranktasks(struct scheduler *s) { } /* Main loop. */ - for (int j = 0, rank = 0; left < nr_tasks; rank++) { + for (int j = 0, rank = 0; j < nr_tasks; rank++) { /* Did we get anything? */ if (j == left) error("Unsatisfiable task dependencies detected."); - const int left_old = left; /* Unlock the next layer of tasks. */ + const int left_old = left; for (; j < left_old; j++) { struct task *t = &tasks[tid[j]]; t->rank = rank; @@ -876,14 +898,14 @@ void scheduler_ranktasks(struct scheduler *s) { } } - /* Move back to the old left (like Sanders). */ + /* Move back to the old left (like Sanders!). */ j = left_old; } #ifdef SWIFT_DEBUG_CHECKS /* Verify that the tasks were ranked correctly. */ for (int k = 1; k < s->nr_tasks; k++) - if (tasks[tid[k - 1]].rank > tasks[tid[k - 1]].rank) + if (tasks[tid[k - 1]].rank > tasks[tid[k]].rank) error("Task ranking failed."); #endif } @@ -1002,9 +1024,6 @@ void scheduler_reweight(struct scheduler *s, int verbose) { case task_type_timestep: cost = wscale * t->ci->count; break; - case task_type_init: - cost = wscale * t->ci->count; - break; default: cost = 0; break; @@ -1088,6 +1107,15 @@ void scheduler_enqueue_mapper(void *map_data, int num_elements, */ void scheduler_start(struct scheduler *s) { +/* Reset all task debugging timers */ +#ifdef SWIFT_DEBUG_TASKS + for (int i = 0; i < s->nr_tasks; ++i) { + s->tasks[i].tic = 0; + s->tasks[i].toc = 0; + s->tasks[i].rid = -1; + } +#endif + /* Re-wait the tasks. */ if (s->active_count > 1000) { threadpool_map(s->threadpool, scheduler_rewait_mapper, s->tid_active, @@ -1121,7 +1149,7 @@ void scheduler_start(struct scheduler *s) { } else if (cj == NULL) { /* self */ if (ci->ti_end_min == ti_current && t->skip && - t->type != task_type_sort && t->type) + t->type != task_type_sort && t->type != task_type_drift && t->type) error( "Task (type='%s/%s') should not have been skipped " "ti_current=%lld " @@ -1214,7 +1242,6 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { case task_type_kick2: case task_type_drift: case task_type_timestep: - case task_type_init: qid = t->ci->super->owner; break; case task_type_pair: diff --git a/src/scheduler.h b/src/scheduler.h index f2225f5f5b8d0a5db54eb8506e02d78b14f4bb88..7bf9a40e7cec89eb25dfa6ce7a56912bf3a9e639 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -134,7 +134,7 @@ void scheduler_ranktasks(struct scheduler *s); void scheduler_reweight(struct scheduler *s, int verbose); struct task *scheduler_addtask(struct scheduler *s, enum task_types type, enum task_subtypes subtype, int flags, int wait, - struct cell *ci, struct cell *cj, int tight); + struct cell *ci, struct cell *cj); void scheduler_splittasks(struct scheduler *s); struct task *scheduler_done(struct scheduler *s, struct task *t); struct task *scheduler_unlock(struct scheduler *s, struct task *t); diff --git a/src/space.c b/src/space.c index 0ea1e4ab3299c29d3b47a1af718dc9cdcb47b285..0c67fe27ac1c7b2cae3672e7f0b1ce82be5fb804 100644 --- a/src/space.c +++ b/src/space.c @@ -206,11 +206,11 @@ void space_rebuild_recycle_mapper(void *map_data, int num_elements, c->force = NULL; c->grav = NULL; c->dx_max = 0.0f; + c->dx_max_sort = 0.0f; c->sorted = 0; c->count = 0; c->gcount = 0; c->scount = 0; - c->init = NULL; c->init_grav = NULL; c->extra_ghost = NULL; c->ghost = NULL; @@ -2027,6 +2027,7 @@ void space_split_recursive(struct space *s, struct cell *c, cp->split = 0; cp->h_max = 0.0; cp->dx_max = 0.f; + cp->dx_max_sort = 0.f; cp->nodeID = c->nodeID; cp->parent = c; cp->super = NULL; @@ -2091,8 +2092,7 @@ void space_split_recursive(struct space *s, struct cell *c, const struct multipole *m = &cp->multipole->m_pole; /* Contribution to multipole */ - gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM, - s->periodic); + gravity_M2M(&temp, m, c->multipole->CoM, cp->multipole->CoM); gravity_multipole_add(&c->multipole->m_pole, &temp); /* Upper limit of max CoM<->gpart distance */ @@ -2116,6 +2116,10 @@ void space_split_recursive(struct space *s, struct cell *c, /* Take minimum of both limits */ c->multipole->r_max = min(r_max, sqrt(dx * dx + dy * dy + dz * dz)); + c->multipole->r_max_rebuild = c->multipole->r_max; + c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; + c->multipole->CoM_rebuild[1] = c->multipole->CoM[1]; + c->multipole->CoM_rebuild[2] = c->multipole->CoM[2]; } /* Deal with gravity */ } @@ -2194,6 +2198,10 @@ void space_split_recursive(struct space *s, struct cell *c, c->multipole->CoM[2] = c->loc[2] + c->width[2] / 2.; c->multipole->r_max = 0.; } + c->multipole->r_max_rebuild = c->multipole->r_max; + c->multipole->CoM_rebuild[0] = c->multipole->CoM[0]; + c->multipole->CoM_rebuild[1] = c->multipole->CoM[1]; + c->multipole->CoM_rebuild[2] = c->multipole->CoM[2]; } } diff --git a/src/space.h b/src/space.h index 8674c39e110694ec303584627840a7ee47644552..c5f588563e5a9fb4b6cb73ac1446514f8149794f 100644 --- a/src/space.h +++ b/src/space.h @@ -45,7 +45,7 @@ #define space_maxcount_default 10000 #define space_max_top_level_cells_default 12 #define space_stretch 1.10f -#define space_maxreldx 0.25f +#define space_maxreldx 0.1f /* Maximum allowed depth of cell splits. */ #define space_cell_maxdepth 52 diff --git a/src/statistics.c b/src/statistics.c index 297d88c1f25c1b5b42be8edcd1282fd437964894..57d60bcb1b247c9616c859b7ac8a475acdcd878f 100644 --- a/src/statistics.c +++ b/src/statistics.c @@ -35,6 +35,7 @@ #include "cooling.h" #include "engine.h" #include "error.h" +#include "gravity.h" #include "hydro.h" #include "threadpool.h" @@ -164,10 +165,12 @@ void stats_collect_part_mapper(void *map_data, int nr_parts, void *extra_data) { stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); stats.E_int += m * hydro_get_internal_energy(p); stats.E_rad += cooling_get_radiated_energy(xp); - stats.E_pot_self += 0.f; - if (gp != NULL) + if (gp != NULL) { + stats.E_pot_self += m * gravity_get_potential(gp); stats.E_pot_ext += m * external_gravity_get_potential_energy( time, potential, phys_const, gp); + } + /* Collect entropy */ stats.entropy += m * hydro_get_entropy(p); } @@ -224,7 +227,7 @@ void stats_collect_gpart_mapper(void *map_data, int nr_gparts, gp->v_full[1] + gp->a_grav[1] * dt, gp->v_full[2] + gp->a_grav[2] * dt}; - const float m = gp->mass; + const float m = gravity_get_mass(gp); const double x[3] = {gp->x[0], gp->x[1], gp->x[2]}; /* Collect mass */ @@ -242,7 +245,7 @@ void stats_collect_gpart_mapper(void *map_data, int nr_gparts, /* Collect energies. */ stats.E_kin += 0.5f * m * (v[0] * v[0] + v[1] * v[1] + v[2] * v[2]); - stats.E_pot_self += 0.f; + stats.E_pot_self += m * gravity_get_potential(gp); stats.E_pot_ext += m * external_gravity_get_potential_energy( time, potential, phys_const, gp); } diff --git a/src/task.c b/src/task.c index 40cb41ffe057ac78dc3b8b073e29eedb052ec2de..e8c35e49a57595a6415c60ce7071ae1c0e3f09b7 100644 --- a/src/task.c +++ b/src/task.c @@ -47,15 +47,27 @@ #include "lock.h" /* Task type names. */ -const char *taskID_names[task_type_count] = { - "none", "sort", "self", - "pair", "sub_self", "sub_pair", - "init", "init_grav", "ghost", - "extra_ghost", "drift", "kick1", - "kick2", "timestep", "send", - "recv", "grav_top_level", "grav_long_range", - "grav_mm", "grav_down", "cooling", - "sourceterms"}; +const char *taskID_names[task_type_count] = {"none", + "sort", + "self", + "pair", + "sub_self", + "sub_pair", + "init_grav", + "ghost", + "extra_ghost", + "drift", + "kick1", + "kick2", + "timestep", + "send", + "recv", + "grav_top_level", + "grav_long_range", + "grav_mm", + "grav_down", + "cooling", + "sourceterms"}; /* Sub-task type names. */ const char *subtaskID_names[task_subtype_count] = { @@ -152,7 +164,6 @@ __attribute__((always_inline)) INLINE static enum task_actions task_acts_on( } break; - case task_type_init: case task_type_kick1: case task_type_kick2: case task_type_timestep: @@ -272,11 +283,6 @@ void task_unlock(struct task *t) { /* Act based on task type. */ switch (type) { - case task_type_init_grav: - cell_munlocktree(ci); - break; - - case task_type_init: case task_type_kick1: case task_type_kick2: case task_type_timestep: @@ -363,12 +369,6 @@ int task_lock(struct task *t) { #endif break; - case task_type_init_grav: - if (ci->mhold) return 0; - if (cell_mlocktree(ci) != 0) return 0; - break; - - case task_type_init: case task_type_kick1: case task_type_kick2: case task_type_timestep: diff --git a/src/task.h b/src/task.h index 02dbcbfe258b3db718b0e217fd2b65e56cc03f34..049f86bdd6b4baf0856745b2b53acda5cca8c9e1 100644 --- a/src/task.h +++ b/src/task.h @@ -34,6 +34,8 @@ /** * @brief The different task types. + * + * Be sure to update the taskID_names array in tasks.c if you modify this list! */ enum task_types { task_type_none = 0, @@ -42,7 +44,6 @@ enum task_types { task_type_pair, task_type_sub_self, task_type_sub_pair, - task_type_init, task_type_init_grav, task_type_ghost, task_type_extra_ghost, @@ -152,9 +153,6 @@ struct task { /*! Should the scheduler skip this task ? */ char skip; - /*! Does this task require the particles to be tightly in the cell ? */ - char tight; - /*! Is this task implicit (i.e. does not do anything) ? */ char implicit; @@ -167,8 +165,9 @@ struct task { #endif #ifdef SWIFT_DEBUG_CHECKS - int ti_run; -#endif + /* When was this task last run? */ + integertime_t ti_run; +#endif /* SWIFT_DEBUG_CHECKS */ } SWIFT_STRUCT_ALIGN; diff --git a/src/timers.c b/src/timers.c index 558ac109c224d939272152aa43684ad842c511de..aa42eee14fc0df3edd5a18340c092b8eea2ffac1 100644 --- a/src/timers.c +++ b/src/timers.c @@ -21,16 +21,21 @@ ******************************************************************************/ /* Config parameters. */ -#include "../config.h" /* This object's header. */ #include "timers.h" +/* Some standard headers. */ +#include <stdio.h> + +/* Local includes. */ +#include "clocks.h" + /* The timers. */ ticks timers[timer_count]; /* Timer names. */ -char *timers_names[timer_count] = { +const char* timers_names[timer_count] = { "none", "prepare", "init", @@ -63,20 +68,27 @@ char *timers_names[timer_count] = { "dosub_pair_gradient", "dosub_pair_force", "dosub_pair_grav", + "doself_subset", "dopair_subset", + "dopair_subset_naive", + "dosub_subset", "do_ghost", "do_extra_ghost", "dorecv_part", "dorecv_gpart", "dorecv_spart", + "do_cooling", "gettask", "qget", "qsteal", + "locktree", "runners", "step", - "do_cooling", }; +/* File to store the timers */ +static FILE* timers_file; + /** * @brief Re-set the timers. * @@ -90,3 +102,43 @@ void timers_reset(unsigned long long mask) { for (int k = 0; k < timer_count; k++) if (mask & (1ull << k)) timers[k] = 0; } + +/** + * @brief Re-set all the timers. + * + */ +void timers_reset_all() { timers_reset(timers_mask_all); } + +/** + * @brief Outputs all the timers to the timers dump file. + * + * @param step The current step. + */ +void timers_print(int step) { + fprintf(timers_file, "%d\t", step); + for (int k = 0; k < timer_count; k++) + fprintf(timers_file, "%.3f\t", clocks_from_ticks(timers[k])); + fprintf(timers_file, "\n"); +} + +/** + * @brief Opens the file to contain the timers info and print a header + * + * @param rank The MPI rank of the file. + */ +void timers_open_file(int rank) { + + char buff[100]; + sprintf(buff, "timers_%d.txt", rank); + timers_file = fopen(buff, "w"); + + fprintf(timers_file, "# timers: \n# step | "); + for (int k = 0; k < timer_count; k++) + fprintf(timers_file, "%s\t", timers_names[k]); + fprintf(timers_file, "\n"); +} + +/** + * @brief Close the file containing the timer info. + */ +void timers_close_file() { fclose(timers_file); } diff --git a/src/timers.h b/src/timers.h index ef84a82eab4c0b11f758d75bb8f530d320707814..08e983a947bc57d9dcc7a432df92c2a4b0a1f7d7 100644 --- a/src/timers.h +++ b/src/timers.h @@ -22,7 +22,10 @@ #ifndef SWIFT_TIMERS_H #define SWIFT_TIMERS_H -/* Includes. */ +/* Config parameters. */ +#include "../config.h" + +/* Local includes. */ #include "atomic.h" #include "cycle.h" #include "inline.h" @@ -66,18 +69,22 @@ enum { timer_dosub_pair_gradient, timer_dosub_pair_force, timer_dosub_pair_grav, + timer_doself_subset, timer_dopair_subset, + timer_dopair_subset_naive, + timer_dosub_subset, timer_do_ghost, timer_do_extra_ghost, timer_dorecv_part, timer_dorecv_gpart, timer_dorecv_spart, + timer_do_cooling, timer_gettask, timer_qget, timer_qsteal, + timer_locktree, timer_runners, timer_step, - timer_do_cooling, timer_count, }; @@ -85,32 +92,34 @@ enum { extern ticks timers[timer_count]; /* The timer names. */ -extern char *timers_names[]; +extern const char *timers_names[]; /* Mask for all timers. */ #define timers_mask_all ((1ull << timer_count) - 1) /* Define the timer macros. */ -#ifdef TIMER -#define TIMER_TIC_ND tic = getticks(); -#define TIMER_TIC2_ND ticks tic2 = getticks(); -#define TIMER_TIC ticks tic = getticks(); +#ifdef SWIFT_USE_TIMERS +#define TIMER_TIC const ticks tic = getticks(); #define TIMER_TOC(t) timers_toc(t, tic) -#define TIMER_TIC2 ticks tic2 = getticks(); +#define TIMER_TIC2 const ticks tic2 = getticks(); #define TIMER_TOC2(t) timers_toc(t, tic2) INLINE static ticks timers_toc(unsigned int t, ticks tic) { - ticks d = (getticks() - tic); + const ticks d = (getticks() - tic); atomic_add(&timers[t], d); return d; } #else #define TIMER_TIC -#define TIMER_TOC(t) +#define TIMER_TOC(t) (void)0 #define TIMER_TIC2 -#define TIMER_TOC2(t) +#define TIMER_TOC2(t) (void)0 #endif /* Function prototypes. */ +void timers_reset_all(); void timers_reset(unsigned long long mask); +void timers_open_file(int rank); +void timers_close_file(); +void timers_print(int step); #endif /* SWIFT_TIMERS_H */ diff --git a/src/vector.h b/src/vector.h index 4091ecdceeb150118ebab772532c2d08edc26aee..ba803a666954fe8c22e7ba5a01147c22cd7f028a 100644 --- a/src/vector.h +++ b/src/vector.h @@ -107,8 +107,15 @@ } /* Performs a horizontal add on the vector and adds the result to a float. */ +#ifdef __ICC #define VEC_HADD(a, b) b += _mm512_reduce_add_ps(a.v) - +#else /* _mm512_reduce_add_ps not present in GCC compiler. \ + TODO: Implement intrinsic version.*/ +#define VEC_HADD(a, b) \ + { \ + for (int i = 0; i < VEC_SIZE; i++) b += a.f[i]; \ + } +#endif /* Calculates the number of set bits in the mask and adds the result to an int. */ #define VEC_FORM_PACKED_MASK(mask, v_mask, pack) \ diff --git a/src/vector_power.h b/src/vector_power.h index f2fd4381751706f6fca1e24daa131d8fd8a0eeba..6e8377586fdbc072e8f894c0805f43c122f8c54f 100644 --- a/src/vector_power.h +++ b/src/vector_power.h @@ -34,8 +34,8 @@ /* Config parameters. */ #include "../config.h" -/* Some standard headers. */ -#include <math.h> +/* Local headers. */ +#include "inline.h" /***************************/ /* 0th order vector powers */ diff --git a/tests/Makefile.am b/tests/Makefile.am index 8a75182323859cb5662b0f7d0e83bd0f25f78b4e..a51b8eb82a17313818ff956ca3f15a232df8df65 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -15,15 +15,14 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # Add the source directory and debug to CFLAGS -AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) -DTIMER - +AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) # List of programs and scripts to run in the test suite TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \ testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh \ - testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT \ + testParser.sh testSPHStep test125cells.sh testFFT \ testAdiabaticIndex testRiemannExact testRiemannTRRS testRiemannHLLC \ testMatrixInversion testThreadpool testDump testLogger \ testVoronoi1D testVoronoi2D testVoronoi3D @@ -31,7 +30,7 @@ TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetr # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testSPHStep testPair test27cells test125cells testParser \ - testKernel testKernelGrav testFFT testInteractions testMaths \ + testKernel testFFT testInteractions testMaths \ testSymmetry testThreadpool benchmarkInteractions \ testAdiabaticIndex testRiemannExact testRiemannTRRS \ testRiemannHLLC testMatrixInversion testDump testLogger \ @@ -62,8 +61,6 @@ testParser_SOURCES = testParser.c testKernel_SOURCES = testKernel.c -testKernelGrav_SOURCES = testKernelGrav.c - testFFT_SOURCES = testFFT.c testInteractions_SOURCES = testInteractions.c diff --git a/tests/difffloat.py b/tests/difffloat.py index e0f0864372264899c6de1bf2f83ab678b7dd9ead..f989ccb56fb1f008d4a60931cb7165f4f2156ebd 100644 --- a/tests/difffloat.py +++ b/tests/difffloat.py @@ -35,13 +35,18 @@ file1 = sys.argv[1] file2 = sys.argv[2] number_to_check = -1 -if len(sys.argv) == 5: - number_to_check = int(sys.argv[4]) - fileTol = "" if len(sys.argv) >= 4: fileTol = sys.argv[3] +if len(sys.argv) >= 5: + number_to_check = int(sys.argv[4]) + +if len(sys.argv) == 6: + ignoreSmallRhoDh = int(sys.argv[5]) +else: + ignoreSmallRhoDh = 0 + data1 = loadtxt(file1) data2 = loadtxt(file2) if fileTol != "": @@ -102,8 +107,11 @@ for i in range(n_lines_to_check): print "" error = True - if abs(data1[i,j]) < 1e-6 and + abs(data2[i,j]) < 1e-6 : continue - + if abs(data1[i,j]) + abs(data2[i,j]) < 1e-6 : continue + + # Ignore pathological cases with rho_dh + if ignoreSmallRhoDh and j == 8 and abs(data1[i,j]) < 2e-4: continue + if( rel_diff > 1.1*relTol[j]): print "Relative difference larger than tolerance (%e) for particle %d, column %d:"%(relTol[j], i,j) print "%10s: a = %e"%("File 1", data1[i,j]) diff --git a/tests/test125cells.c b/tests/test125cells.c index 517833eb2b1fe84c2d9811d090294acab2511b19..76112029016b7e28eeeaf69967a3b1ba3f8f0ef3 100644 --- a/tests/test125cells.c +++ b/tests/test125cells.c @@ -201,8 +201,9 @@ void get_solution(const struct cell *main_cell, struct solution_part *solution, } } -void reset_particles(struct cell *c, enum velocity_field vel, - enum pressure_field press, float size, float density) { +void reset_particles(struct cell *c, struct hydro_space *hs, + enum velocity_field vel, enum pressure_field press, + float size, float density) { for (int i = 0; i < c->count; ++i) { @@ -211,6 +212,8 @@ void reset_particles(struct cell *c, enum velocity_field vel, set_velocity(p, vel, size); set_energy_state(p, press, size, density); + hydro_init_part(p, hs); + #if defined(GIZMO_SPH) || defined(SHADOWFAX_SPH) float volume = p->conserved.mass / density; #if defined(GIZMO_SPH) @@ -326,6 +329,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->count = count; cell->gcount = 0; cell->dx_max = 0.; + cell->dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -336,6 +340,7 @@ struct cell *make_cell(size_t n, const double offset[3], double size, double h, cell->ti_old = 8; cell->ti_end_min = 8; cell->ti_end_max = 8; + cell->ti_sort = 0; // shuffle_particles(cell->parts, cell->count); @@ -537,7 +542,11 @@ int main(int argc, char *argv[]) { /* Build the infrastructure */ struct space space; - space.periodic = 0; + space.periodic = 1; + space.dim[0] = 3.; + space.dim[1] = 3.; + space.dim[2] = 3.; + hydro_space_init(&space.hs, &space); struct phys_const prog_const; prog_const.const_newton_G = 1.f; @@ -601,18 +610,13 @@ int main(int argc, char *argv[]) { const ticks tic = getticks(); - /* Start with a gentle kick */ - // runner_do_kick1(&runner, main_cell, 0); - - /* And a gentle drift */ - // runner_do_drift_particles(&runner, main_cell, 0); + /* Initialise the particles */ + for (int j = 0; j < 125; ++j) + runner_do_drift_particles(&runner, cells[j], 0); /* First, sort stuff */ for (int j = 0; j < 125; ++j) runner_do_sort(&runner, cells[j], 0x1FFF, 0); - /* Initialise the particles */ - for (int j = 0; j < 125; ++j) runner_do_init(&runner, cells[j], 0); - /* Do the density calculation */ #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) @@ -686,8 +690,6 @@ int main(int argc, char *argv[]) { /* Finally, give a gentle kick */ runner_do_end_force(&runner, main_cell, 0); - // runner_do_kick2(&runner, main_cell, 0); - const ticks toc = getticks(); time += toc - tic; @@ -704,20 +706,21 @@ int main(int argc, char *argv[]) { message("SWIFT calculation took : %15lli ticks.", time / runs); for (int j = 0; j < 125; ++j) - reset_particles(cells[j], vel, press, size, rho); + reset_particles(cells[j], &space.hs, vel, press, size, rho); /* NOW BRUTE-FORCE CALCULATION */ const ticks tic = getticks(); - /* Kick the central cell */ - // runner_do_kick1(&runner, main_cell, 0); +/* Kick the central cell */ +// runner_do_kick1(&runner, main_cell, 0); - /* And drift it */ - runner_do_drift_particles(&runner, main_cell, 0); +/* And drift it */ +// runner_do_drift_particles(&runner, main_cell, 0); - /* Initialise the particles */ - for (int j = 0; j < 125; ++j) runner_do_init(&runner, cells[j], 0); +/* Initialise the particles */ +// for (int j = 0; j < 125; ++j) runner_do_drift_particles(&runner, cells[j], +// 0); /* Do the density calculation */ #if !(defined(MINIMAL_SPH) && defined(WITH_VECTORIZATION)) diff --git a/tests/test27cells.c b/tests/test27cells.c index 9e79c097462203465c03ea056569c7f448125d7e..bd827b68e90ea5f4e9d5577612e6cecda2edf83a 100644 --- a/tests/test27cells.c +++ b/tests/test27cells.c @@ -159,6 +159,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->h_max = h; cell->count = count; cell->dx_max = 0.; + cell->dx_max_sort = 0.; cell->width[0] = size; cell->width[1] = size; cell->width[2] = size; @@ -169,6 +170,7 @@ struct cell *make_cell(size_t n, double *offset, double size, double h, cell->ti_old = 8; cell->ti_end_min = 8; cell->ti_end_max = 8; + cell->ti_sort = 8; shuffle_particles(cell->parts, cell->count); @@ -407,13 +409,20 @@ int main(int argc, char *argv[]) { /* Build the infrastructure */ struct space space; - space.periodic = 0; + space.periodic = 1; + space.dim[0] = 3.; + space.dim[1] = 3.; + space.dim[2] = 3.; + + struct hydro_props hp; + hp.h_max = FLT_MAX; struct engine engine; engine.s = &space; engine.time = 0.1f; engine.ti_current = 8; engine.max_active_bin = num_time_bins; + engine.hydro_properties = &hp; struct runner runner; runner.e = &engine; @@ -429,6 +438,8 @@ int main(int argc, char *argv[]) { cells[i * 9 + j * 3 + k] = make_cell(particles, offset, size, h, rho, &partId, perturbation, vel); + runner_do_drift_particles(&runner, cells[i * 9 + j * 3 + k], 0); + runner_do_sort(&runner, cells[i * 9 + j * 3 + k], 0x1FFF, 0); } } @@ -542,8 +553,7 @@ int main(int argc, char *argv[]) { dump_particle_fields(outputFileName, main_cell, cells); /* Check serial results against the vectorised results. */ - if (check_results(main_cell->parts, vec_parts, main_cell->count, - threshold)) + if (check_results(main_cell->parts, vec_parts, main_cell->count, threshold)) message("Differences found..."); /* Output timing */ diff --git a/tests/test27cellsPerturbed.sh.in b/tests/test27cellsPerturbed.sh.in index 30498594b659101216b51dfea2346fa9230dbc97..1eb7b31bcd668e7362a9eb907be501aa8016abb8 100755 --- a/tests/test27cellsPerturbed.sh.in +++ b/tests/test27cellsPerturbed.sh.in @@ -10,7 +10,7 @@ do if [ -e brute_force_27_perturbed.dat ] then - python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6 + python @srcdir@/difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat @srcdir@/tolerance_27_perturbed.dat 6 1 else exit 1 fi diff --git a/tests/testKernel.c b/tests/testKernel.c index 13f4e36534eb11a4c8f7ba9c19a48de6599e31f5..0f627675cbfbcbc6fe926ee6617f83d3aeacb5de 100644 --- a/tests/testKernel.c +++ b/tests/testKernel.c @@ -39,7 +39,7 @@ int main() { const float numPoints_inv = 1. / numPoints; for (int i = 0; i < numPoints; ++i) { - u[i] = i * 2.5f * numPoints_inv / h; + u[i] = i * 2.25f * numPoints_inv / h; } for (int i = 0; i < numPoints; ++i) { @@ -55,18 +55,22 @@ int main() { #ifdef WITH_VECTORIZATION + printf("\nVector Output for kernel_deval_1_vec\n"); + printf("-------------\n"); + + /* Test vectorised kernel that uses one vector. */ for (int i = 0; i < numPoints; i += VEC_SIZE) { vector vx, vx_h; vector W_vec, dW_vec; for (int j = 0; j < VEC_SIZE; j++) { - vx.f[j] = (i + j) * 2.5f / numPoints; + vx.f[j] = (i + j) * 2.25f / numPoints; } vx_h.v = vx.v / vec_set1(h); - kernel_deval_vec(&vx_h, &W_vec, &dW_vec); + kernel_deval_1_vec(&vx_h, &W_vec, &dW_vec); for (int j = 0; j < VEC_SIZE; j++) { printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, @@ -85,6 +89,63 @@ int main() { } } + printf("\nVector Output for kernel_deval_2_vec\n"); + printf("-------------\n"); + + /* Test vectorised kernel that uses two vectors. */ + for (int i = 0; i < numPoints; i += VEC_SIZE) { + + vector vx, vx_h; + vector W_vec, dW_vec; + + vector vx_2, vx_h_2; + vector W_vec_2, dW_vec_2; + + for (int j = 0; j < VEC_SIZE; j++) { + vx.f[j] = (i + j) * 2.25f / numPoints; + vx_2.f[j] = (i + j) * 2.25f / numPoints; + } + + vx_h.v = vx.v / vec_set1(h); + vx_h_2.v = vx_2.v / vec_set1(h); + + kernel_deval_2_vec(&vx_h, &W_vec, &dW_vec, &vx_h_2, &W_vec_2, &dW_vec_2); + + /* Check first vector results. */ + for (int j = 0; j < VEC_SIZE; j++) { + printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, + h * kernel_gamma, vx.f[j], W_vec.f[j], dW_vec.f[j]); + + if (fabsf(W_vec.f[j] - W[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j], + W_vec.f[j]); + return 1; + } + if (fabsf(dW_vec.f[j] - dW[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j], + dW_vec.f[j]); + return 1; + } + } + + /* Check second vector results. */ + for (int j = 0; j < VEC_SIZE; j++) { + printf("%2d: h= %f H= %f x=%f W(x,h)=%f dW(x,h)=%f\n", i + j, h, + h * kernel_gamma, vx_2.f[j], W_vec_2.f[j], dW_vec_2.f[j]); + + if (fabsf(W_vec_2.f[j] - W[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", W[i + j], + W_vec_2.f[j]); + return 1; + } + if (fabsf(dW_vec_2.f[j] - dW[i + j]) > 2e-7) { + printf("Invalid value ! scalar= %e, vector= %e\n", dW[i + j], + dW_vec_2.f[j]); + return 1; + } + } + } + printf("\nAll values are consistent\n"); #endif diff --git a/tests/testVoronoi2D.c b/tests/testVoronoi2D.c index 7a35961952079494b0d1567db57abd29fa1df35b..509d3ab69976fa8618db389ebd87eedb9ea34409 100644 --- a/tests/testVoronoi2D.c +++ b/tests/testVoronoi2D.c @@ -107,8 +107,10 @@ int main() { Atot = 0.0f; /* print the cells to the stdout */ for (i = 0; i < TESTVORONOI2D_NUMCELL; ++i) { +#ifdef VORONOI_VERBOSE printf("Cell %i:\n", i); voronoi_print_cell(&cells[i]); +#endif voronoi_cell_finalize(&cells[i]); Atot += cells[i].volume; } @@ -185,8 +187,10 @@ int main() { Atot = 0.0f; /* print the cells to the stdout */ for (i = 0; i < 100; ++i) { +#ifdef VORONOI_VERBOSE printf("Cell %i:\n", i); voronoi_print_cell(&cells[i]); +#endif voronoi_cell_finalize(&cells[i]); Atot += cells[i].volume; } diff --git a/tests/tolerance_27_perturbed.dat b/tests/tolerance_27_perturbed.dat index 53de4ec7632039a56a3757488881e890296e3ac8..b6ed8c2c18808156056f9c5816212866c2dfa998 100644 --- a/tests/tolerance_27_perturbed.dat +++ b/tests/tolerance_27_perturbed.dat @@ -1,3 +1,3 @@ # ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-6 1e-4 5e-5 2e-3 3.1e-6 3e-6 3e-6 3e-6 - 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1.2e-2 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 2e-3 1e-5 1e-4 2e-5 2e-3 2e-3 2e-3 diff --git a/theory/Multipoles/bibliography.bib b/theory/Multipoles/bibliography.bib index abdb5855f52aefd9b4562851946d16bd402db68a..12e274dd63093ba1e14750249f2538c092e5268a 100644 --- a/theory/Multipoles/bibliography.bib +++ b/theory/Multipoles/bibliography.bib @@ -24,4 +24,75 @@ archivePrefix = "arXiv", pages={13}, year={2006} } - + +@ARTICLE{Monaghan1985, + author = {{Monaghan}, J.~J. and {Lattanzio}, J.~C.}, + title = "{A refined particle method for astrophysical problems}", + journal = {\aap}, + keywords = {Computational Astrophysics, Gravitational Collapse, Gravitational Fields, Many Body Problem, Molecular Clouds, Stellar Evolution, Angular Momentum, Binary Stars, Computational Grids, Interpolation, Kernel Functions, Particle Mass, Stellar Orbits}, + year = 1985, + month = aug, + volume = 149, + pages = {135-143}, + adsurl = {http://adsabs.harvard.edu/abs/1985A%26A...149..135M}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Price2007, + author = {{Price}, D.~J. and {Monaghan}, J.~J.}, + title = "{An energy-conserving formalism for adaptive gravitational force softening in smoothed particle hydrodynamics and N-body codes}", + journal = {\mnras}, + eprint = {astro-ph/0610872}, + keywords = {gravitation, hydrodynamics, methods: N-body simulations, methods: numerical}, + year = 2007, + month = feb, + volume = 374, + pages = {1347-1358}, + doi = {10.1111/j.1365-2966.2006.11241.x}, + adsurl = {http://adsabs.harvard.edu/abs/2007MNRAS.374.1347P}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Dehnen2001, + author = {{Dehnen}, W.}, + title = "{Towards optimal softening in three-dimensional N-body codes - I. Minimizing the force error}", + journal = {\mnras}, + eprint = {astro-ph/0011568}, + keywords = {STELLAR DYNAMICS, METHODS: N-BODY SIMULATIONS, METHODS: NUMERICAL}, + year = 2001, + month = jun, + volume = 324, + pages = {273-291}, + doi = {10.1046/j.1365-8711.2001.04237.x}, + adsurl = {http://adsabs.harvard.edu/abs/2001MNRAS.324..273D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Hernquist1989, + author = {{Hernquist}, L. and {Katz}, N.}, + title = "{TREESPH - A unification of SPH with the hierarchical tree method}", + journal = {\apjs}, + keywords = {Computational Fluid Dynamics, Computerized Simulation, Data Smoothing, Magnetohydrodynamics, Trees (Mathematics), Dynamical Systems, Many Body Problem, Monte Carlo Method, Spatial Resolution}, + year = 1989, + month = jun, + volume = 70, + pages = {419-446}, + doi = {10.1086/191344}, + adsurl = {http://adsabs.harvard.edu/abs/1989ApJS...70..419H}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@Article{Wendland1995, +author="Wendland, Holger", +title="Piecewise polynomial, positive definite and compactly supported radial functions of minimal degree", +journal="Advances in Computational Mathematics", +year="1995", +volume="4", +number="1", +pages="389--396", +abstract="We construct a new class of positive definite and compactly supported radial functions which consist of a univariate polynomial within their support. For given smoothness and space dimension it is proved that they are of minimal degree and unique up to a constant factor. Finally, we establish connections between already known functions of this kind.", +issn="1572-9044", +doi="10.1007/BF02123482", +url="http://dx.doi.org/10.1007/BF02123482" +} + diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex index 6948708622dbf0cfc05daa9d63917f7a3017bfc2..fcd727a89abe95bba69b23c58ce5067c8cc53211 100644 --- a/theory/Multipoles/fmm_standalone.tex +++ b/theory/Multipoles/fmm_standalone.tex @@ -1,4 +1,4 @@ -\documentclass[fleqn, usenatbib, useAMS,a4paper]{mnras} +\documentclass[fleqn, usenatbib, useAMS, a4paper]{mnras} \usepackage{graphicx} \usepackage{amsmath,paralist,xcolor,xspace,amssymb} \usepackage{times} @@ -13,10 +13,18 @@ \maketitle +We use the multi-index notation of \cite{Dehnen2014} to simplify expressions. + +\input{potential_softening} \input{gravity_derivatives} \bibliographystyle{mnras} \bibliography{./bibliography.bib} +\appendix +\onecolumn +\input{potential_derivatives} + + \end{document} diff --git a/theory/Multipoles/gravity_derivatives.tex b/theory/Multipoles/gravity_derivatives.tex index 913bdf752c666aac5913c6e6452bdb06f3fcdc86..e4c7b1565ab6c82de5623d5a643c3a8bd1fa513f 100644 --- a/theory/Multipoles/gravity_derivatives.tex +++ b/theory/Multipoles/gravity_derivatives.tex @@ -1,5 +1,3 @@ -We use the multi-index notation of \cite{Dehnen2014} to simplify expressions. - \subsection{Derivatives of the gravitational potential} The calculation of all the diff --git a/theory/Multipoles/potential.py b/theory/Multipoles/potential.py new file mode 100644 index 0000000000000000000000000000000000000000..559f590762a3cbef171c5dd584cbc517879a2cec --- /dev/null +++ b/theory/Multipoles/potential.py @@ -0,0 +1,181 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## +import matplotlib +matplotlib.use("Agg") +from pylab import * +from scipy import integrate +from scipy.optimize import curve_fit +from scipy.optimize import fsolve +from matplotlib.font_manager import FontProperties +import numpy +import math + +params = {'axes.labelsize': 9, +'axes.titlesize': 10, +'font.size': 10, +'legend.fontsize': 10, +'xtick.labelsize': 8, +'ytick.labelsize': 8, +'text.usetex': True, +'figure.figsize' : (3.15,3.15), +'figure.subplot.left' : 0.115, +'figure.subplot.right' : 0.99 , +'figure.subplot.bottom' : 0.065 , +'figure.subplot.top' : 0.99 , +'figure.subplot.wspace' : 0. , +'figure.subplot.hspace' : 0. , +'lines.markersize' : 6, +'lines.linewidth' : 3., +'text.latex.unicode': True +} +rcParams.update(params) +rc('font',**{'family':'sans-serif','sans-serif':['Times']}) + +# Parameters +epsilon = 2. +r_min = 0. +r_max = 4 +r_max_plot = 2.6 + +# Radius +r = linspace(r_min, r_max, 401) +r[0] += 1e-9 +u = r / epsilon + +# Newtonian solution +phi_newton = 1. / r +F_newton = 1. / r**2 +W_newton = 0. * r + +# Softened potential +phi = np.zeros(np.size(r)) +W = np.zeros(np.size(r)) +F = np.zeros(np.size(r)) +for i in range(np.size(r)): + if r[i] > epsilon: + phi[i] = 1. / r[i] + W[i] = 0. + F[i] = 1. / r[i]**2 + else: + phi[i] = (-1./epsilon) * (3.*u[i]**7 - 15.*u[i]**6 + 28.*u[i]**5 - 21.*u[i]**4 + 7.*u[i]**2 - 3.) + W[i] = (21. / (2.*math.pi)) * (4.*u[i]**5 - 15.*u[i]**4 + 20.*u[i]**3 - 10.*u[i]**2 + 1.) / epsilon**3 + F[i] = (1./epsilon**2) * (21.*u[i]**6 - 90*u[i]**5 + 140.*u[i]**4 - 84.*u[i]**3 + 14*u[i]) + +plummer_equivalent_factor = phi[0] * epsilon + +print "Plummer-equivalent factor:", plummer_equivalent_factor + +epsilon_plummer = epsilon / plummer_equivalent_factor + +# Plummer potential +phi_plummer = (1. / epsilon_plummer) * (1 + (r / epsilon_plummer)**2)**(-1./2.) +F_plummer = (1. / epsilon_plummer**3) * r / (1 + (r / epsilon_plummer )**2)**(3./2.) +def eta_plummer(r): + return (3. / (4.*math.pi)) * 1. / (1 + r**2)**(5./2.) +W_plummer = (1. / epsilon_plummer**3) * eta_plummer(r / epsilon_plummer) + + +# Gadget-2 potential +epsilon_gadget = epsilon #/ plummer_equivalent_factor * 2.8 +phi_gadget2 = np.zeros(np.size(r)) +W_gadget2 = np.zeros(np.size(r)) +F_gadget2 = np.zeros(np.size(r)) +for i in range(np.size(r)): + if r[i] > epsilon_gadget: + phi_gadget2[i] = 1. / r[i] + W_gadget2[i] = 0. + F_gadget2[i] = 1. / r[i]**2 + elif r[i] > epsilon_gadget / 2.: + phi_gadget2[i] = -((32/3.)*u[i]**2 - 16.*u[i]**3 + (96./10.)*u[i]**4 - (64./30.)*u[i]**5 - (16./5.) + 1./(15.*u[i]) )/ (epsilon_gadget) + W_gadget2[i] = (8. / math.pi) * (2. * (1- u[i])**3) / epsilon_gadget**3 + F_gadget2[i] = u[i] * (21.333333 - 48*u[i] + 38.4*u[i]**2 - 10.6666667*u[i]**3 - 0.06666667*u[i]**-3) / epsilon_gadget**2 + else: + phi_gadget2[i] = -((16./3.)*u[i]**2 - (96./10.)*u[i]**4 + (64./10.)*u[i]**5 - (14./5.) ) / (epsilon_gadget) + W_gadget2[i] = (8. / math.pi) * (1. - 6.*u[i]**2 + 6.*u[i]**3) / epsilon_gadget**3 + F_gadget2[i] = u[i] * (10.666667 + u[i]**2 * (32. * u[i] - 38.4)) / epsilon_gadget**2 + +figure() +colors=['#4477AA', '#CC6677', '#DDCC77', '#117733'] + +# Density +subplot(311) +plot(r, W_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) +plot(r, W_plummer, ':', lw=1.4, label="${\\rm Plummer}$", color=colors[1]) +plot(r, W_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) +plot(r, W, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +plot([epsilon, epsilon], [0, 10], 'k-', alpha=0.5, lw=0.5) +plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) + +legend(loc="upper right", frameon=True, handletextpad=0.1, handlelength=3.2, fontsize=8) + +xlim(0,r_max_plot) +xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""]) + +ylim(0., 0.84) +yticks([0, 0.2, 0.4, 0.6, 0.8], ["$0$", "$0.2$", "$0.4$", "$0.6$", "$0.8$"]) +ylabel("$\\rho(r)$", labelpad=2) + +# Potential +subplot(312) +plot(r, phi_newton, '--', lw=1.4, label="${\\rm Newtonian}$", color=colors[0]) +plot(r, phi_plummer, ':', lw=1.4, label="${\\rm Plummer}$", color=colors[1]) +plot(r, phi_gadget2, '-', lw=1.4, label="${\\rm Gadget}$", color=colors[2]) +plot(r, phi, '-', lw=1.4, label="${\\rm SWIFT}$", color=colors[3]) +plot([epsilon, epsilon], [-10, 10], 'k-', alpha=0.5, lw=0.5) +plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) + +ylim(0, 2.3) +ylabel("$|\\phi(r)|$", labelpad=1) +#yticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0.*epsilon), "$%.1f$"%(0.5*epsilon), "$%.1f$"%(1.*epsilon), "$%.1f$"%(1.5*epsilon), "$%.1f$"%(2.*epsilon)]) + +xlim(0,r_max_plot) +xticks([0., 0.5, 1., 1.5, 2., 2.5], ["", "", "", "", "", ""]) + +# Force +subplot(313) +plot(r, F_newton, '--', lw=1.4, color=colors[0]) +plot(r, F_plummer, ':', lw=1.4, color=colors[1]) +plot(r, F_gadget2, '-', lw=1.4, color=colors[2]) +plot(r, F, '-', lw=1.4, color=colors[3]) +plot([epsilon, epsilon], [0, 10], 'k-', alpha=0.5, lw=0.5) +plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) +text(epsilon+0.03, 0.05, "$\\epsilon$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8) +text(epsilon/plummer_equivalent_factor+0.03, 0.05, "$\\epsilon_{\\rm Plummer}$", color='k', alpha=0.5, rotation=90, va="bottom", ha="left", fontsize=8) + +xlim(0,r_max_plot) +xticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0./epsilon), "", "$%.1f$"%(1./epsilon), "", "$%.1f$"%(2./epsilon)]) +xlabel("$r/H$", labelpad=-7) + +ylim(0, 0.95) +ylabel("$|\\overrightarrow{\\nabla}\\phi(r)|$", labelpad=0) + +savefig("potential.pdf") + + + + +#Construct potential +# phi = np.zeros(np.size(r)) +# for i in range(np.size(r)): +# if r[i] > 2*epsilon: +# phi[i] = 1./ r[i] +# elif r[i] > epsilon: +# phi[i] = -(1./epsilon) * ((32./3.)*u[i]**2 - (48./3.)*u[i]**3 + (38.4/4.)*u[i]**4 - (32./15.)*u[i]**5 + (2./30.)*u[i]**(-1) - (9/5.)) +# else: +# phi[i] = -(1./epsilon) * ((32./6.)*u[i]**2 - (38.4/4.)*u[i]**4 + (32./5.)*u[i]**4 - (7./5.)) diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex new file mode 100644 index 0000000000000000000000000000000000000000..56184ce98902d76ad53ce1d49e3d6d67dfc33ac4 --- /dev/null +++ b/theory/Multipoles/potential_derivatives.tex @@ -0,0 +1,75 @@ +\subsection{Derivatives of the potential} + +For completeness, we give here the full expression for the first few +derivatives of the potential that are used in our FMM scheme. We use +the notation $\mathbf{r}=(r_x, r_y, r_z)$, $r = |\mathbf{r}|$ and +$u=r/H$. Starting from the potential (Eq. \ref{eq:fmm:potential}, +reproduced here for clarity), +\begin{align} +D_{000}(\mathbf{r}) = \phi (\mathbf{r},H) = +\left\lbrace\begin{array}{rcl} +\frac{1}{H} \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right) & \mbox{if} & u < 1,\\ +\frac{1}{r} & \mbox{if} & u \geq 1, +\end{array} +\right.\nonumber +\end{align} +we can construct the higher order terms by successively applying the +"chain rule". We show examples of the first few relevant ones here. + +\begin{align} +D_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \phi (\mathbf{r},H) = +\left\lbrace\begin{array}{rcl} +-\frac{r_x}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ +-\frac{r_x}{r^3} & \mbox{if} & u \geq 1, +\end{array} +\right.\nonumber +\end{align} + +\begin{align} +D_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \phi (\mathbf{r},H) = +\left\lbrace\begin{array}{rcl} +\frac{r_x^2}{H^5}\left(-105u^3+360u^2-420u+168\right) - +\frac{1}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ +3\frac{r_x^2}{r^5} - \frac{1}{r^3} & \mbox{if} & u \geq 1, +\end{array} +\right.\nonumber +\end{align} + +\begin{align} +D_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \phi (\mathbf{r},H) = +\left\lbrace\begin{array}{rcl} +\frac{r_xr_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ +3\frac{r_xr_y}{r^5} & \mbox{if} & u \geq 1, +\end{array} +\right.\nonumber +\end{align} + +\begin{align} +D_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = +\left\lbrace\begin{array}{rcl} +-\frac{r_x^3}{H^7} \left(315u - 720 + 420u^{-1}\right) + +\frac{3r_x}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ +-15\frac{r_x^3}{r^7} + 9 \frac{r_x}{r^5} & \mbox{if} & u \geq 1, +\end{array} +\right.\nonumber +\end{align} + +\begin{align} +D_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = +\left\lbrace\begin{array}{rcl} +-\frac{r_x^2r_y}{H^7} \left(315u - 720 + 420u^{-1}\right) + +\frac{r_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ +-15\frac{r_x^2r_y}{r^7} + 3 \frac{r_y}{r^5} & \mbox{if} & u \geq 1, +\end{array} +\right.\nonumber +\end{align} + + +\begin{align} +D_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \phi (\mathbf{r},H) = +\left\lbrace\begin{array}{rcl} +-\frac{r_xr_yr_z}{H^7} \left(315u - 720 + 420u^{-1}\right) & \mbox{if} & u < 1,\\ +-15\frac{r_xr_yr_z}{r^7} & \mbox{if} & u \geq 1, +\end{array} +\right.\nonumber +\end{align} diff --git a/theory/Multipoles/potential_softening.tex b/theory/Multipoles/potential_softening.tex new file mode 100644 index 0000000000000000000000000000000000000000..1186a9cec377fd8daa94e14d024115f95ecfdc99 --- /dev/null +++ b/theory/Multipoles/potential_softening.tex @@ -0,0 +1,52 @@ +\subsection{Gravitational softening} + +To avoid artificial two-body relaxation, the Dirac +$\delta$-distribution of particles is convolved with a softening +kernel of a given fixed, but time-variable, scale-length +$\epsilon$. Instead of the commonly used spline kernel of +\cite{Monaghan1985} (e.g. in \textsc{Gadget}), we use a C2 kernel +\citep{Wendland1995} which leads to an expression for the force that +is cheaper to compute and has a very similar overall shape. We set +$\tilde\delta(\mathbf{x}) = \rho(|\mathbf{x}|) = W(|\mathbf{x}|, +3\epsilon_{\rm Plummer})$, with $W(r, H)$ given by + +\begin{align} +W(r,H) &= \frac{21}{2\pi H^3} \times \nonumber \\ +&\left\lbrace\begin{array}{rcl} +4u^5 - 15u^4 + 20u^3 - 10u^2 + 1 & \mbox{if} & u < 1,\\ +0 & \mbox{if} & u \geq 1, +\end{array} +\right. +\end{align} +and $u = r/H$. The potential $\phi(r,H)$ corresponding to this density distribution reads +\begin{align} +\phi = +\left\lbrace\begin{array}{rcl} +\frac{1}{H} (-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3) & \mbox{if} & u < 1,\\ +\frac{1}{r} & \mbox{if} & u \geq 1. +\end{array} +\right. +\label{eq:fmm:potential} +\end{align} + +These choices, lead to a potential at $|\mathbf{x}| = 0$ equal to the +central potential of a Plummer sphere (i.e. $1/\epsilon_{\rm +Plummer}$)\footnote{Note the factor $3$ in the definition of +$\rho(|\mathbf{x}|)$ which differs from the factor $2.8$ used +in \textsc{Gadget} as a consequence of the change of kernel +shape.}. The softened density profile, its corresponding potential and +resulting forces are shown on Fig. \ref{fig:fmm:softening} (for +details see Sec. 2 of~\cite{Price2007}). + + +\begin{figure} +\includegraphics[width=\columnwidth]{potential.pdf} +\caption{The density (top), potential (middle) and forces (bottom) of +generated py a point mass in our softened gravitational scheme (for +completeness, we chose $\epsilon=2$). A +Plummer-equivalent sphere is shown for comparison. The spline kernel +of \citet{Monaghan1985}, used in \textsc{Gadget}, is shown for +comparison but note that it has not been re-scaled to match the +Plummer-sphere potential at $r=0$.} +\label{fig:fmm:softening} +\end{figure} diff --git a/theory/Multipoles/run.sh b/theory/Multipoles/run.sh index 34304a37dc1cb1fad3a7442544a8df793ea94d35..f25d407cd4ffe679a272f352798817f7c0c4e55a 100755 --- a/theory/Multipoles/run.sh +++ b/theory/Multipoles/run.sh @@ -1,5 +1,5 @@ #!/bin/bash -python kernels.py +python potential.py pdflatex -jobname=fmm fmm_standalone.tex bibtex fmm.aux pdflatex -jobname=fmm fmm_standalone.tex