diff --git a/.gitignore b/.gitignore index dba88cee0989e00baef51d9e75a42b1141d15714..3521b353ec4b1c4dea7192047e197596f083fe99 100644 --- a/.gitignore +++ b/.gitignore @@ -56,6 +56,8 @@ tests/testSingle tests/testTimeIntegration tests/testSPHStep tests/testKernel +tests/testKernelGrav +tests/testFFT tests/testInteractions tests/testSymmetry tests/testMaths diff --git a/configure.ac b/configure.ac index bbe2945679cb4f49523cd90d4edfb7c0224cfe5d..483937a9ce4b166410ee42e312b24b13551b5d6a 100644 --- a/configure.ac +++ b/configure.ac @@ -417,6 +417,17 @@ if test "$ac_cv_func_pthread_setaffinity_np" = "yes"; then fi fi +# Check for FFTW +have_fftw3="no" +AC_CHECK_HEADER([fftw3.h]) +if test "$ac_cv_header_fftw3_h" = "yes"; then + AC_CHECK_LIB([fftw3],[fftw_malloc], [AC_DEFINE([HAVE_FFTW], + [1],[Defined if FFTW 3.x exists.])] ) + FFTW_LIBS="-lfftw3" + have_fftw3="yes" +fi +AC_SUBST([FFTW_LIBS]) + # Check for Intel intrinsics header optionally used by vector.h. AC_CHECK_HEADERS([immintrin.h]) @@ -503,6 +514,7 @@ AC_MSG_RESULT([ HDF5 enabled : $with_hdf5 - parallel : $have_parallel_hdf5 Metis enabled : $have_metis + FFTW3 enabled : $have_fftw3 libNUMA enabled : $have_numa Using tcmalloc : $have_tcmalloc CPU profiler : $have_profiler diff --git a/examples/Makefile.am b/examples/Makefile.am index 1fedca0c3847c3e4d7942de09cbf409bcd69b8c6..187abcf9898b8024b4e4f5089de7ca51e6dd2e3c 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -24,7 +24,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) AM_LDFLAGS = $(HDF5_LDFLAGS) # Extra libraries. -EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) +EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS) # MPI libraries. MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS) diff --git a/examples/UniformDMBox/makeIC.py b/examples/UniformDMBox/makeIC.py index 449d780fb31bc23dd194f772be45d35e6b0bbe3f..2aee89798a5b8bbd425a6b73528779fb1aa7db23 100644 --- a/examples/UniformDMBox/makeIC.py +++ b/examples/UniformDMBox/makeIC.py @@ -26,7 +26,7 @@ from numpy import * # with a density of 1 # Parameters -periodic= 1 # 1 For periodic box +periodic= 0 # 1 For periodic box boxSize = 1. rho = 1. L = int(sys.argv[1]) # Number of particles along one axis diff --git a/examples/main.c b/examples/main.c index 9cda03c14c364974a18e36c84e60227a01970dd5..30e27cc55a6ec508dc352f1acc7f2eed71d71bdd 100644 --- a/examples/main.c +++ b/examples/main.c @@ -399,8 +399,8 @@ int main(int argc, char *argv[]) { /* Initialize the space with these data. */ if (myrank == 0) clocks_gettime(&tic); struct space s; - space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic, talking, - dry_run); + space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic, + with_self_gravity, talking, dry_run); if (myrank == 0) { clocks_gettime(&toc); message("space_init took %.3f %s.", clocks_diff(&tic, &toc), diff --git a/src/Makefile.am b/src/Makefile.am index c8554ce61940650898cfb21e282e0a6d174c79f1..4aa7278ba5162ec106fe25f10529f766f9a703a8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,7 +19,7 @@ AM_CFLAGS = -DTIMER $(HDF5_CPPFLAGS) # Assign a "safe" version number -AM_LDFLAGS = $(HDF5_LDFLAGS) -version-info 0:0:0 +AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0 # The git command, if available. GIT_CMD = @GIT_CMD@ @@ -48,17 +48,18 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ - kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c \ - physical_constants.c potentials.c hydro_properties.c + kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \ + physical_constants.c potentials.c hydro_properties.c \ + runner_doiact_fft.c # Include files for distribution, not installation. nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \ - vector.h runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h kick.h \ - timestep.h drift.h adiabatic_index.h io_properties.h \ + kernel_long_gravity.h vector.h runner_doiact.h runner_doiact_grav.h runner_doiact_fft.h \ + units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \ gravity.h gravity_io.h \ gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \ gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \ - hydro.h hydro_io.h \ + hydro.h hydro_io.h \ hydro/Minimal/hydro.h hydro/Minimal/hydro_iact.h hydro/Minimal/hydro_io.h \ hydro/Minimal/hydro_debug.h hydro/Minimal/hydro_part.h \ hydro/Default/hydro.h hydro/Default/hydro_iact.h hydro/Default/hydro_io.h \ diff --git a/src/cell.c b/src/cell.c index 2796a8933e1fedc2522c833570558adda369140b..7df55ce04ca739da2de6e6061048ad1fe3998a1e 100644 --- a/src/cell.c +++ b/src/cell.c @@ -709,6 +709,92 @@ void cell_clean_links(struct cell *c, void *data) { } /** +<<<<<<< HEAD + * @brief Checks whether the cells are direct neighbours ot not. Both cells have + * to be of the same size + * + * @param ci First #cell. + * @param cj Second #cell. + * + * @todo Deal with periodicity. + */ +int cell_are_neighbours(const struct cell *restrict ci, + const struct cell *restrict cj) { + +#ifdef SWIFT_DEBUG_CHECKS + if (ci->width[0] != cj->width[0]) error("Cells of different size !"); +#endif + + /* Maximum allowed distance */ + const double min_dist = + 1.2 * ci->width[0]; /* 1.2 accounts for rounding errors */ + + /* (Manhattan) Distance between the cells */ + for (int k = 0; k < 3; k++) { + const double center_i = ci->loc[k]; + const double center_j = cj->loc[k]; + if (fabsf(center_i - center_j) > min_dist) return 0; + } + + return 1; +} + +/** + * @brief Computes the multi-pole brutally and compare to the + * recursively computed one. + * + * @param c Cell to act upon + * @param data Unused parameter + */ +void cell_check_multipole(struct cell *c, void *data) { + + struct multipole ma; + + if (c->gcount > 0) { + + /* Brute-force calculation */ + multipole_init(&ma, c->gparts, c->gcount); + + /* Compare with recursive one */ + struct multipole mb = c->multipole; + + if (fabsf(ma.mass - mb.mass) / fabsf(ma.mass + mb.mass) > 1e-5) + error("Multipole masses are different (%12.15e vs. %12.15e)", ma.mass, + mb.mass); + + for (int k = 0; k < 3; ++k) + if (fabsf(ma.CoM[k] - mb.CoM[k]) / fabsf(ma.CoM[k] + mb.CoM[k]) > 1e-5) + error("Multipole CoM are different (%12.15e vs. %12.15e", ma.CoM[k], + mb.CoM[k]); + + if (fabsf(ma.I_xx - mb.I_xx) / fabsf(ma.I_xx + mb.I_xx) > 1e-5 && + ma.I_xx > 1e-9) + error("Multipole I_xx are different (%12.15e vs. %12.15e)", ma.I_xx, + mb.I_xx); + if (fabsf(ma.I_yy - mb.I_yy) / fabsf(ma.I_yy + mb.I_yy) > 1e-5 && + ma.I_yy > 1e-9) + error("Multipole I_yy are different (%12.15e vs. %12.15e)", ma.I_yy, + mb.I_yy); + if (fabsf(ma.I_zz - mb.I_zz) / fabsf(ma.I_zz + mb.I_zz) > 1e-5 && + ma.I_zz > 1e-9) + error("Multipole I_zz are different (%12.15e vs. %12.15e)", ma.I_zz, + mb.I_zz); + if (fabsf(ma.I_xy - mb.I_xy) / fabsf(ma.I_xy + mb.I_xy) > 1e-5 && + ma.I_xy > 1e-9) + error("Multipole I_xy are different (%12.15e vs. %12.15e)", ma.I_xy, + mb.I_xy); + if (fabsf(ma.I_xz - mb.I_xz) / fabsf(ma.I_xz + mb.I_xz) > 1e-5 && + ma.I_xz > 1e-9) + error("Multipole I_xz are different (%12.15e vs. %12.15e)", ma.I_xz, + mb.I_xz); + if (fabsf(ma.I_yz - mb.I_yz) / fabsf(ma.I_yz + mb.I_yz) > 1e-5 && + ma.I_yz > 1e-9) + error("Multipole I_yz are different (%12.15e vs. %12.15e)", ma.I_yz, + mb.I_yz); + } +} + +/* * @brief Frees up the memory allocated for this #cell */ void cell_clean(struct cell *c) { diff --git a/src/cell.h b/src/cell.h index a14d94d50ce6a4d2d25e7e6005c035cb22a3720d..150718eeb4bd3857e37d517718fe53661033a330 100644 --- a/src/cell.h +++ b/src/cell.h @@ -197,6 +197,9 @@ void cell_init_parts(struct cell *c, void *data); void cell_init_gparts(struct cell *c, void *data); void cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell *c, void *data); +int cell_are_neighbours(const struct cell *restrict ci, + const struct cell *restrict cj); +void cell_check_multipole(struct cell *c, void *data); void cell_clean(struct cell *c); #endif /* SWIFT_CELL_H */ diff --git a/src/common_io.c b/src/common_io.c index d3590ae402487eb2f605a1cf3f5722f86704ad88..3c001d9da106a46ef5033c8cdec9346d68c54ecd 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -374,6 +374,9 @@ void writeCodeDescription(hid_t h_file) { writeAttribute_s(h_grpcode, "Git Branch", git_branch()); writeAttribute_s(h_grpcode, "Git Revision", git_revision()); writeAttribute_s(h_grpcode, "HDF5 library version", hdf5_version()); +#ifdef HAVE_FFTW + writeAttribute_s(h_grpcode, "FFTW library version", fftw3_version()); +#endif #ifdef WITH_MPI writeAttribute_s(h_grpcode, "MPI library", mpi_version()); #ifdef HAVE_METIS diff --git a/src/const.h b/src/const.h index 50d72e3339d7b8f04e3028f09794aff45f03dc65..736ba0155e03dc0f595ac06edfd0763b38302e9c 100644 --- a/src/const.h +++ b/src/const.h @@ -36,11 +36,6 @@ /* Time integration constants. */ #define const_max_u_change 0.1f -/* Gravity stuff. */ -#define const_theta_max 0.57735f -#define const_G 6.672e-8f /* Gravitational constant. */ -#define const_epsilon 0.0014f /* Gravity blending distance. */ - /* Hydrodynamical adiabatic index. */ #define HYDRO_GAMMA_5_3 //#define HYDRO_GAMMA_4_3 @@ -59,7 +54,13 @@ #define GADGET2_SPH //#define DEFAULT_SPH -/* External potential properties */ +/* Self gravity stuff. */ +#define const_gravity_multipole_order 2 +#define const_gravity_a_smooth 1.25f +#define const_gravity_r_cut 4.5f +#define const_gravity_eta 0.025f + +/* External gravity properties */ #define EXTERNAL_POTENTIAL_POINTMASS //#define EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL diff --git a/src/drift.h b/src/drift.h index c71cda49b0ad2b18b227fc2d485bbe76f32d5b94..a6e347cc337385f89bece53bc477bff7d163c202 100644 --- a/src/drift.h +++ b/src/drift.h @@ -26,6 +26,7 @@ #include "const.h" #include "debug.h" #include "hydro.h" +#include "part.h" /** * @brief Perform the 'drift' operation on a #gpart diff --git a/src/engine.c b/src/engine.c index b33850be9415f30f6c0ac38e70a74a7d239b791e..ebd589bbb0af01cec3196c658c4d014daca8df38 100644 --- a/src/engine.c +++ b/src/engine.c @@ -64,6 +64,7 @@ #include "serial_io.h" #include "single_io.h" #include "timers.h" +#include "tools.h" #include "units.h" #include "version.h" @@ -129,12 +130,10 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c, engine_policy_external_gravity; const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt; - /* Am I the super-cell? */ - /* TODO(pedro): Add a condition for gravity tasks as well. */ - if (super == NULL && - (c->density != NULL || (!c->split && (c->count > 0 || c->gcount > 0)))) { + /* Is this the super-cell? */ + if (super == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) { - /* This is the super cell, i.e. the first with density tasks attached. */ + /* This is the super cell, i.e. the first with gravity tasks attached. */ super = c; /* Local tasks only... */ @@ -1182,9 +1181,62 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts, #endif } +/** + * @brief Constructs the top-level tasks for the short-range gravity + * interactions. + * + * All top-cells get a self task. + * All neighbouring pairs get a pair task. + * All non-neighbouring pairs within a range of 6 cells get a M-M task. + * + * @param e The #engine. + */ +void engine_make_gravity_tasks(struct engine *e) { + + struct space *s = e->s; + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + struct cell *cells = s->cells; + const int nr_cells = s->nr_cells; + + for (int cid = 0; cid < nr_cells; ++cid) { + + struct cell *ci = &cells[cid]; + + /* Skip cells without gravity particles */ + if (ci->gcount == 0) continue; + + /* Is that neighbour local ? */ + if (ci->nodeID != nodeID) continue; + + /* If the cells is local build a self-interaction */ + scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL, + 0); + + /* Let's also build a task for all the non-neighbouring pm calculations */ + scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, 0, 0, ci, + NULL, 0); + + for (int cjd = cid + 1; cjd < nr_cells; ++cjd) { + + struct cell *cj = &cells[cjd]; + + /* Skip cells without gravity particles */ + if (cj->gcount == 0) continue; + + /* Is that neighbour local ? */ + if (cj->nodeID != nodeID) continue; + + if (cell_are_neighbours(ci, cj)) + scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci, + cj, 1); + } + } +} + /** * @brief Constructs the top-level pair tasks for the first hydro loop over - *neighbours + * neighbours * * Here we construct all the tasks for all possible neighbouring non-empty * local cells in the hierarchy. No dependencies are being added thus far. @@ -1316,6 +1368,115 @@ void engine_count_and_link_tasks(struct engine *e) { } } +/** + * @brief Creates the dependency network for the gravity tasks of a given cell. + * + * @param sched The #scheduler. + * @param gravity The gravity task to link. + * @param c The cell. + */ +static inline void engine_make_gravity_dependencies(struct scheduler *sched, + struct task *gravity, + struct cell *c) { + + /* init --> gravity --> kick */ + scheduler_addunlock(sched, c->super->init, gravity); + scheduler_addunlock(sched, gravity, c->super->kick); + + /* grav_up --> gravity ( --> kick) */ + scheduler_addunlock(sched, c->super->grav_up, gravity); +} + +/** + * @brief Creates all the task dependencies for the gravity + * + * @param e The #engine + */ +void engine_link_gravity_tasks(struct engine *e) { + + struct scheduler *sched = &e->sched; + const int nodeID = e->nodeID; + const int nr_tasks = sched->nr_tasks; + + /* Add one task gathering all the multipoles */ + struct task *gather = scheduler_addtask( + sched, task_type_grav_gather_m, task_subtype_none, 0, 0, NULL, NULL, 0); + + /* And one task performing the FFT */ + struct task *fft = scheduler_addtask(sched, task_type_grav_fft, + task_subtype_none, 0, 0, NULL, NULL, 0); + + scheduler_addunlock(sched, gather, fft); + + for (int k = 0; k < nr_tasks; k++) { + + /* Get a pointer to the task. */ + struct task *t = &sched->tasks[k]; + + /* Skip? */ + if (t->skip) continue; + + /* Multipole construction */ + if (t->type == task_type_grav_up) { + scheduler_addunlock(sched, t, gather); + } + + /* Long-range interaction */ + if (t->type == task_type_grav_mm) { + + /* Gather the multipoles --> mm interaction --> kick */ + scheduler_addunlock(sched, gather, t); + scheduler_addunlock(sched, t, t->ci->super->kick); + + /* init --> mm interaction */ + scheduler_addunlock(sched, t->ci->super->init, t); + } + + /* Self-interaction? */ + if (t->type == task_type_self && t->subtype == task_subtype_grav) { + + engine_make_gravity_dependencies(sched, t, t->ci); + + } + + /* Otherwise, pair interaction? */ + else if (t->type == task_type_pair && t->subtype == task_subtype_grav) { + + if (t->ci->nodeID == nodeID) { + + engine_make_gravity_dependencies(sched, t, t->ci); + } + + if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + + engine_make_gravity_dependencies(sched, t, t->cj); + } + + } + + /* Otherwise, sub-self interaction? */ + else if (t->type == task_type_sub_self && t->subtype == task_subtype_grav) { + + if (t->ci->nodeID == nodeID) { + engine_make_gravity_dependencies(sched, t, t->ci); + } + } + + /* Otherwise, sub-pair interaction? */ + else if (t->type == task_type_sub_pair && t->subtype == task_subtype_grav) { + + if (t->ci->nodeID == nodeID) { + + engine_make_gravity_dependencies(sched, t, t->ci); + } + if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) { + + engine_make_gravity_dependencies(sched, t, t->cj); + } + } + } +} + /** * @brief Creates the dependency network for the hydro tasks of a given cell. * @@ -1453,41 +1614,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) { } } -/** - * @brief Constructs the top-level pair tasks for the gravity M-M interactions - * - * Correct implementation is still lacking here. - * - * @param e The #engine. - */ -void engine_make_gravityinteraction_tasks(struct engine *e) { - - struct space *s = e->s; - struct scheduler *sched = &e->sched; - const int nr_cells = s->nr_cells; - struct cell *cells = s->cells; - - /* Loop over all cells. */ - for (int i = 0; i < nr_cells; i++) { - - /* If it has gravity particles, add a self-task */ - if (cells[i].gcount > 0) { - scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0, - &cells[i], NULL, 0); - - /* Loop over all remainding cells */ - for (int j = i + 1; j < nr_cells; j++) { - - /* If that other cell has gravity parts, add a pair interaction */ - if (cells[j].gcount > 0) { - scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0, - &cells[i], &cells[j], 0); - } - } - } - } -} - /** * @brief Constructs the gravity tasks building the multipoles and propagating *them to the children @@ -1513,9 +1639,12 @@ void engine_make_gravityrecursive_tasks(struct engine *e) { struct task *up = scheduler_addtask(sched, task_type_grav_up, task_subtype_none, 0, 0, &cells[k], NULL, 0); - struct task *down = - scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0, 0, - &cells[k], NULL, 0); + + struct task *down = NULL; + /* struct task *down = */ + /* scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0, + * 0, */ + /* &cells[k], NULL, 0); */ /* Push tasks down the cell hierarchy. */ engine_addtasks_grav(e, &cells[k], up, down); @@ -1548,11 +1677,10 @@ void engine_maketasks(struct engine *e) { } /* Construct the firt hydro loop over neighbours */ - engine_make_hydroloop_tasks(e); + if (e->policy & engine_policy_hydro) engine_make_hydroloop_tasks(e); /* Add the gravity mm tasks. */ - if (e->policy & engine_policy_self_gravity) - engine_make_gravityinteraction_tasks(e); + if (e->policy & engine_policy_self_gravity) engine_make_gravity_tasks(e); /* Split the tasks. */ scheduler_splittasks(sched); @@ -1573,7 +1701,7 @@ void engine_maketasks(struct engine *e) { /* Count the number of tasks associated with each cell and store the density tasks in each cell, and make each sort depend on the sorts of its progeny. */ - engine_count_and_link_tasks(e); + if (e->policy & engine_policy_hydro) engine_count_and_link_tasks(e); /* Append hierarchical tasks to each cells */ if (e->policy & engine_policy_hydro) @@ -1588,7 +1716,12 @@ void engine_maketasks(struct engine *e) { /* Run through the tasks and make force tasks for each density task. Each force task depends on the cell ghosts and unlocks the kick task of its super-cell. */ - engine_make_extra_hydroloop_tasks(e); + if (e->policy & engine_policy_hydro) engine_make_extra_hydroloop_tasks(e); + + /* Add the dependencies for the self-gravity stuff */ + if (e->policy & engine_policy_self_gravity) engine_link_gravity_tasks(e); + +#ifdef WITH_MPI /* Add the communication tasks if MPI is being used. */ if (e->policy & engine_policy_mpi) { @@ -1611,6 +1744,7 @@ void engine_maketasks(struct engine *e) { NULL); } } +#endif /* Set the unlocks per task. */ scheduler_set_unlocks(sched); @@ -1740,7 +1874,7 @@ int engine_marktasks(struct engine *e) { continue; /* Set the sort flags. */ - if (t->type == task_type_pair) { + if (t->type == task_type_pair && t->subtype != task_subtype_grav) { if (!(ci->sorted & (1 << t->flags))) { ci->sorts->flags |= (1 << t->flags); ci->sorts->skip = 0; @@ -2144,6 +2278,7 @@ void engine_collect_drift(struct cell *c) { c->ang_mom[1] = ang_mom[1]; c->ang_mom[2] = ang_mom[2]; } + /** * @brief Print the conserved quantities statistics to a log file * @@ -2315,7 +2450,16 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* Add the tasks corresponding to self-gravity to the masks */ if (e->policy & engine_policy_self_gravity) { - /* Nothing here for now */ + mask |= 1 << task_type_grav_up; + mask |= 1 << task_type_grav_mm; + mask |= 1 << task_type_grav_gather_m; + mask |= 1 << task_type_grav_fft; + mask |= 1 << task_type_self; + mask |= 1 << task_type_pair; + mask |= 1 << task_type_sub_self; + mask |= 1 << task_type_sub_pair; + + submask |= 1 << task_subtype_grav; } /* Add the tasks corresponding to external gravity to the masks */ @@ -2326,6 +2470,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) { /* Add MPI tasks if need be */ if (e->policy & engine_policy_mpi) { + mask |= 1 << task_type_send; mask |= 1 << task_type_recv; submask |= 1 << task_subtype_tend; @@ -2450,7 +2595,16 @@ void engine_step(struct engine *e) { /* Add the tasks corresponding to self-gravity to the masks */ if (e->policy & engine_policy_self_gravity) { - /* Nothing here for now */ + mask |= 1 << task_type_grav_up; + mask |= 1 << task_type_grav_mm; + mask |= 1 << task_type_grav_gather_m; + mask |= 1 << task_type_grav_fft; + mask |= 1 << task_type_self; + mask |= 1 << task_type_pair; + mask |= 1 << task_type_sub_self; + mask |= 1 << task_type_sub_pair; + + submask |= 1 << task_subtype_grav; } /* Add the tasks corresponding to external gravity to the masks */ @@ -2460,6 +2614,7 @@ void engine_step(struct engine *e) { /* Add MPI tasks if need be */ if (e->policy & engine_policy_mpi) { + mask |= 1 << task_type_send; mask |= 1 << task_type_recv; submask |= 1 << task_subtype_tend; diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index 32af9b6c67fb9d78d70ae6f41e8b4a32bfb1d8c8..5903ce1384f616a123c74b579539fe53747d17e4 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -53,8 +53,6 @@ gravity_compute_timestep_external(const struct external_potential* potential, /** * @brief Computes the gravity time-step of a given particle due to self-gravity * - * This function only branches towards the potential chosen by the user. - * * @param phys_const The physical constants in internal units. * @param gp Pointer to the g-particle data. */ @@ -62,7 +60,13 @@ __attribute__((always_inline)) INLINE static float gravity_compute_timestep_self(const struct phys_const* const phys_const, const struct gpart* const gp) { - float dt = FLT_MAX; + const float ac2 = gp->a_grav[0] * gp->a_grav[0] + + gp->a_grav[1] * gp->a_grav[1] + + gp->a_grav[2] * gp->a_grav[2]; + + const float ac = (ac2 > 0.f) ? sqrtf(ac2) : FLT_MIN; + + const float dt = sqrt(2.f * const_gravity_eta * gp->epsilon / ac); return dt; } @@ -76,7 +80,9 @@ gravity_compute_timestep_self(const struct phys_const* const phys_const, * @param gp The particle to act upon */ __attribute__((always_inline)) INLINE static void gravity_first_init_gpart( - struct gpart* gp) {} + struct gpart* gp) { + gp->epsilon = 0.; // MATTHIEU +} /** * @brief Prepares a g-particle for the gravity calculation @@ -101,9 +107,16 @@ __attribute__((always_inline)) INLINE static void gravity_init_gpart( * Multiplies the forces and accelerations by the appropiate constants * * @param gp The particle to act upon + * @param const_G Newton's constant */ __attribute__((always_inline)) INLINE static void gravity_end_force( - struct gpart* gp) {} + struct gpart* gp, double const_G) { + + /* Let's get physical... */ + gp->a_grav[0] *= const_G; + gp->a_grav[1] *= const_G; + gp->a_grav[2] *= const_G; +} /** * @brief Computes the gravitational acceleration induced by external potentials diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index dd6878841d7d0632ab8e4f218044516d9895cab8..aa285ce3245aeac608aa022924a613a7a7b00375 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -23,93 +23,168 @@ /* Includes. */ #include "const.h" #include "kernel_gravity.h" +#include "kernel_long_gravity.h" +#include "multipole.h" #include "vector.h" /** - * @brief Gravity potential + * @brief Gravity forces between particles */ -__attribute__((always_inline)) INLINE static void runner_iact_grav( - float r2, float *dx, struct gpart *pi, struct gpart *pj) { +__attribute__((always_inline)) INLINE static void runner_iact_grav_pp( + float rlr_inv, float r2, const float *dx, struct gpart *gpi, + struct gpart *gpj) { - float ir, r; - float w, acc; - float mi = pi->mass, mj = pj->mass; - int k; + /* Apply the gravitational acceleration. */ + const float r = sqrtf(r2); + const float ir = 1.f / r; + const float mi = gpi->mass; + const float mj = gpj->mass; + const float hi = gpi->epsilon; + const float hj = gpj->epsilon; + const float u = r * rlr_inv; + float f_lr, fi, fj, W; - /* Get the absolute distance. */ - ir = 1.0f / sqrtf(r2); - r = r2 * ir; + /* Get long-range correction */ + kernel_long_grav_eval(u, &f_lr); - /* Evaluate the gravity kernel. */ - kernel_grav_eval(r, &acc); + if (r >= hi) { - /* Scale the acceleration. */ - acc *= const_G * ir * ir * ir; + /* Get Newtonian gravity */ + fi = mj * ir * ir * ir * f_lr; - /* Aggregate the accelerations. */ - for (k = 0; k < 3; k++) { - w = acc * dx[k]; - pi->a_grav[k] -= w * mj; - pj->a_grav[k] += w * mi; + } else { + + const float hi_inv = 1.f / hi; + const float hi_inv3 = hi_inv * hi_inv * hi_inv; + const float ui = r * hi_inv; + + kernel_grav_eval(ui, &W); + + /* Get softened gravity */ + fi = mj * hi_inv3 * W * f_lr; } + + if (r >= hj) { + + /* Get Newtonian gravity */ + fj = mi * ir * ir * ir * f_lr; + + } else { + + const float hj_inv = 1.f / hj; + const float hj_inv3 = hj_inv * hj_inv * hj_inv; + const float uj = r * hj_inv; + + kernel_grav_eval(uj, &W); + + /* Get softened gravity */ + fj = mi * hj_inv3 * W * f_lr; + } + + const float fidx[3] = {fi * dx[0], fi * dx[1], fi * dx[2]}; + gpi->a_grav[0] -= fidx[0]; + gpi->a_grav[1] -= fidx[1]; + gpi->a_grav[2] -= fidx[2]; + + const float fjdx[3] = {fj * dx[0], fj * dx[1], fj * dx[2]}; + gpj->a_grav[0] += fjdx[0]; + gpj->a_grav[1] += fjdx[1]; + gpj->a_grav[2] += fjdx[2]; } /** - * @brief Gravity potential (Vectorized version) + * @brief Gravity forces between particles (non-symmetric version) */ -__attribute__((always_inline)) INLINE static void runner_iact_vec_grav( - float *R2, float *Dx, struct gpart **pi, struct gpart **pj) { - -#ifdef WITH_VECTORIZATION - - vector ir, r, r2, dx[3]; - vector w, acc, ai, aj; - vector mi, mj; - int j, k; - -#if VEC_SIZE == 8 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass, - pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass, - pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass); - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k], - Dx[15 + k], Dx[18 + k], Dx[21 + k]); -#elif VEC_SIZE == 4 - mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass); - mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass); - for (k = 0; k < 3; k++) - dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]); -#endif +__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_nonsym( + float rlr_inv, float r2, const float *dx, struct gpart *gpi, + const struct gpart *gpj) { - /* Get the radius and inverse radius. */ - r2.v = vec_load(R2); - ir.v = vec_rsqrt(r2.v); - ir.v = ir.v - vec_set1(0.5f) * ir.v * (r2.v * ir.v * ir.v - vec_set1(1.0f)); - r.v = r2.v * ir.v; - - /* Evaluate the gravity kernel. */ - blender_eval_vec(&r, &acc); - - /* Scale the acceleration. */ - acc.v *= vec_set1(const_G) * ir.v * ir.v * ir.v; - - /* Aggregate the accelerations. */ - for (k = 0; k < 3; k++) { - w.v = acc.v * dx[k].v; - ai.v = w.v * mj.v; - aj.v = w.v * mi.v; - for (j = 0; j < VEC_SIZE; j++) { - pi[j]->a_grav[k] -= ai.f[j]; - pj[j]->a_grav[k] += aj.f[j]; - } + /* Apply the gravitational acceleration. */ + const float r = sqrtf(r2); + const float ir = 1.f / r; + const float mj = gpj->mass; + const float hi = gpi->epsilon; + const float u = r * rlr_inv; + float f_lr, f, W; + + /* Get long-range correction */ + kernel_long_grav_eval(u, &f_lr); + + if (r >= hi) { + + /* Get Newtonian gravity */ + f = mj * ir * ir * ir * f_lr; + + } else { + + const float hi_inv = 1.f / hi; + const float hi_inv3 = hi_inv * hi_inv * hi_inv; + const float ui = r * hi_inv; + + kernel_grav_eval(ui, &W); + + /* Get softened gravity */ + f = mj * hi_inv3 * W * f_lr; } -#else + const float fdx[3] = {f * dx[0], f * dx[1], f * dx[2]}; + + gpi->a_grav[0] -= fdx[0]; + gpi->a_grav[1] -= fdx[1]; + gpi->a_grav[2] -= fdx[2]; +} - for (int k = 0; k < VEC_SIZE; k++) - runner_iact_grav(R2[k], &Dx[3 * k], pi[k], pj[k]); +/** + * @brief Gravity forces between particle and multipole + */ +__attribute__((always_inline)) INLINE static void runner_iact_grav_pm( + float rlr_inv, float r2, const float *dx, struct gpart *gp, + const struct multipole *multi) { + + /* Apply the gravitational acceleration. */ + const float r = sqrtf(r2); + const float ir = 1.f / r; + const float u = r * rlr_inv; + const float mrinv3 = multi->mass * ir * ir * ir; + + /* Get long-range correction */ + float f_lr; + kernel_long_grav_eval(u, &f_lr); + +#if const_gravity_multipole_order < 2 + + /* 0th and 1st order terms */ + gp->a_grav[0] += mrinv3 * f_lr * dx[0]; + gp->a_grav[1] += mrinv3 * f_lr * dx[1]; + gp->a_grav[2] += mrinv3 * f_lr * dx[2]; + +#elif const_gravity_multipole_order == 2 + /* Terms up to 2nd order (quadrupole) */ + + /* Follows the notation in Bonsai */ + const float mrinv5 = mrinv3 * ir * ir; + const float mrinv7 = mrinv5 * ir * ir; + + const float D1 = -mrinv3; + const float D2 = 3.f * mrinv5; + const float D3 = -15.f * mrinv7; + + const float q = multi->I_xx + multi->I_yy + multi->I_zz; + const float qRx = + multi->I_xx * dx[0] + multi->I_xy * dx[1] + multi->I_xz * dx[2]; + const float qRy = + multi->I_xy * dx[0] + multi->I_yy * dx[1] + multi->I_yz * dx[2]; + const float qRz = + multi->I_xz * dx[0] + multi->I_yz * dx[1] + multi->I_zz * dx[2]; + const float qRR = qRx * dx[0] + qRy * dx[1] + qRz * dx[2]; + const float C = D1 + 0.5f * D2 * q + 0.5f * D3 * qRR; + + gp->a_grav[0] -= f_lr * (C * dx[0] + D2 * qRx); + gp->a_grav[1] -= f_lr * (C * dx[1] + D2 * qRy); + gp->a_grav[2] -= f_lr * (C * dx[2] + D2 * qRz); +#else +#error "Multipoles of order >2 not yet implemented." #endif } diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index b6c9e62559207cb25323c8a108e4bffc87ea0fcf..77e6543429993bea355c04dd28fcb5b04eee5519 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -37,16 +37,15 @@ struct gpart { /* Particle mass. */ float mass; + /* Softening length */ + float epsilon; + /* Particle time of beginning of time-step. */ int ti_begin; /* Particle time of end of time-step. */ int ti_end; - /* /\* current time of x, and of v_full *\/ */ - /* float tx; */ - /* float tv; */ - /* Particle ID. If negative, it is the negative offset of the #part with which this gpart is linked. */ long long id_or_neg_offset; diff --git a/src/kernel_gravity.c b/src/kernel_gravity.c deleted file mode 100644 index 639a964c813ef7fd95008857ee17b7dd5ffafb27..0000000000000000000000000000000000000000 --- a/src/kernel_gravity.c +++ /dev/null @@ -1,74 +0,0 @@ -/******************************************************************************* - * This file is part of SWIFT. - * Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk), - * Matthieu Schaller (matthieu.schaller@durham.ac.uk) - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - ******************************************************************************/ - -#include <math.h> -#include <stdio.h> - -#include "kernel_gravity.h" - -/** - * @brief The Gadget-2 gravity kernel function - * - * @param r The distance between particles - */ -float gadget(float r) { - float fac, h_inv, u, r2 = r * r; - if (r >= const_epsilon) - fac = 1.0f / (r2 * r); - else { - h_inv = 1. / const_epsilon; - u = r * h_inv; - if (u < 0.5) - fac = const_iepsilon3 * (10.666666666667 + u * u * (32.0 * u - 38.4)); - else - fac = const_iepsilon3 * - (21.333333333333 - 48.0 * u + 38.4 * u * u - - 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u)); - } - return const_G * fac; -} - -/** - * @brief Test the gravity kernel function by dumping it in the interval [0,1]. - * - * @param r_max The radius up to which the kernel is dumped. - * @param N number of intervals in [0,1]. - */ -void gravity_kernel_dump(float r_max, int N) { - - int k; - float x, w; - float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - // float dw_dx4[4] __attribute__ ((aligned (16))); - - for (k = 1; k <= N; k++) { - x = (r_max * k) / N; - x4[3] = x4[2]; - x4[2] = x4[1]; - x4[1] = x4[0]; - x4[0] = x; - kernel_grav_eval(x, &w); - w *= const_G / (x * x * x); - // blender_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 ); - printf(" %.16e %.16e %.16e %.16e %.16e %.16e %.16e\n", x, w * x, w4[0], - w4[1], w4[2], w4[3], gadget(x) * x); - } -} diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h index 0c786f5189b882ed4593ef4a927edbf2a8cdd6e7..b38feb5758debf87add2007ee3684d869f393f7e 100644 --- a/src/kernel_gravity.h +++ b/src/kernel_gravity.h @@ -20,203 +20,70 @@ #ifndef SWIFT_KERNEL_GRAVITY_H #define SWIFT_KERNEL_GRAVITY_H +#include <math.h> + /* Includes. */ #include "const.h" #include "inline.h" #include "vector.h" -#define const_iepsilon (1. / const_epsilon) -#define const_iepsilon2 (const_iepsilon * const_iepsilon) -#define const_iepsilon3 (const_iepsilon2 * const_iepsilon) -#define const_iepsilon4 (const_iepsilon2 * const_iepsilon2) -#define const_iepsilon5 (const_iepsilon3 * const_iepsilon2) -#define const_iepsilon6 (const_iepsilon3 * const_iepsilon3) - /* The gravity kernel is defined as a degree 6 polynomial in the distance r. The resulting value should be post-multiplied with r^-3, resulting in a polynomial with terms ranging from r^-3 to r^3, which are sufficient to model both the direct potential as well as the splines - near the origin. */ - -/* Coefficients for the gravity kernel. */ -#define kernel_grav_degree 6 -#define kernel_grav_ivals 2 -#define kernel_grav_scale (2 * const_iepsilon) -static float - kernel_grav_coeffs[(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] = { - 32.0f * const_iepsilon6, - -192.0f / 5.0f * const_iepsilon5, - 0.0f, - 32.0f / 3.0f * const_iepsilon3, - 0.0f, - 0.0f, - 0.0f, - -32.0f / 3.0f * const_iepsilon6, - 192.0f / 5.0f * const_iepsilon5, - -48.0f * const_iepsilon4, - 64.0f / 3.0f * const_iepsilon3, - 0.0f, - 0.0f, - -1.0f / 15.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 0.0f, - 1.0f}; + near the origin. + As in the hydro case, the 1/h^3 needs to be multiplied in afterwards */ + +/* Coefficients for the kernel. */ +#define kernel_grav_name "Gadget-2 softening kernel" +#define kernel_grav_degree 6 /* Degree of the polynomial */ +#define kernel_grav_ivals 2 /* Number of branches */ +static const float + kernel_grav_coeffs[(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] + __attribute__((aligned(16))) = {32.f, + -38.4f, + 0.f, + 10.66666667f, + 0.f, + 0.f, + 0.f, /* 0 < u < 0.5 */ + -10.66666667f, + 38.4f, + -48.f, + 21.3333333, + 0.f, + 0.f, + -0.066666667f, /* 0.5 < u < 1 */ + 0.f, + 0.f, + 0.f, + 0.f, + 0.f, + 0.f, + 0.f}; /* 1 < u */ /** - * @brief Computes the gravity cubic spline for a given distance x. - */ - -__attribute__((always_inline)) INLINE static void kernel_grav_eval(float x, - float *W) { - int ind = fmin(x * kernel_grav_scale, kernel_grav_ivals); - float *coeffs = &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -#ifdef WITH_VECTORIZATION - -/** - * @brief Computes the gravity cubic spline for a given distance x (Vectorized - * version). - */ - -__attribute__((always_inline)) INLINE static void kernel_grav_eval_vec( - vector *x, vector *w) { - - vector ind, c[kernel_grav_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi(vec_fmin(x->v * vec_set1(kernel_grav_scale), - vec_set1((float)kernel_grav_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < kernel_grav_degree + 1; j++) - c[j].f[k] = kernel_grav_coeffs[ind.i[k] * (kernel_grav_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - - /* And we're off! */ - for (int k = 2; k <= kernel_grav_degree; k++) w->v = (x->v * w->v) + c[k].v; -} - -#endif - -/* Blending function stuff - * -------------------------------------------------------------------------------------------- - */ - -/* Coefficients for the blending function. */ -#define blender_degree 3 -#define blender_ivals 3 -#define blender_scale 4.0f -static float blender_coeffs[(blender_degree + 1) * (blender_ivals + 1)] = { - 0.0f, 0.0f, 0.0f, 1.0f, -32.0f, 24.0f, -6.0f, 1.5f, - -32.0f, 72.0f, -54.0f, 13.5f, 0.0f, 0.0f, 0.0f, 0.0f}; - -/** - * @brief Computes the cubic spline blender for a given distance x. - */ - -__attribute__((always_inline)) INLINE static void blender_eval(float x, - float *W) { - int ind = fmin(x * blender_scale, blender_ivals); - float *coeffs = &blender_coeffs[ind * (blender_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - for (int k = 2; k <= blender_degree; k++) w = x * w + coeffs[k]; - *W = w; -} - -/** - * @brief Computes the cubic spline blender and its derivative for a given - * distance x. - */ - -__attribute__((always_inline)) INLINE static void blender_deval(float x, - float *W, - float *dW_dx) { - int ind = fminf(x * blender_scale, blender_ivals); - float *coeffs = &blender_coeffs[ind * (blender_degree + 1)]; - float w = coeffs[0] * x + coeffs[1]; - float dw_dx = coeffs[0]; - for (int k = 2; k <= blender_degree; k++) { - dw_dx = dw_dx * x + w; - w = x * w + coeffs[k]; - } - *W = w; - *dW_dx = dw_dx; -} - -#ifdef WITH_VECTORIZATION - -/** - * @brief Computes the cubic spline blender and its derivative for a given - * distance x (Vectorized version). Gives a sensible answer only if x<2. - */ - -__attribute__((always_inline)) INLINE static void blender_eval_vec(vector *x, - vector *w) { - - vector ind, c[blender_degree + 1]; - int j, k; - - /* Load x and get the interval id. */ - ind.m = vec_ftoi( - vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals))); - - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < blender_degree + 1; j++) - c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j]; - - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - - /* And we're off! */ - for (int k = 2; k <= blender_degree; k++) w->v = (x->v * w->v) + c[k].v; -} - -/** - * @brief Computes the cubic spline blender and its derivative for a given - * distance x (Vectorized version). Gives a sensible answer only if x<2. + * @brief Computes the gravity softening function. + * + * @param u The ratio of the distance to the softening length $u = x/h$. + * @param W (return) The value of the kernel function $W(x,h)$. */ +__attribute__((always_inline)) INLINE static void kernel_grav_eval( + float u, float *const W) { -__attribute__((always_inline)) INLINE static void blender_deval_vec( - vector *x, vector *w, vector *dw_dx) { - - vector ind, c[blender_degree + 1]; - int j, k; + /* Pick the correct branch of the kernel */ + const int ind = (int)fminf(u * (float)kernel_grav_ivals, kernel_grav_ivals); + const float *const coeffs = + &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)]; - /* Load x and get the interval id. */ - ind.m = vec_ftoi( - vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals))); + /* First two terms of the polynomial ... */ + float w = coeffs[0] * u + coeffs[1]; - /* load the coefficients. */ - for (k = 0; k < VEC_SIZE; k++) - for (j = 0; j < blender_degree + 1; j++) - c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j]; + /* ... and the rest of them */ + for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k]; - /* Init the iteration for Horner's scheme. */ - w->v = (c[0].v * x->v) + c[1].v; - dw_dx->v = c[0].v; - - /* And we're off! */ - for (int k = 2; k <= blender_degree; k++) { - dw_dx->v = (dw_dx->v * x->v) + w->v; - w->v = (x->v * w->v) + c[k].v; - } + /* Return everything */ + *W = w / (u * u * u); } -#endif - -void gravity_kernel_dump(float r_max, int N); - #endif // SWIFT_KERNEL_GRAVITY_H diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h new file mode 100644 index 0000000000000000000000000000000000000000..d247c7a461d4bd116f30ab106143f6c75e1b941e --- /dev/null +++ b/src/kernel_long_gravity.h @@ -0,0 +1,50 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_KERNEL_LONG_GRAVITY_H +#define SWIFT_KERNEL_LONG_GRAVITY_H + +#include <math.h> + +/* Includes. */ +#include "const.h" +#include "inline.h" +#include "vector.h" + +#define one_over_sqrt_pi ((float)(M_2_SQRTPI * 0.5)) + +/** + * @brief Computes the long-range correction term for the FFT calculation. + * + * @param u The ratio of the distance to the FFT cell scale $u = x/A$. + * @param W (return) The value of the kernel function. + */ +__attribute__((always_inline)) INLINE static void kernel_long_grav_eval( + float u, float *const W) { + + const float arg1 = u * 0.5f; + const float arg2 = u * one_over_sqrt_pi; + const float arg3 = -arg1 * arg1; + + const float term1 = erfc(arg1); + const float term2 = arg2 * expf(arg3); + + *W = term1 + term2; +} + +#endif // SWIFT_KERNEL_LONG_GRAVITY_H diff --git a/src/multipole.c b/src/multipole.c index fc4701fead68e64e8742730325931282f80e8934..f0d0c7084fd9bec366f2185cb24887da40591b17 100644 --- a/src/multipole.c +++ b/src/multipole.c @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -21,149 +22,203 @@ #include "../config.h" /* Some standard headers. */ -#include <float.h> -#include <limits.h> -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -/* MPI headers. */ -#ifdef WITH_MPI -#include <mpi.h> -#endif +#include <strings.h> /* This object's header. */ #include "multipole.h" /** - * @brief Merge two multipoles. - * - * @param ma The #multipole which will contain the merged result. - * @param mb The other #multipole. - */ +* @brief Reset the data of a #multipole. +* +* @param m The #multipole. +*/ +void multipole_reset(struct multipole *m) { -void multipole_merge(struct multipole *ma, struct multipole *mb) { + /* Just bzero the struct. */ + bzero(m, sizeof(struct multipole)); +} -#if multipole_order == 1 +/** +* @brief Init a multipole from a set of particles. +* +* @param m The #multipole. +* @param gparts The #gpart. +* @param gcount The number of particles. +*/ +void multipole_init(struct multipole *m, const struct gpart *gparts, + int gcount) { + +#if const_gravity_multipole_order > 2 +#error "Multipoles of order >2 not yet implemented." +#endif - /* Correct the position. */ - float mma = ma->coeffs[0], mmb = mb->coeffs[0]; - float w = 1.0f / (mma + mmb); - for (int k = 0; k < 3; k++) ma->x[k] = (ma->x[k] * mma + mb->x[k] * mmb) * w; + /* Zero everything */ + multipole_reset(m); - /* Add the particle to the moments. */ - ma->coeffs[0] = mma + mmb; + /* Temporary variables */ + double mass = 0.0; + double com[3] = {0.0, 0.0, 0.0}; -#else -#error( "Multipoles of order %i not yet implemented." , multipole_order ) +#if const_gravity_multipole_order >= 2 + double I_xx = 0.0, I_yy = 0.0, I_zz = 0.0; + double I_xy = 0.0, I_xz = 0.0, I_yz = 0.0; #endif -} - -/** - * @brief Add a particle to the given multipole. - * - * @param m The #multipole. - * @param p The #gpart. - */ - -void multipole_addpart(struct multipole *m, struct gpart *p) { -#if multipole_order == 1 + /* Collect the particle data. */ + for (int k = 0; k < gcount; k++) { + const float w = gparts[k].mass; - /* Correct the position. */ - float mm = m->coeffs[0], mp = p->mass; - float w = 1.0f / (mm + mp); - for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + p->x[k] * mp) * w; + mass += w; + com[0] += gparts[k].x[0] * w; + com[1] += gparts[k].x[1] * w; + com[2] += gparts[k].x[2] * w; + +#if const_gravity_multipole_order >= 2 + I_xx += gparts[k].x[0] * gparts[k].x[0] * w; + I_yy += gparts[k].x[1] * gparts[k].x[1] * w; + I_zz += gparts[k].x[2] * gparts[k].x[2] * w; + I_xy += gparts[k].x[0] * gparts[k].x[1] * w; + I_xz += gparts[k].x[0] * gparts[k].x[2] * w; + I_yz += gparts[k].x[1] * gparts[k].x[2] * w; +#endif + } - /* Add the particle to the moments. */ - m->coeffs[0] = mm + mp; + const double imass = 1.0 / mass; -#else -#error( "Multipoles of order %i not yet implemented." , multipole_order ) + /* Store the data on the multipole. */ + m->mass = mass; + m->CoM[0] = com[0] * imass; + m->CoM[1] = com[1] * imass; + m->CoM[2] = com[2] * imass; + +#if const_gravity_multipole_order >= 2 + m->I_xx = I_xx - imass * com[0] * com[0]; + m->I_yy = I_yy - imass * com[1] * com[1]; + m->I_zz = I_zz - imass * com[2] * com[2]; + m->I_xy = I_xy - imass * com[0] * com[1]; + m->I_xz = I_xz - imass * com[0] * com[2]; + m->I_yz = I_yz - imass * com[1] * com[2]; #endif } /** - * @brief Add a group of particles to the given multipole. + * @brief Add the second multipole to the first one. * - * @param m The #multipole. - * @param p The #gpart array. - * @param N Number of parts to add. + * @param ma The #multipole which will contain the sum. + * @param mb The #multipole to add. */ -void multipole_addparts(struct multipole *m, struct gpart *p, int N) { +void multipole_add(struct multipole *ma, const struct multipole *mb) { -#if multipole_order == 1 - - /* Get the combined mass and positions. */ - double xp[3] = {0.0, 0.0, 0.0}; - float mp = 0.0f, w; - for (int k = 0; k < N; k++) { - w = p[k].mass; - mp += w; - xp[0] += p[k].x[0] * w; - xp[1] += p[k].x[1] * w; - xp[2] += p[k].x[2] * w; - } +#if const_gravity_multipole_order > 2 +#error "Multipoles of order >2 not yet implemented." +#endif /* Correct the position. */ - float mm = m->coeffs[0]; - w = 1.0f / (mm + mp); - for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + xp[k]) * w; - - /* Add the particle to the moments. */ - m->coeffs[0] = mm + mp; + const double ma_mass = ma->mass; + const double mb_mass = mb->mass; + const double M_tot = ma_mass + mb_mass; + const double M_tot_inv = 1.0 / M_tot; + + const double ma_CoM[3] = {ma->CoM[0], ma->CoM[1], ma->CoM[2]}; + const double mb_CoM[3] = {mb->CoM[0], mb->CoM[1], mb->CoM[2]}; + +#if const_gravity_multipole_order >= 2 + const double ma_I_xx = (double)ma->I_xx + ma_mass * ma_CoM[0] * ma_CoM[0]; + const double ma_I_yy = (double)ma->I_yy + ma_mass * ma_CoM[1] * ma_CoM[1]; + const double ma_I_zz = (double)ma->I_zz + ma_mass * ma_CoM[2] * ma_CoM[2]; + const double ma_I_xy = (double)ma->I_xy + ma_mass * ma_CoM[0] * ma_CoM[1]; + const double ma_I_xz = (double)ma->I_xz + ma_mass * ma_CoM[0] * ma_CoM[2]; + const double ma_I_yz = (double)ma->I_yz + ma_mass * ma_CoM[1] * ma_CoM[2]; + + const double mb_I_xx = (double)mb->I_xx + mb_mass * mb_CoM[0] * mb_CoM[0]; + const double mb_I_yy = (double)mb->I_yy + mb_mass * mb_CoM[1] * mb_CoM[1]; + const double mb_I_zz = (double)mb->I_zz + mb_mass * mb_CoM[2] * mb_CoM[2]; + const double mb_I_xy = (double)mb->I_xy + mb_mass * mb_CoM[0] * mb_CoM[1]; + const double mb_I_xz = (double)mb->I_xz + mb_mass * mb_CoM[0] * mb_CoM[2]; + const double mb_I_yz = (double)mb->I_yz + mb_mass * mb_CoM[1] * mb_CoM[2]; +#endif -#else -#error( "Multipoles of order %i not yet implemented." , multipole_order ) + /* New mass */ + ma->mass = M_tot; + + /* New CoM */ + ma->CoM[0] = (ma_CoM[0] * ma_mass + mb_CoM[0] * mb_mass) * M_tot_inv; + ma->CoM[1] = (ma_CoM[1] * ma_mass + mb_CoM[1] * mb_mass) * M_tot_inv; + ma->CoM[2] = (ma_CoM[2] * ma_mass + mb_CoM[2] * mb_mass) * M_tot_inv; + +/* New quadrupole */ +#if const_gravity_multipole_order >= 2 + ma->I_xx = (ma_I_xx + mb_I_xx) - M_tot * ma->CoM[0] * ma->CoM[0]; + ma->I_yy = (ma_I_yy + mb_I_yy) - M_tot * ma->CoM[1] * ma->CoM[1]; + ma->I_zz = (ma_I_zz + mb_I_zz) - M_tot * ma->CoM[2] * ma->CoM[2]; + ma->I_xy = (ma_I_xy + mb_I_xy) - M_tot * ma->CoM[0] * ma->CoM[1]; + ma->I_xz = (ma_I_xz + mb_I_xz) - M_tot * ma->CoM[0] * ma->CoM[2]; + ma->I_yz = (ma_I_yz + mb_I_yz) - M_tot * ma->CoM[1] * ma->CoM[2]; #endif } /** - * @brief Init a multipole from a set of particles. + * @brief Add a particle to the given multipole. * * @param m The #multipole. - * @param parts The #gpart. - * @param N The number of particles. + * @param p The #gpart. */ -void multipole_init(struct multipole *m, struct gpart *parts, int N) { +void multipole_addpart(struct multipole *m, struct gpart *p) { -#if multipole_order == 1 + /* #if const_gravity_multipole_order == 1 */ - float mass = 0.0f, w; - double x[3] = {0.0, 0.0, 0.0}; - int k; + /* /\* Correct the position. *\/ */ + /* float mm = m->coeffs[0], mp = p->mass; */ + /* float w = 1.0f / (mm + mp); */ + /* for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + p->x[k] * mp) * w; + */ - /* Collect the particle data. */ - for (k = 0; k < N; k++) { - w = parts[k].mass; - mass += w; - x[0] += parts[k].x[0] * w; - x[1] += parts[k].x[1] * w; - x[2] += parts[k].x[2] * w; - } - - /* Store the data on the multipole. */ - m->coeffs[0] = mass; - m->x[0] = x[0] / mass; - m->x[1] = x[1] / mass; - m->x[2] = x[2] / mass; + /* /\* Add the particle to the moments. *\/ */ + /* m->coeffs[0] = mm + mp; */ -#else -#error( "Multipoles of order %i not yet implemented." , multipole_order ) -#endif + /* #else */ + /* #error( "Multipoles of order %i not yet implemented." , + * const_gravity_multipole_order ) + */ + /* #endif */ } /** - * @brief Reset the data of a #multipole. + * @brief Add a group of particles to the given multipole. * * @param m The #multipole. + * @param p The #gpart array. + * @param N Number of parts to add. */ -void multipole_reset(struct multipole *m) { +void multipole_addparts(struct multipole *m, struct gpart *p, int N) { - /* Just bzero the struct. */ - bzero(m, sizeof(struct multipole)); + /* #if const_gravity_multipole_order == 1 */ + + /* /\* Get the combined mass and positions. *\/ */ + /* double xp[3] = {0.0, 0.0, 0.0}; */ + /* float mp = 0.0f, w; */ + /* for (int k = 0; k < N; k++) { */ + /* w = p[k].mass; */ + /* mp += w; */ + /* xp[0] += p[k].x[0] * w; */ + /* xp[1] += p[k].x[1] * w; */ + /* xp[2] += p[k].x[2] * w; */ + /* } */ + + /* /\* Correct the position. *\/ */ + /* float mm = m->coeffs[0]; */ + /* w = 1.0f / (mm + mp); */ + /* for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + xp[k]) * w; */ + + /* /\* Add the particle to the moments. *\/ */ + /* m->coeffs[0] = mm + mp; */ + + /* #else */ + /* #error( "Multipoles of order %i not yet implemented." , + * const_gravity_multipole_order ) + */ + /* #endif */ } diff --git a/src/multipole.h b/src/multipole.h index 85ba44d3ce95d958b721d435ccd26b72e30a79c1..dc1f914dd73969bc63f3beffca7f34d0f889edb6 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -28,31 +29,33 @@ #include "kernel_gravity.h" #include "part.h" -/* Some constants. */ -#define multipole_order 1 - /* Multipole struct. */ struct multipole { /* Multipole location. */ - double x[3]; + double CoM[3]; - /* Acceleration on this multipole. */ - float a[3]; + /* Multipole mass */ + float mass; - /* Multipole coefficients. */ - float coeffs[multipole_order * multipole_order]; +#if const_gravity_multipole_order >= 2 + /* Quadrupole terms */ + float I_xx, I_yy, I_zz; + float I_xy, I_xz, I_yz; +#endif }; /* Multipole function prototypes. */ -static void multipole_iact_mm(struct multipole *ma, struct multipole *mb, - double *shift); -void multipole_merge(struct multipole *ma, struct multipole *mb); -void multipole_addpart(struct multipole *m, struct gpart *p); -void multipole_addparts(struct multipole *m, struct gpart *p, int N); -void multipole_init(struct multipole *m, struct gpart *parts, int N); +void multipole_add(struct multipole *m_sum, const struct multipole *m_term); +void multipole_init(struct multipole *m, const struct gpart *gparts, + int gcount); void multipole_reset(struct multipole *m); +/* static void multipole_iact_mm(struct multipole *ma, struct multipole *mb, */ +/* double *shift); */ +/* void multipole_addpart(struct multipole *m, struct gpart *p); */ +/* void multipole_addparts(struct multipole *m, struct gpart *p, int N); */ + /** * @brief Compute the pairwise interaction between two multipoles. * @@ -60,39 +63,39 @@ void multipole_reset(struct multipole *m); * @param mb The second #multipole. * @param shift The periodicity correction. */ - __attribute__((always_inline)) INLINE static void multipole_iact_mm( struct multipole *ma, struct multipole *mb, double *shift) { - - float dx[3], ir, r, r2 = 0.0f, acc; - int k; - - /* Compute the multipole distance. */ - for (k = 0; k < 3; k++) { - dx[k] = ma->x[k] - mb->x[k] - shift[k]; - r2 += dx[k] * dx[k]; - } - - /* Compute the normalized distance vector. */ - ir = 1.0f / sqrtf(r2); - r = r2 * ir; - - /* Evaluate the gravity kernel. */ - kernel_grav_eval(r, &acc); - - /* Scale the acceleration. */ - acc *= const_G * ir * ir * ir; - -/* Compute the forces on both multipoles. */ -#if multipole_order == 1 - float mma = ma->coeffs[0], mmb = mb->coeffs[0]; - for (k = 0; k < 3; k++) { - ma->a[k] -= dx[k] * acc * mmb; - mb->a[k] += dx[k] * acc * mma; - } -#else -#error( "Multipoles of order %i not yet implemented." , multipole_order ) -#endif + /* float dx[3], ir, r, r2 = 0.0f, acc; */ + /* int k; */ + + /* /\* Compute the multipole distance. *\/ */ + /* for (k = 0; k < 3; k++) { */ + /* dx[k] = ma->x[k] - mb->x[k] - shift[k]; */ + /* r2 += dx[k] * dx[k]; */ + /* } */ + + /* /\* Compute the normalized distance vector. *\/ */ + /* ir = 1.0f / sqrtf(r2); */ + /* r = r2 * ir; */ + + /* /\* Evaluate the gravity kernel. *\/ */ + /* kernel_grav_eval(r, &acc); */ + + /* /\* Scale the acceleration. *\/ */ + /* acc *= const_G * ir * ir * ir; */ + + /* /\* Compute the forces on both multipoles. *\/ */ + /* #if const_gravity_multipole_order == 1 */ + /* float mma = ma->coeffs[0], mmb = mb->coeffs[0]; */ + /* for (k = 0; k < 3; k++) { */ + /* ma->a[k] -= dx[k] * acc * mmb; */ + /* mb->a[k] += dx[k] * acc * mma; */ + /* } */ + /* #else */ + /* #error( "Multipoles of order %i not yet implemented." , + * const_gravity_multipole_order ) + */ + /* #endif */ } /** @@ -102,35 +105,36 @@ __attribute__((always_inline)) INLINE static void multipole_iact_mm( * @param p The #gpart. * @param shift The periodicity correction. */ - __attribute__((always_inline)) INLINE static void multipole_iact_mp( struct multipole *m, struct gpart *p, double *shift) { - float dx[3], ir, r, r2 = 0.0f, acc; - int k; - - /* Compute the multipole distance. */ - for (k = 0; k < 3; k++) { - dx[k] = m->x[k] - p->x[k] - shift[k]; - r2 += dx[k] * dx[k]; - } - - /* Compute the normalized distance vector. */ - ir = 1.0f / sqrtf(r2); - r = r2 * ir; - - /* Evaluate the gravity kernel. */ - kernel_grav_eval(r, &acc); - - /* Scale the acceleration. */ - acc *= const_G * ir * ir * ir * m->coeffs[0]; - -/* Compute the forces on both multipoles. */ -#if multipole_order == 1 - for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc; -#else -#error( "Multipoles of order %i not yet implemented." , multipole_order ) -#endif + /* float dx[3], ir, r, r2 = 0.0f, acc; */ + /* int k; */ + + /* /\* Compute the multipole distance. *\/ */ + /* for (k = 0; k < 3; k++) { */ + /* dx[k] = m->x[k] - p->x[k] - shift[k]; */ + /* r2 += dx[k] * dx[k]; */ + /* } */ + + /* /\* Compute the normalized distance vector. *\/ */ + /* ir = 1.0f / sqrtf(r2); */ + /* r = r2 * ir; */ + + /* /\* Evaluate the gravity kernel. *\/ */ + /* kernel_grav_eval(r, &acc); */ + + /* /\* Scale the acceleration. *\/ */ + /* acc *= const_G * ir * ir * ir * m->coeffs[0]; */ + + /* /\* Compute the forces on both multipoles. *\/ */ + /* #if const_gravity_multipole_order == 1 */ + /* for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc; */ + /* #else */ + /* #error( "Multipoles of order %i not yet implemented." , + * const_gravity_multipole_order ) + */ + /* #endif */ } #endif /* SWIFT_MULTIPOLE_H */ diff --git a/src/potentials.c b/src/potentials.c index 6818d8a234a437ca0164c5c991b92d905fd6041e..5cbe05e008b7de17310b1e738e032af90684c25e 100644 --- a/src/potentials.c +++ b/src/potentials.c @@ -47,7 +47,7 @@ void potential_init(const struct swift_params* parameter_file, potential->point_mass.mass = parser_get_param_double(parameter_file, "PointMass:mass"); potential->point_mass.timestep_mult = - parser_get_param_double(parameter_file, "PointMass:timestep_mult"); + parser_get_param_float(parameter_file, "PointMass:timestep_mult"); #endif /* EXTERNAL_POTENTIAL_POINTMASS */ @@ -61,7 +61,7 @@ void potential_init(const struct swift_params* parameter_file, parser_get_param_double(parameter_file, "IsothermalPotential:position_z"); potential->isothermal_potential.vrot = parser_get_param_double(parameter_file, "IsothermalPotential:vrot"); - potential->isothermal_potential.timestep_mult = parser_get_param_double( + potential->isothermal_potential.timestep_mult = parser_get_param_float( parameter_file, "IsothermalPotential:timestep_mult"); #endif /* EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL */ diff --git a/src/potentials.h b/src/potentials.h index 403acf12ad555740b6da803dbc1f2cde2223e266..5373cc1f3f55b0c6d9876cbe6348fdcefbe242aa 100644 --- a/src/potentials.h +++ b/src/potentials.h @@ -42,7 +42,7 @@ struct external_potential { struct { double x, y, z; double mass; - double timestep_mult; + float timestep_mult; } point_mass; #endif @@ -50,7 +50,7 @@ struct external_potential { struct { double x, y, z; double vrot; - double timestep_mult; + float timestep_mult; } isothermal_potential; #endif }; @@ -95,6 +95,8 @@ external_gravity_isothermalpotential_timestep( * @brief Computes the gravitational acceleration of a particle due to a point * mass * + * Note that the accelerations are multiplied by Newton's G constant later on. + * * @param potential The #external_potential used in the run. * @param phys_const The physical constants in internal units. * @param g Pointer to the g-particle data. @@ -104,15 +106,19 @@ external_gravity_isothermalpotential(const struct external_potential* potential, const struct phys_const* const phys_const, struct gpart* g) { + const float G_newton = phys_const->const_newton_G; + const float dx = g->x[0] - potential->isothermal_potential.x; const float dy = g->x[1] - potential->isothermal_potential.y; const float dz = g->x[2] - potential->isothermal_potential.z; const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz); const double vrot = potential->isothermal_potential.vrot; - g->a_grav[0] += -vrot * vrot * rinv2 * dx; - g->a_grav[1] += -vrot * vrot * rinv2 * dy; - g->a_grav[2] += -vrot * vrot * rinv2 * dz; + const double term = -vrot * vrot * rinv2 / G_newton; + + g->a_grav[0] += term * dx; + g->a_grav[1] += term * dy; + g->a_grav[2] += term * dz; // error(" %f %f %f %f", vrot, rinv2, dx, g->a_grav[0]); } @@ -158,6 +164,8 @@ external_gravity_pointmass_timestep(const struct external_potential* potential, * @brief Computes the gravitational acceleration of a particle due to a point * mass * + * Note that the accelerations are multiplied by Newton's G constant later on. + * * @param potential The proerties of the external potential. * @param phys_const The physical constants in internal units. * @param g Pointer to the g-particle data. @@ -166,18 +174,15 @@ __attribute__((always_inline)) INLINE static void external_gravity_pointmass( const struct external_potential* potential, const struct phys_const* const phys_const, struct gpart* g) { - const float G_newton = phys_const->const_newton_G; const float dx = g->x[0] - potential->point_mass.x; const float dy = g->x[1] - potential->point_mass.y; const float dz = g->x[2] - potential->point_mass.z; const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz); + const float rinv3 = rinv * rinv * rinv; - g->a_grav[0] += - -G_newton * potential->point_mass.mass * dx * rinv * rinv * rinv; - g->a_grav[1] += - -G_newton * potential->point_mass.mass * dy * rinv * rinv * rinv; - g->a_grav[2] += - -G_newton * potential->point_mass.mass * dz * rinv * rinv * rinv; + g->a_grav[0] += -potential->point_mass.mass * dx * rinv3; + g->a_grav[1] += -potential->point_mass.mass * dy * rinv3; + g->a_grav[2] += -potential->point_mass.mass * dz * rinv3; } #endif /* EXTERNAL_POTENTIAL_POINTMASS */ diff --git a/src/runner.c b/src/runner.c index 01762dda28aaa8f1fa3e240824761d90c9f8f702..6e99d8cece2cf086b1209ac2f8314301b9779e20 100644 --- a/src/runner.c +++ b/src/runner.c @@ -87,6 +87,10 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, #define FUNCTION force #include "runner_doiact.h" +/* Import the gravity loop functions. */ +#include "runner_doiact_fft.h" +#include "runner_doiact_grav.h" + /** * @brief Calculate gravity acceleration from external potential * @@ -115,7 +119,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { OUT; #endif - /* Loop over the parts in this cell. */ + /* Loop over the gparts in this cell. */ for (int i = 0; i < gcount; i++) { /* Get a direct pointer on the part. */ @@ -127,6 +131,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) { external_gravity(potential, constants, g); } } + if (timer) TIMER_TOC(timer_dograv_external); } @@ -424,11 +429,9 @@ void runner_do_init(struct runner *r, struct cell *c, int timer) { */ void runner_do_ghost(struct runner *r, struct cell *c) { - struct part *p, *parts = c->parts; - struct xpart *xp, *xparts = c->xparts; - struct cell *finger; + struct part *restrict parts = c->parts; + struct xpart *restrict xparts = c->xparts; int redo, count = c->count; - float h_corr; const int ti_current = r->e->ti_current; const double timeBase = r->e->timeBase; const float target_wcount = r->e->hydro_properties->target_neighbours; @@ -465,8 +468,8 @@ void runner_do_ghost(struct runner *r, struct cell *c) { for (int i = 0; i < count; i++) { /* Get a direct pointer on the part. */ - p = &parts[pid[i]]; - xp = &xparts[pid[i]]; + struct part *restrict p = &parts[pid[i]]; + struct xpart *restrict xp = &xparts[pid[i]]; /* Is this part within the timestep? */ if (p->ti_end <= ti_current) { @@ -474,6 +477,8 @@ void runner_do_ghost(struct runner *r, struct cell *c) { /* Finish the density calculation */ hydro_end_density(p, ti_current); + float h_corr = 0.f; + /* If no derivative, double the smoothing length. */ if (p->density.wcount_dh == 0.0f) h_corr = p->h; @@ -526,7 +531,7 @@ void runner_do_ghost(struct runner *r, struct cell *c) { if (count > 0) { /* Climb up the cell hierarchy. */ - for (finger = c; finger != NULL; finger = finger->parent) { + for (struct cell *finger = c; finger != NULL; finger = finger->parent) { /* Run through this cell's density interactions. */ for (struct link *l = finger->density; l != NULL; l = l->next) { @@ -745,6 +750,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) { struct part *restrict parts = c->parts; struct xpart *restrict xparts = c->xparts; struct gpart *restrict gparts = c->gparts; + const double const_G = r->e->physical_constants->const_newton_G; int updated = 0, g_updated = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; @@ -771,7 +777,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) { if (gp->id_or_neg_offset > 0) { /* First, finish the force calculation */ - gravity_end_force(gp); + gravity_end_force(gp, const_G); /* Kick the g-particle forward */ kick_gpart(gp, new_dti, timeBase); @@ -796,7 +802,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) { /* First, finish the force loop */ hydro_end_force(p); - if (p->gpart != NULL) gravity_end_force(p->gpart); + if (p->gpart != NULL) gravity_end_force(p->gpart, const_G); /* Kick the particle forward */ kick_part(p, xp, new_dti, timeBase); @@ -857,6 +863,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { struct part *restrict parts = c->parts; struct xpart *restrict xparts = c->xparts; struct gpart *restrict gparts = c->gparts; + const double const_G = r->e->physical_constants->const_newton_G; int updated = 0, g_updated = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; @@ -882,7 +889,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { if (gp->ti_end <= ti_current) { /* First, finish the force calculation */ - gravity_end_force(gp); + gravity_end_force(gp, const_G); /* Compute the next timestep */ const int new_dti = get_gpart_timestep(gp, e); @@ -914,7 +921,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) { /* First, finish the force loop */ hydro_end_force(p); - if (p->gpart != NULL) gravity_end_force(p->gpart); + if (p->gpart != NULL) gravity_end_force(p->gpart, const_G); /* Compute the next timestep (hydro condition) */ const int new_dti = get_part_timestep(p, xp, e); @@ -1071,6 +1078,8 @@ void *runner_main(void *data) { runner_doself1_density(r, ci); else if (t->subtype == task_subtype_force) runner_doself2_force(r, ci); + else if (t->subtype == task_subtype_grav) + runner_doself_grav(r, ci, 1); else error("Unknown task subtype."); break; @@ -1079,6 +1088,8 @@ void *runner_main(void *data) { runner_dopair1_density(r, ci, cj); else if (t->subtype == task_subtype_force) runner_dopair2_force(r, ci, cj); + else if (t->subtype == task_subtype_grav) + runner_dopair_grav(r, ci, cj, 1); else error("Unknown task subtype."); break; @@ -1090,6 +1101,8 @@ void *runner_main(void *data) { runner_dosub_self1_density(r, ci, 1); else if (t->subtype == task_subtype_force) runner_dosub_self2_force(r, ci, 1); + else if (t->subtype == task_subtype_grav) + runner_dosub_grav(r, ci, cj, 1); else error("Unknown task subtype."); break; @@ -1098,6 +1111,8 @@ void *runner_main(void *data) { runner_dosub_pair1_density(r, ci, cj, t->flags, 1); else if (t->subtype == task_subtype_force) runner_dosub_pair2_force(r, ci, cj, t->flags, 1); + else if (t->subtype == task_subtype_grav) + runner_dosub_grav(r, ci, cj, 1); else error("Unknown task subtype."); break; @@ -1129,6 +1144,17 @@ void *runner_main(void *data) { runner_do_recv_cell(r, ci, 1); } break; + case task_type_grav_mm: + runner_do_grav_mm(r, t->ci, 1); + break; + case task_type_grav_up: + runner_do_grav_up(r, t->ci); + break; + case task_type_grav_gather_m: + break; + case task_type_grav_fft: + runner_do_grav_fft(r); + break; case task_type_grav_external: runner_do_grav_external(r, t->ci, 1); break; diff --git a/src/runner_doiact_fft.c b/src/runner_doiact_fft.c new file mode 100644 index 0000000000000000000000000000000000000000..076ec2578361127266c637cc5ac224609b702c66 --- /dev/null +++ b/src/runner_doiact_fft.c @@ -0,0 +1,36 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +#include <pthread.h> + +#ifdef HAVE_FFTW +#include <fftw3.h> +#endif + +/* This object's header. */ +#include "runner_doiact_fft.h" + +/* Local includes. */ +#include "runner.h" + +void runner_do_grav_fft(struct runner *r) {} diff --git a/src/runner_doiact_fft.h b/src/runner_doiact_fft.h new file mode 100644 index 0000000000000000000000000000000000000000..263662383fb465dcf945e55494a569289b009ff9 --- /dev/null +++ b/src/runner_doiact_fft.h @@ -0,0 +1,26 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_RUNNER_DOIACT_FFT_H +#define SWIFT_RUNNER_DOIACT_FFT_H + +struct runner; + +void runner_do_grav_fft(struct runner *r); + +#endif /* SWIFT_RUNNER_DOIACT_FFT_H */ diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index 401e48ffbb92218aecafd21a3f70c44c9a7b0a1b..a220ad1794d23999ff16752e797a499071fa2e65 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -1,6 +1,7 @@ /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk) + * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published @@ -19,575 +20,502 @@ #ifndef SWIFT_RUNNER_DOIACT_GRAV_H #define SWIFT_RUNNER_DOIACT_GRAV_H +/* Includes. */ +#include "cell.h" +#include "gravity.h" +#include "part.h" + +#define ICHECK -1000 + /** - * @brief Compute the sorted gravity interactions between a cell pair. + * @brief Compute the recursive upward sweep, i.e. construct the + * multipoles in a cell hierarchy. * * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. + * @param c The top-level #cell. */ +void runner_do_grav_up(struct runner *r, struct cell *c) { + + if (c->split) { /* Regular node */ + + /* Recurse. */ + for (int k = 0; k < 8; k++) + if (c->progeny[k] != NULL) runner_do_grav_up(r, c->progeny[k]); + + /* Collect the multipoles from the progeny. */ + multipole_reset(&c->multipole); + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) + multipole_add(&c->multipole, &c->progeny[k]->multipole); + } -void runner_dopair_grav_new(struct runner *r, struct cell *ci, - struct cell *cj) { - - struct engine *restrict e = r->e; - int pid, pjd, k, sid; - double rshift, shift[3] = {0.0, 0.0, 0.0}, nshift[3]; - struct entry *restrict sort_i, *restrict sort_j; - struct gpart *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j; - double pix[3]; - float dx[3], r2, h_max, di, dj; - int count_i, count_j, cnj, cnj_new; + } else { /* Leaf node. */ + /* Just construct the multipole from the gparts. */ + multipole_init(&c->multipole, c->gparts, c->gcount); + } +} + +/** + * @brief Computes the interaction of all the particles in a cell with the + * multipole of another cell. + * + * @param r The #runner. + * @param ci The #cell with particles to interct. + * @param cj The #cell with the multipole. + */ +__attribute__((always_inline)) INLINE static void runner_dopair_grav_pm( + const struct runner *r, const struct cell *restrict ci, + const struct cell *restrict cj) { + + const struct engine *e = r->e; + const int gcount = ci->gcount; + struct gpart *restrict gparts = ci->gparts; + const struct multipole multi = cj->multipole; const int ti_current = e->ti_current; - struct multipole m; -#ifdef WITH_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct gpart *piq[VEC_SIZE], *pjq[VEC_SIZE]; + const float rlr_inv = 1. / (const_gravity_a_smooth * ci->super->width[0]); + + TIMER_TIC; + +#ifdef SWIFT_DEBUG_CHECKS + if (gcount == 0) error("Empty cell!"); // MATTHIEU sanity check + + if (multi.mass == 0.0) // MATTHIEU sanity check + error("Multipole does not seem to have been set."); #endif - TIMER_TIC /* Anything to do here? */ - if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; + if (ci->ti_end_min > ti_current) return; - /* Get the sort ID. */ - sid = space_getsid(e->s, &ci, &cj, shift); - - /* Make sure the cells are sorted. */ - // runner_do_gsort(r, ci, (1 << sid), 0); - // runner_do_gsort(r, cj, (1 << sid), 0); - - /* Have the cells been sorted? */ - if (!(ci->gsorted & (1 << sid)) || !(cj->gsorted & (1 << sid))) - error("Trying to interact unsorted cells."); - - /* Get the cutoff shift. */ - for (rshift = 0.0, k = 0; k < 3; k++) - rshift += shift[k] * runner_shift[sid][k]; - - /* Pick-out the sorted lists. */ - sort_i = &ci->gsort[sid * (ci->count + 1)]; - sort_j = &cj->gsort[sid * (cj->count + 1)]; - - /* Get some other useful values. */ - h_max = sqrtf(ci->width[0] * ci->width[0] + ci->width[1] * ci->width[1] + - ci->width[2] * ci->width[2]) * - const_theta_max; - count_i = ci->gcount; - count_j = cj->gcount; - parts_i = ci->gparts; - parts_j = cj->gparts; - cnj = count_j; - multipole_reset(&m); - nshift[0] = -shift[0]; - nshift[1] = -shift[1]; - nshift[2] = -shift[2]; - - /* Loop over the parts in ci. */ - for (pid = count_i - 1; pid >= 0; pid--) { +#if ICHECK > 0 + for (int pid = 0; pid < gcount; pid++) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[sort_i[pid].i]; - if (pi->ti_end > ti_current) continue; - di = sort_i[pid].d + h_max - rshift; - - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + struct gpart *restrict gp = &gparts[pid]; - /* Loop over the parts in cj. */ - for (pjd = 0; pjd < cnj && sort_j[pjd].d < di; pjd++) { + if (gp->id_or_neg_offset == ICHECK) + message("id=%lld loc=[ %f %f %f ] size= %f count= %d", + gp->id_or_neg_offset, cj->loc[0], cj->loc[1], cj->loc[2], + cj->width[0], cj->gcount); + } +#endif - /* Get a pointer to the jth particle. */ - pj = &parts_j[sort_j[pjd].i]; + /* Loop over every particle in leaf. */ + for (int pid = 0; pid < gcount; pid++) { - /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gp = &gparts[pid]; -#ifndef WITH_VECTORIZATION + if (gp->ti_end > ti_current) continue; - // if ( pi->part->id == 3473472412525 || pj->part->id == 3473472412525 ) - // message( "interacting particles pi=%lli and pj=%lli with r=%.3e in - // cells %lli/%lli." , pi->part->id , pj->part->id , sqrtf(r2) , ((long - // long int)ci) / sizeof(struct cell) , ((long long int)cj) / - // sizeof(struct cell) ); + /* Compute the pairwise distance. */ + const float dx[3] = {multi.CoM[0] - gp->x[0], // x + multi.CoM[1] - gp->x[1], // y + multi.CoM[2] - gp->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - runner_iact_grav(r2, dx, pi, pj); + /* Interact !*/ + runner_iact_grav_pm(rlr_inv, r2, dx, gp, &multi); + } -#else + TIMER_TOC(timer_dopair_grav_pm); +} - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; +/** + * @brief Computes the interaction of all the particles in a cell with all the + * particles of another cell. + * + * @param r The #runner. + * @param ci The first #cell. + * @param cj The other #cell. + * + * @todo Use a local cache for the particles. + */ +__attribute__((always_inline)) INLINE static void runner_dopair_grav_pp( + struct runner *r, struct cell *ci, struct cell *cj) { + + const struct engine *e = r->e; + const int gcount_i = ci->gcount; + const int gcount_j = cj->gcount; + struct gpart *restrict gparts_i = ci->gparts; + struct gpart *restrict gparts_j = cj->gparts; + const int ti_current = e->ti_current; + const float rlr_inv = 1. / (const_gravity_a_smooth * ci->super->width[0]); - /* Flush? */ - if (icount == VEC_SIZE) { - runner_iact_vec_grav(r2q, dxq, piq, pjq); - icount = 0; - } + TIMER_TIC; +#ifdef SWIFT_DEBUG_CHECKS + if (ci->width[0] != cj->width[0]) // MATTHIEU sanity check + error("Non matching cell sizes !! h_i=%f h_j=%f", ci->width[0], + cj->width[0]); #endif - } /* loop over the parts in cj. */ + /* Anything to do here? */ + if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; + +#if ICHECK > 0 + for (int pid = 0; pid < gcount_i; pid++) { - /* Set the new limit. */ - cnj_new = pjd; + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gp = &gparts_i[pid]; - /* Add trailing parts to the multipole. */ - for (pjd = cnj_new; pjd < cnj; pjd++) { + if (gp->id_or_neg_offset == ICHECK) + message("id=%lld loc=[ %f %f %f ] size= %f count= %d", + gp->id_or_neg_offset, cj->loc[0], cj->loc[1], cj->loc[2], + cj->width[0], cj->gcount); + } - /* Add the part to the multipole. */ - multipole_addpart(&m, &parts_j[sort_j[pjd].i]); + for (int pid = 0; pid < gcount_j; pid++) { - } /* add trailing parts to the multipole. */ + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gp = &gparts_j[pid]; - /* Set the new cnj. */ - cnj = cnj_new; + if (gp->id_or_neg_offset == ICHECK) + message("id=%lld loc=[ %f %f %f ] size= %f count=%d", + gp->id_or_neg_offset, ci->loc[0], ci->loc[1], ci->loc[2], + ci->width[0], ci->gcount); + } +#endif - /* Interact the ith particle with the multipole. */ - multipole_iact_mp(&m, pi, nshift); + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount_i; pid++) { - } /* loop over the parts in ci. */ + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gpi = &gparts_i[pid]; -#ifdef WITH_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (k = 0; k < icount; k++) - runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]); -#endif + /* Loop over every particle in the other cell. */ + for (int pjd = 0; pjd < gcount_j; pjd++) { - /* Re-set the multipole. */ - multipole_reset(&m); + /* Get a hold of the jth part in cj. */ + struct gpart *restrict gpj = &gparts_j[pjd]; - /* Loop over the parts in cj and interact with the multipole in ci. */ - for (pid = count_i - 1, pjd = 0; pjd < count_j; pjd++) { + /* Compute the pairwise distance. */ + float dx[3] = {gpi->x[0] - gpj->x[0], // x + gpi->x[1] - gpj->x[1], // y + gpi->x[2] - gpj->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - /* Get the position of pj along the axis. */ - dj = sort_j[pjd].d - h_max + rshift; + /* Interact ! */ + if (gpi->ti_end <= ti_current && gpj->ti_end <= ti_current) { - /* Add any left-over parts in cell_i to the multipole. */ - while (pid >= 0 && sort_i[pid].d < dj) { + runner_iact_grav_pp(rlr_inv, r2, dx, gpi, gpj); - /* Add this particle to the multipole. */ - multipole_addpart(&m, &parts_i[sort_i[pid].i]); + } else { - /* Decrease pid. */ - pid -= 1; - } + if (gpi->ti_end <= ti_current) { - /* Interact pj with the multipole. */ - multipole_iact_mp(&m, &parts_j[sort_j[pjd].i], shift); + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj); - } /* loop over the parts in cj and interact with the multipole. */ + } else if (gpj->ti_end <= ti_current) { - TIMER_TOC(TIMER_DOPAIR); + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi); + } + } + } + } + + TIMER_TOC(timer_dopair_grav_pp); } /** - * @brief Compute the recursive upward sweep, i.e. construct the - * multipoles in a cell hierarchy. + * @brief Computes the interaction of all the particles in a cell directly * * @param r The #runner. - * @param c The top-level #cell. + * @param c The #cell. + * + * @todo Use a local cache for the particles. */ +__attribute__((always_inline)) INLINE static void runner_doself_grav_pp( + struct runner *r, struct cell *c) { + + const struct engine *e = r->e; + const int gcount = c->gcount; + struct gpart *restrict gparts = c->gparts; + const int ti_current = e->ti_current; + const float rlr_inv = 1. / (const_gravity_a_smooth * c->super->width[0]); -void runner_dograv_up(struct runner *r, struct cell *c) { + TIMER_TIC; - /* Re-set this cell's multipole. */ - multipole_reset(&c->multipole); +#ifdef SWIFT_DEBUG_CHECKS + if (c->gcount == 0) // MATTHIEU sanity check + error("Empty cell !"); +#endif - /* Split? */ - if (c->split) { + /* Anything to do here? */ + if (c->ti_end_min > ti_current) return; - /* Recurse. */ - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_dograv_up(r, c->progeny[k]); +#if ICHECK > 0 + for (int pid = 0; pid < gcount; pid++) { - /* Collect the multipoles from the progeny. */ - multipole_reset(&c->multipole); - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) - multipole_merge(&c->multipole, &c->progeny[k]->multipole); + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gp = &gparts[pid]; + if (gp->id_or_neg_offset == ICHECK) + message("id=%lld loc=[ %f %f %f ] size= %f count= %d", + gp->id_or_neg_offset, c->loc[0], c->loc[1], c->loc[2], + c->width[0], c->gcount); } +#endif - /* No, leaf node. */ - else + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount; pid++) { - /* Just collect the multipole. */ - multipole_init(&c->multipole, c->gparts, c->gcount); -} + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gpi = &gparts[pid]; -/** - * @brief Compute the recursive downward sweep, i.e. apply the multipole - * acceleration on all the particles. - * - * @param r The #runner. - * @param c The top-level #cell. - */ + /* Loop over every particle in the other cell. */ + for (int pjd = pid + 1; pjd < gcount; pjd++) { + + /* Get a hold of the jth part in ci. */ + struct gpart *restrict gpj = &gparts[pjd]; -void runner_dograv_down(struct runner *r, struct cell *c) { + /* Compute the pairwise distance. */ + float dx[3] = {gpi->x[0] - gpj->x[0], // x + gpi->x[1] - gpj->x[1], // y + gpi->x[2] - gpj->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - struct multipole *m = &c->multipole; + /* Interact ! */ + if (gpi->ti_end <= ti_current && gpj->ti_end <= ti_current) { - /* Split? */ - if (c->split) { + runner_iact_grav_pp(rlr_inv, r2, dx, gpi, gpj); - /* Apply this cell's acceleration on the multipoles below. */ - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) { - struct multipole *mp = &c->progeny[k]->multipole; - mp->a[0] += m->a[0]; - mp->a[1] += m->a[1]; - mp->a[2] += m->a[2]; - } + } else { - /* Recurse. */ - for (int k = 0; k < 8; k++) - if (c->progeny[k] != NULL) runner_dograv_down(r, c->progeny[k]); + if (gpi->ti_end <= ti_current) { - } + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj); - /* No, leaf node. */ - else { + } else if (gpj->ti_end <= ti_current) { - /* Apply the multipole acceleration to all gparts. */ - for (int k = 0; k < c->gcount; k++) { - struct gpart *p = &c->gparts[k]; - p->a_grav[0] += m->a[0]; - p->a_grav[1] += m->a[1]; - p->a_grav[2] += m->a[2]; + dx[0] = -dx[0]; + dx[1] = -dx[1]; + dx[2] = -dx[2]; + runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi); + } + } } } + + TIMER_TOC(timer_doself_grav_pp); } /** - * @brief Compute the multipole-multipole interaction between two cells. + * @brief Computes the interaction of all the particles in a cell with all the + * particles of another cell. * * @param r The #runner. * @param ci The first #cell. - * @param cj The second #cell. + * @param cj The other #cell. + * @param gettimer Are we timing this ? + * + * @todo Use a local cache for the particles. */ +static void runner_dopair_grav(struct runner *r, struct cell *ci, + struct cell *cj, int gettimer) { -void runner_dograv_mm(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { - - struct engine *e = r->e; - int k; - double shift[3] = {0.0, 0.0, 0.0}; - float dx[3], theta; - - /* Compute the shift between the cells. */ - for (k = 0; k < 3; k++) { - dx[k] = cj->loc[k] - ci->loc[k]; - if (r->e->s->periodic) { - if (dx[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (dx[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - dx[k] += shift[k]; - } - } - theta = sqrt((dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) / - (ci->width[0] * ci->width[0] + ci->width[1] * ci->width[1] + - ci->width[2] * ci->width[2])); +#ifdef SWIFT_DEBUG_CHECKS - /* Do an MM or an MP/PM? */ - if (theta > const_theta_max * 4) { + const int gcount_i = ci->gcount; + const int gcount_j = cj->gcount; - /* Update the multipoles. */ - multipole_iact_mm(&ci->multipole, &cj->multipole, shift); + /* Early abort? */ + if (gcount_i == 0 || gcount_j == 0) error("Empty cell !"); - } else { + /* Bad stuff will happen if cell sizes are different */ + if (ci->width[0] != cj->width[0]) + error("Non matching cell sizes !! h_i=%f h_j=%f", ci->width[0], + cj->width[0]); - /* Interact the multipoles via their parts. */ - for (k = 0; k < ci->gcount; k++) - multipole_iact_mp(&cj->multipole, &ci->gparts[k], shift); - for (k = 0; k < cj->gcount; k++) - multipole_iact_mp(&ci->multipole, &cj->gparts[k], shift); - } -} + /* Sanity check */ + if (ci == cj) + error( + "The impossible has happened: pair interaction between a cell and " + "itself."); -/** - * @brief Compute the interactions between a cell pair. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - */ + /* Are the cells direct neighbours? */ + if (!cell_are_neighbours(ci, cj)) + error( + "Non-neighbouring cells ! ci->x=[%f %f %f] ci->width=%f cj->loc=[%f %f " + "%f] " + "cj->width=%f", + ci->loc[0], ci->loc[1], ci->loc[2], ci->width[0], cj->loc[0], + cj->loc[1], cj->loc[2], cj->width[0]); -void runner_dopair_grav(struct runner *r, struct cell *restrict ci, - struct cell *restrict cj) { - - struct engine *e = r->e; - int pid, pjd, k, count_i = ci->gcount, count_j = cj->gcount; - double shift[3] = {0.0, 0.0, 0.0}; - struct gpart *restrict parts_i = ci->gparts, *restrict parts_j = cj->gparts; - struct gpart *restrict pi, *restrict pj; - double pix[3]; - float dx[3], r2; - const int ti_current = r->e->ti_current; -#ifdef WITH_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct gpart *piq[VEC_SIZE], *pjq[VEC_SIZE]; #endif - TIMER_TIC - /* Anything to do here? */ - if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return; - - /* Get the relative distance between the pairs, wrapping. */ - if (e->s->periodic) - for (k = 0; k < 3; k++) { - if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2) - shift[k] = e->s->dim[k]; - else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2) - shift[k] = -e->s->dim[k]; - } - - /* Loop over the parts in ci. */ - for (pid = 0; pid < count_i; pid++) { +#if ICHECK > 0 + for (int pid = 0; pid < ci->gcount; pid++) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[pid]; - for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k]; + struct gpart *restrict gp = &ci->gparts[pid]; - /* Loop over the parts in cj. */ - for (pjd = 0; pjd < count_j; pjd++) { + if (gp->id_or_neg_offset == ICHECK) + message("id=%lld loc=[ %f %f %f ] size= %f count= %d", + gp->id_or_neg_offset, cj->loc[0], cj->loc[1], cj->loc[2], + cj->width[0], cj->gcount); + } - /* Get a pointer to the jth particle. */ - pj = &parts_j[pjd]; + for (int pid = 0; pid < cj->gcount; pid++) { - /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; - } + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gp = &cj->gparts[pid]; -/* Compute the interaction. */ -#ifndef WITH_VECTORIZATION + if (gp->id_or_neg_offset == ICHECK) + message("id=%lld loc=[ %f %f %f ] size= %f count= %d", + gp->id_or_neg_offset, ci->loc[0], ci->loc[1], ci->loc[2], + ci->width[0], ci->gcount); + } +#endif - // if ( pi->part->id == 3473472412525 || pj->part->id == 3473472412525 ) - // message( "interacting particles pi=%lli and pj=%lli with r=%.3e in - // cells %lli/%lli." , pi->part->id , pj->part->id , sqrtf(r2) , ((long - // long int)ci) / sizeof(struct cell) , ((long long int)cj) / - // sizeof(struct cell) ); + TIMER_TIC; - runner_iact_grav(r2, dx, pi, pj); + /* Are both cells split ? */ + if (ci->split && cj->split) { -#else + for (int j = 0; j < 8; j++) { + if (ci->progeny[j] != NULL) { - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; + for (int k = 0; k < 8; k++) { + if (cj->progeny[k] != NULL) { - /* Flush? */ - if (icount == VEC_SIZE) { - runner_iact_vec_grav(r2q, dxq, piq, pjq); - icount = 0; - } + if (cell_are_neighbours(ci->progeny[j], cj->progeny[k])) { -#endif + /* Recurse */ + runner_dopair_grav(r, ci->progeny[j], cj->progeny[k], 0); - } /* loop over the parts in cj. */ + } else { - } /* loop over the parts in ci. */ + /* Ok, here we can go for particle-multipole interactions */ + runner_dopair_grav_pm(r, ci->progeny[j], cj->progeny[k]); + runner_dopair_grav_pm(r, cj->progeny[k], ci->progeny[j]); + } + } + } + } + } + } else { /* Not split */ -#ifdef WITH_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (k = 0; k < icount; k++) - runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]); -#endif + /* Compute the interactions at this level directly. */ + runner_dopair_grav_pp(r, ci, cj); + } - TIMER_TOC(timer_dopair_grav); + if (gettimer) TIMER_TOC(timer_dosub_pair_grav); } /** - * @brief Compute the interactions within a cell. + * @brief Computes the interaction of all the particles in a cell * * @param r The #runner. - * @param c The #cell. + * @param c The first #cell. + * @param gettimer Are we timing this ? + * + * @todo Use a local cache for the particles. */ +static void runner_doself_grav(struct runner *r, struct cell *c, int gettimer) { + +#ifdef SWIFT_DEBUG_CHECKS -void runner_doself_grav(struct runner *r, struct cell *restrict c) { - - int pid, pjd, k, count = c->gcount; - struct gpart *restrict parts = c->gparts; - struct gpart *restrict pi, *restrict pj; - double pix[3] = {0.0, 0.0, 0.0}; - float dx[3], r2; - const int ti_current = r->e->ti_current; -#ifdef WITH_VECTORIZATION - int icount = 0; - float r2q[VEC_SIZE] __attribute__((aligned(16))); - float dxq[3 * VEC_SIZE] __attribute__((aligned(16))); - struct gpart *piq[VEC_SIZE], *pjq[VEC_SIZE]; + /* Early abort? */ + if (c->gcount == 0) error("Empty cell !"); #endif - TIMER_TIC - /* Anything to do here? */ - if (c->ti_end_min > ti_current) return; + TIMER_TIC; - /* Loop over every part in c. */ - for (pid = 0; pid < count; pid++) { + /* If the cell is split, interact each progeny with itself, and with + each of its siblings. */ + if (c->split) { - /* Get a hold of the ith part in ci. */ - pi = &parts[pid]; - for (k = 0; k < 3; k++) pix[k] = pi->x[k]; + for (int j = 0; j < 8; j++) { + if (c->progeny[j] != NULL) { - /* Loop over every other part in c. */ - for (pjd = pid + 1; pjd < count; pjd++) { + runner_doself_grav(r, c->progeny[j], 0); - /* Get a pointer to the jth particle. */ - pj = &parts[pjd]; + for (int k = j + 1; k < 8; k++) { + if (c->progeny[k] != NULL) { - /* Compute the pairwise distance. */ - r2 = 0.0f; - for (k = 0; k < 3; k++) { - dx[k] = pix[k] - pj->x[k]; - r2 += dx[k] * dx[k]; + runner_dopair_grav(r, c->progeny[j], c->progeny[k], 0); + } + } } + } + } -/* Compute the interaction. */ -#ifndef WITH_VECTORIZATION - - // if ( pi->part->id == 3473472412525 || pj->part->id == 3473472412525 ) - // message( "interacting particles pi=%lli and pj=%lli with r=%.3e." , - // pi->part->id , pj->part->id , sqrtf(r2) ); - - runner_iact_grav(r2, dx, pi, pj); + /* If the cell is not split, then just go for it... */ + else { -#else + runner_doself_grav_pp(r, c); + } - /* Add this interaction to the queue. */ - r2q[icount] = r2; - dxq[3 * icount + 0] = dx[0]; - dxq[3 * icount + 1] = dx[1]; - dxq[3 * icount + 2] = dx[2]; - piq[icount] = pi; - pjq[icount] = pj; - icount += 1; + if (gettimer) TIMER_TOC(timer_dosub_self_grav); +} - /* Flush? */ - if (icount == VEC_SIZE) { - runner_iact_vec_grav(r2q, dxq, piq, pjq); - icount = 0; - } +static void runner_dosub_grav(struct runner *r, struct cell *ci, + struct cell *cj, int timer) { -#endif + /* Is this a single cell? */ + if (cj == NULL) { - } /* loop over the remaining parts in c. */ + runner_doself_grav(r, ci, 1); - } /* loop over the parts in c. */ + } else { -#ifdef WITH_VECTORIZATION - /* Pick up any leftovers. */ - if (icount > 0) - for (k = 0; k < icount; k++) - runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]); +#ifdef SWIFT_DEBUG_CHECKS + if (!cell_are_neighbours(ci, cj)) + error("Non-neighbouring cells in pair task !"); #endif - TIMER_TOC(timer_doself_grav); + runner_dopair_grav(r, ci, cj, 1); + } } -/** - * @brief Compute a gravity sub-task. - * - * @param r The #runner. - * @param ci The first #cell. - * @param cj The second #cell. - * @param gettimer Flag to record timer or not. - */ - -void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, - int gettimer) { +static void runner_do_grav_mm(struct runner *r, struct cell *ci, int timer) { - int j, k, periodic = r->e->s->periodic; - struct space *s = r->e->s; - - TIMER_TIC - - /* Self-interaction? */ - if (cj == NULL) { +#if ICHECK > 0 + for (int pid = 0; pid < ci->gcount; pid++) { - /* If the cell is split, recurse. */ - if (ci->split) { - - /* Split this task into tasks on its progeny. */ - for (j = 0; j < 8; j++) - if (ci->progeny[j] != NULL) { - runner_dosub_grav(r, ci->progeny[j], NULL, 0); - for (k = j + 1; k < 8; k++) - if (ci->progeny[k] != NULL) - runner_dosub_grav(r, ci->progeny[j], ci->progeny[k], 0); - } + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gp = &ci->gparts[pid]; - } + if (gp->id_or_neg_offset == ICHECK) + message("id=%lld loc=[ %f %f %f ] size= %f count= %d", + gp->id_or_neg_offset, ci->loc[0], ci->loc[1], ci->loc[2], + ci->width[0], ci->gcount); + } +#endif - /* Otherwise, just make a pp task out of it. */ - else - runner_doself_grav(r, ci); + /* Recover the list of top-level cells */ + const struct engine *e = r->e; + struct cell *cells = e->s->cells; + const int nr_cells = e->s->nr_cells; + const int ti_current = e->ti_current; + const double max_d = + const_gravity_a_smooth * const_gravity_r_cut * ci->width[0]; + const double max_d2 = max_d * max_d; + const double pos_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]}; - } + /* Anything to do here? */ + if (ci->ti_end_min > ti_current) return; - /* Nope, pair. */ - else { + /* Loop over all the cells and go for a p-m interaction if far enough but not + * too far */ + for (int i = 0; i < nr_cells; ++i) { - /* Get the opening angle theta. */ - float dx[3], theta; - for (k = 0; k < 3; k++) { - dx[k] = fabs(ci->loc[k] - cj->loc[k]); - if (periodic && dx[k] > 0.5 * s->dim[k]) dx[k] = -dx[k] + s->dim[k]; - if (dx[k] > 0.0f) dx[k] -= ci->width[k]; - } - theta = (dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) / - (ci->width[0] * ci->width[0] + ci->width[1] * ci->width[1] + - ci->width[2] * ci->width[2]); - - /* Split the interaction? */ - if (theta < const_theta_max * const_theta_max) { - - /* Are both ci and cj split? */ - if (ci->split && cj->split) { - - /* Split this task into tasks on its progeny. */ - for (j = 0; j < 8; j++) - if (ci->progeny[j] != NULL) { - for (k = 0; k < 8; k++) - if (cj->progeny[k] != NULL) - runner_dosub_grav(r, ci->progeny[j], cj->progeny[k], 0); - } + struct cell *cj = &cells[i]; - } + if (ci == cj) continue; - /* Otherwise, make a pp task out of it. */ - else - runner_dopair_grav(r, ci, cj); + const double dx[3] = {cj->loc[0] - pos_i[0], // x + cj->loc[1] - pos_i[1], // y + cj->loc[2] - pos_i[2]}; // z + const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; - } + if (r2 > max_d2) continue; - /* Otherwise, mm interaction is fine. */ - else - runner_dograv_mm(r, ci, cj); + if (!cell_are_neighbours(ci, cj)) runner_dopair_grav_pm(r, ci, cj); } - - if (gettimer) TIMER_TOC(timer_dosub_grav); } + #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */ diff --git a/src/scheduler.c b/src/scheduler.c index 9b389c8b9ee4d4f844c44707d083542b33137ade..6a0d886bd5458028c5c05812f10c204bc8946a1a 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -173,7 +173,8 @@ void scheduler_splittasks(struct scheduler *s) { if (ci->split) { /* Make a sub? */ - if (scheduler_dosub && ci->count < space_subsize / ci->count) { + if (scheduler_dosub && (ci->count * ci->count < space_subsize || + ci->gcount * ci->gcount < space_subsize)) { /* convert to a self-subtask. */ t->type = task_type_sub_self; @@ -204,11 +205,10 @@ void scheduler_splittasks(struct scheduler *s) { ci->progeny[j], ci->progeny[k], 0); } } - } - /* Pair interaction? */ - else if (t->type == task_type_pair) { + /* Hydro Pair interaction? */ + else if (t->type == task_type_pair && t->subtype != task_subtype_grav) { /* Get a handle on the cells involved. */ struct cell *ci = t->ci; @@ -234,7 +234,7 @@ void scheduler_splittasks(struct scheduler *s) { /* Replace by a single sub-task? */ if (scheduler_dosub && - ci->count * sid_scale[sid] < space_subsize / cj->count && + ci->count * cj->count * sid_scale[sid] < space_subsize && sid != 0 && sid != 2 && sid != 6 && sid != 8) { /* Make this task a sub task. */ @@ -518,6 +518,17 @@ void scheduler_splittasks(struct scheduler *s) { } /* pair interaction? */ + /* Long-range gravity interaction ? */ + else if (t->type == task_type_grav_mm) { + + /* Get a handle on the cells involved. */ + struct cell *ci = t->ci; + + /* Safety thing */ + if (ci->gcount == 0) t->type = task_type_none; + + } /* gravity interaction? */ + } /* loop over all tasks. */ } diff --git a/src/space.c b/src/space.c index ea652f7f9b918e7f4f1ebe0a81e6604765691eb1..8b7e5f81b539fb02da44dac67a3575356c08c45e 100644 --- a/src/space.c +++ b/src/space.c @@ -205,6 +205,12 @@ void space_regrid(struct space *s, double cell_max, int verbose) { "Must have at least 3 cells in each spatial dimension when periodicity " "is switched on."); + /* Check if we have enough cells for gravity. */ + if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8)) + error( + "Must have at least 8 cells in each spatial dimension when gravity " + "is switched on."); + /* In MPI-Land, changing the top-level cell size requires that the * global partition is recomputed and the particles redistributed. * Be prepared to do that. */ @@ -1421,7 +1427,8 @@ struct cell *space_getcell(struct space *s) { * @param Npart The number of Gas particles in the space. * @param Ngpart The number of Gravity particles in the space. * @param periodic flag whether the domain is periodic or not. - * @param verbose Print messages to stdout or not + * @param gravity flag whether we are doing gravity or not. + * @param verbose Print messages to stdout or not. * @param dry_run If 1, just initialise stuff, don't do anything with the parts. * * Makes a grid of edge length > r_max and fills the particles @@ -1432,8 +1439,8 @@ struct cell *space_getcell(struct space *s) { void space_init(struct space *s, const struct swift_params *params, double dim[3], struct part *parts, struct gpart *gparts, - size_t Npart, size_t Ngpart, int periodic, int verbose, - int dry_run) { + size_t Npart, size_t Ngpart, int periodic, int gravity, + int verbose, int dry_run) { /* Clean-up everything */ bzero(s, sizeof(struct space)); @@ -1443,6 +1450,7 @@ void space_init(struct space *s, const struct swift_params *params, s->dim[1] = dim[1]; s->dim[2] = dim[2]; s->periodic = periodic; + s->gravity = gravity; s->nr_parts = Npart; s->size_parts = Npart; s->parts = parts; diff --git a/src/space.h b/src/space.h index a859ff5cb0497276ba5ae6dcec6a47d0ca5d7398..180759f2af69dd0d77e6e681229353358d554291 100644 --- a/src/space.h +++ b/src/space.h @@ -98,6 +98,9 @@ struct space { /* Is the space periodic? */ int periodic; + /* Are we doing gravity? */ + int gravity; + /* General-purpose lock for this space. */ swift_lock_type lock; @@ -141,8 +144,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift); void space_init(struct space *s, const struct swift_params *params, double dim[3], struct part *parts, struct gpart *gparts, - size_t Npart, size_t Ngpart, int periodic, int verbose, - int dry_run); + size_t Npart, size_t Ngpart, int periodic, int gravity, + int verbose, int dry_run); void space_map_cells_pre(struct space *s, int full, void (*fun)(struct cell *c, void *data), void *data); void space_map_parts(struct space *s, diff --git a/src/task.c b/src/task.c index f3c6a8f711df1a0506567216d1d4021c88c10f58..e9404ab00df4f757f49d6d186f28dc40c49cfa01 100644 --- a/src/task.c +++ b/src/task.c @@ -47,20 +47,19 @@ /* Task type names. */ const char *taskID_names[task_type_count] = { - "none", "sort", "self", "pair", "sub_self", - "sub_pair", "init", "ghost", "drift", "kick", - "kick_fixdt", "send", "recv", "grav_pp", "grav_mm", - "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort", - "split_cell", "rewait"}; + "none", "sort", "self", "pair", "sub", + "init", "ghost", "drift", "kick", "kick_fixdt", + "send", "recv", "grav_gather_m", "grav_fft", "grav_mm", + "grav_up", "grav_external", "part_sort", "gpart_sort", "split_cell", + "rewait"}; const char *subtaskID_names[task_type_count] = {"none", "density", "force", - "grav", "t_end"}; + "grav"}; /** * @brief Computes the overlap between the parts array of two given cells. */ - -size_t task_cell_overlap(const struct cell *ci, const struct cell *cj) { +size_t task_cell_overlap_part(const struct cell *ci, const struct cell *cj) { if (ci == NULL || cj == NULL) return 0; if (ci->parts <= cj->parts && ci->parts + ci->count >= cj->parts + cj->count) { @@ -72,6 +71,95 @@ size_t task_cell_overlap(const struct cell *ci, const struct cell *cj) { return 0; } +/** + * @brief Computes the overlap between the gparts array of two given cells. + */ +size_t task_cell_overlap_gpart(const struct cell *ci, const struct cell *cj) { + if (ci == NULL || cj == NULL) return 0; + if (ci->gparts <= cj->gparts && + ci->gparts + ci->gcount >= cj->gparts + cj->gcount) { + return cj->gcount; + } else if (cj->gparts <= ci->gparts && + cj->gparts + cj->gcount >= ci->gparts + ci->gcount) { + return ci->gcount; + } + return 0; +} + +/** + * @brief Returns the #task_actions for a given task. + * + * @param t The #task. + */ +enum task_actions task_acts_on(const struct task *t) { + + switch (t->type) { + + case task_type_none: + return task_action_none; + break; + + case task_type_sort: + case task_type_ghost: + return task_action_part; + break; + + case task_type_self: + case task_type_pair: + case task_type_sub_self: + case task_type_sub_pair: + switch (t->subtype) { + + case task_subtype_density: + case task_subtype_force: + return task_action_part; + break; + + case task_subtype_grav: + return task_action_gpart; + break; + + default: + error("Unknow task_action for task"); + return task_action_none; + break; + } + break; + + case task_type_init: + case task_type_drift: + case task_type_kick: + case task_type_kick_fixdt: + case task_type_send: + case task_type_recv: + return task_action_all; + break; + + case task_type_grav_gather_m: + case task_type_grav_fft: + case task_type_grav_mm: + case task_type_grav_up: + return task_action_multipole; + break; + + case task_type_grav_external: + return task_action_gpart; + break; + + case task_type_part_sort: + case task_type_gpart_sort: + case task_type_split_cell: + case task_type_rewait: + return task_action_none; + break; + + default: + error("Unknow task_action for task"); + return task_action_none; + break; + } +} + /** * @brief Compute the Jaccard similarity of the data used by two * different tasks. @@ -79,31 +167,64 @@ size_t task_cell_overlap(const struct cell *ci, const struct cell *cj) { * @param ta The first #task. * @param tb The second #task. */ - float task_overlap(const struct task *ta, const struct task *tb) { + + if (ta == NULL || tb == NULL) return 0.f; + + const enum task_actions ta_act = task_acts_on(ta); + const enum task_actions tb_act = task_acts_on(tb); + /* First check if any of the two tasks are of a type that don't use cells. */ - if (ta == NULL || tb == NULL || ta->type == task_type_none || - ta->type == task_type_part_sort || ta->type == task_type_gpart_sort || - ta->type == task_type_split_cell || ta->type == task_type_rewait || - tb->type == task_type_none || tb->type == task_type_part_sort || - tb->type == task_type_gpart_sort || tb->type == task_type_split_cell || - tb->type == task_type_rewait) - return 0.0f; - - /* Compute the union of the cell data. */ - size_t size_union = 0; - if (ta->ci != NULL) size_union += ta->ci->count; - if (ta->cj != NULL) size_union += ta->cj->count; - if (tb->ci != NULL) size_union += tb->ci->count; - if (tb->cj != NULL) size_union += tb->cj->count; - - /* Compute the intersection of the cell data. */ - const size_t size_intersect = - task_cell_overlap(ta->ci, tb->ci) + task_cell_overlap(ta->ci, tb->cj) + - task_cell_overlap(ta->cj, tb->ci) + task_cell_overlap(ta->cj, tb->cj); - - return ((float)size_intersect) / (size_union - size_intersect); + if (ta_act == task_action_none || tb_act == task_action_none) return 0.f; + + const int ta_part = (ta_act == task_action_part || ta_act == task_action_all); + const int ta_gpart = + (ta_act == task_action_gpart || ta_act == task_action_all); + const int tb_part = (tb_act == task_action_part || tb_act == task_action_all); + const int tb_gpart = + (tb_act == task_action_gpart || tb_act == task_action_all); + + /* In the case where both tasks act on parts */ + if (ta_part && tb_part) { + + /* Compute the union of the cell data. */ + size_t size_union = 0; + if (ta->ci != NULL) size_union += ta->ci->count; + if (ta->cj != NULL) size_union += ta->cj->count; + if (tb->ci != NULL) size_union += tb->ci->count; + if (tb->cj != NULL) size_union += tb->cj->count; + + /* Compute the intersection of the cell data. */ + const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) + + task_cell_overlap_part(ta->ci, tb->cj) + + task_cell_overlap_part(ta->cj, tb->ci) + + task_cell_overlap_part(ta->cj, tb->cj); + + return ((float)size_intersect) / (size_union - size_intersect); + } + + /* In the case where both tasks act on gparts */ + else if (ta_gpart && tb_gpart) { + + /* Compute the union of the cell data. */ + size_t size_union = 0; + if (ta->ci != NULL) size_union += ta->ci->gcount; + if (ta->cj != NULL) size_union += ta->cj->gcount; + if (tb->ci != NULL) size_union += tb->ci->gcount; + if (tb->cj != NULL) size_union += tb->cj->gcount; + + /* Compute the intersection of the cell data. */ + const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) + + task_cell_overlap_gpart(ta->ci, tb->cj) + + task_cell_overlap_gpart(ta->cj, tb->ci) + + task_cell_overlap_gpart(ta->cj, tb->cj); + + return ((float)size_intersect) / (size_union - size_intersect); + } + + /* Else, no overlap */ + return 0.f; } /** @@ -111,26 +232,41 @@ float task_overlap(const struct task *ta, const struct task *tb) { * * @param t The #task. */ - void task_unlock(struct task *t) { + const int type = t->type; + const int subtype = t->subtype; + struct cell *ci = t->ci, *cj = t->cj; + /* Act based on task type. */ - switch (t->type) { + switch (type) { + + case task_type_sort: + cell_unlocktree(ci); + break; + case task_type_self: case task_type_sub_self: - case task_type_sort: - cell_unlocktree(t->ci); + if (subtype == task_subtype_grav) { + cell_gunlocktree(ci); + } else { + cell_unlocktree(ci); + } break; + case task_type_pair: case task_type_sub_pair: - cell_unlocktree(t->ci); - cell_unlocktree(t->cj); + if (subtype == task_subtype_grav) { + cell_gunlocktree(ci); + cell_gunlocktree(cj); + } else { + cell_unlocktree(ci); + cell_unlocktree(cj); + } break; - case task_type_grav_pp: + case task_type_grav_mm: - case task_type_grav_down: - cell_gunlocktree(t->ci); - if (t->cj != NULL) cell_gunlocktree(t->cj); + cell_gunlocktree(ci); break; default: break; @@ -142,7 +278,6 @@ void task_unlock(struct task *t) { * * @param t the #task. */ - int task_lock(struct task *t) { const int type = t->type; @@ -205,6 +340,10 @@ int task_lock(struct task *t) { } break; + case task_type_grav_mm: + cell_glocktree(ci); + break; + default: break; } diff --git a/src/task.h b/src/task.h index 8dea503e42cdba9a188112cfa668c464c69f9959..ee49568b143282b9e3025f6d2bc81ded04ffee41 100644 --- a/src/task.h +++ b/src/task.h @@ -46,10 +46,10 @@ enum task_types { task_type_kick_fixdt, task_type_send, task_type_recv, - task_type_grav_pp, + task_type_grav_gather_m, + task_type_grav_fft, task_type_grav_mm, task_type_grav_up, - task_type_grav_down, task_type_grav_external, task_type_part_sort, task_type_gpart_sort, @@ -70,6 +70,16 @@ enum task_subtypes { task_subtype_count }; +/* The kind of action the task perform */ +enum task_actions { + task_action_none, + task_action_part, + task_action_gpart, + task_action_all, + task_action_multipole, + task_action_count +}; + extern const char *subtaskID_names[]; /* Data of a task. */ @@ -102,5 +112,6 @@ int task_lock(struct task *t); void task_print_mask(unsigned int mask); void task_print_submask(unsigned int submask); void task_do_rewait(struct task *t); +enum task_actions task_acts_on(const struct task *t); #endif /* SWIFT_TASK_H */ diff --git a/src/timers.h b/src/timers.h index c48961b39737f23eb936d7283f76651d33892991..aa8455397daf1e88b709a8332e3ae63694991e94 100644 --- a/src/timers.h +++ b/src/timers.h @@ -37,10 +37,11 @@ enum { timer_dosort, timer_doself_density, timer_doself_force, - timer_doself_grav, + timer_doself_grav_pp, timer_dopair_density, timer_dopair_force, - timer_dopair_grav, + timer_dopair_grav_pm, + timer_dopair_grav_pp, timer_dograv_external, timer_dosub_self_density, timer_dosub_self_force, diff --git a/src/tools.c b/src/tools.c index 50ad48af38285062738c7c8ce12e0f4535086dd5..b4a37328ff7df9de76adceebb5ed6801fa417851 100644 --- a/src/tools.c +++ b/src/tools.c @@ -426,7 +426,7 @@ void pairs_single_grav(double *dim, long long int pid, fdx[i] = dx[i]; } r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2]; - runner_iact_grav(r2, fdx, &pi, &pj); + runner_iact_grav_pp(0.f, r2, fdx, &pi, &pj); a[0] += pi.a_grav[0]; a[1] += pi.a_grav[1]; a[2] += pi.a_grav[2]; @@ -600,3 +600,65 @@ void shuffle_particles(struct part *parts, const int count) { } else error("Array not big enough to shuffle!"); } + +/** + * @brief Computes the forces between all g-particles using the N^2 algorithm + * + * Overwrites the accelerations of the gparts with the values. + * Do not use for actual runs. + * + * @brief gparts The array of particles. + * @brief gcount The number of particles. + */ +void gravity_n2(struct gpart *gparts, const int gcount, + const struct phys_const *constants, float rlr) { + + const float rlr_inv = 1. / rlr; + const float max_d = const_gravity_r_cut * rlr; + const float max_d2 = max_d * max_d; + + message("rlr_inv= %f", rlr_inv); + message("max_d: %f", max_d); + + /* Reset everything */ + for (int pid = 0; pid < gcount; pid++) { + struct gpart *restrict gpi = &gparts[pid]; + gpi->a_grav[0] = 0.f; + gpi->a_grav[1] = 0.f; + gpi->a_grav[2] = 0.f; + } + + /* Loop over all particles in ci... */ + for (int pid = 0; pid < gcount; pid++) { + + /* Get a hold of the ith part in ci. */ + struct gpart *restrict gpi = &gparts[pid]; + + for (int pjd = pid + 1; pjd < gcount; pjd++) { + + /* Get a hold of the jth part in ci. */ + struct gpart *restrict gpj = &gparts[pjd]; + + /* Compute the pairwise distance. */ + const float dx[3] = {gpi->x[0] - gpj->x[0], // x + gpi->x[1] - gpj->x[1], // y + gpi->x[2] - gpj->x[2]}; // z + const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]; + + if (r2 < max_d2 || 1) { + + /* Apply the gravitational acceleration. */ + runner_iact_grav_pp(rlr_inv, r2, dx, gpi, gpj); + } + } + } + + /* Multiply by Newton's constant */ + const double const_G = constants->const_newton_G; + for (int pid = 0; pid < gcount; pid++) { + struct gpart *restrict gpi = &gparts[pid]; + gpi->a_grav[0] *= const_G; + gpi->a_grav[1] *= const_G; + gpi->a_grav[2] *= const_G; + } +} diff --git a/src/tools.h b/src/tools.h index ba5b2f8c7f118edb042c825ff00907ad807b3d55..43ddd946c3e8cdf53139bb917135dffd8a8acd12 100644 --- a/src/tools.h +++ b/src/tools.h @@ -22,12 +22,9 @@ #ifndef SWIFT_TOOL_H #define SWIFT_TOOL_H -/* Config parameters. */ -#include "../config.h" - -/* Includes. */ #include "cell.h" #include "part.h" +#include "physical_constants.h" #include "runner.h" void factor(int value, int *f1, int *f2); @@ -47,5 +44,7 @@ void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic); double random_uniform(double a, double b); void shuffle_particles(struct part *parts, const int count); +void gravity_n2(struct gpart *gparts, const int gcount, + const struct phys_const *constants, float rlr); #endif /* SWIFT_TOOL_H */ diff --git a/src/version.c b/src/version.c index ab22c0d9ab8841dedd09aa83aa8803e468e69ce9..68a051fa08c53c68319c1e785c0c6503afe18f4d 100644 --- a/src/version.c +++ b/src/version.c @@ -33,6 +33,10 @@ #include <hdf5.h> #endif +#ifdef HAVE_FFTW +#include <fftw3.h> +#endif + /* Some standard headers. */ #include <stdio.h> #include <string.h> @@ -238,6 +242,22 @@ const char *metis_version(void) { return version; } +/** + * @brief return the FFTW version used when SWIFT was built. + * + * @result description of the FFTW version. + */ +const char *fftw3_version(void) { + + static char version[256] = {0}; +#if defined(HAVE_FFTW) + sprintf(version, "%s", "3.x (details not available)"); +#else + sprintf(version, "Unknown version"); +#endif + return version; +} + /** * @brief Prints a greeting message to the standard output containing code * version and revision number @@ -259,6 +279,9 @@ void greetings(void) { #ifdef HAVE_HDF5 printf(" HDF5 library version: %s\n", hdf5_version()); #endif +#ifdef HAVE_FFTW + printf(" FFTW library version: %s\n", fftw3_version()); +#endif #ifdef WITH_MPI printf(" MPI library: %s\n", mpi_version()); #ifdef HAVE_METIS diff --git a/src/version.h b/src/version.h index 4b6b38d220b36e5cb340571cd89e6f52a7b21a8e..0d568f312cf775cf932d580a49da7c19c9e14b21 100644 --- a/src/version.h +++ b/src/version.h @@ -27,8 +27,9 @@ const char* git_branch(void); const char* compiler_name(void); const char* compiler_version(void); const char* mpi_version(void); -const char* hdf5_version(void); const char* metis_version(void); +const char* hdf5_version(void); +const char* fftw3_version(void); void greetings(void); #endif /* SWIFT_VERSION_H */ diff --git a/tests/Makefile.am b/tests/Makefile.am index 4d6cb65ca6971d30fd6e39f52ce582c59cecb8b8..2adce2c90985b537a668adfccec0d5241113314e 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -18,17 +18,17 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) -DTIMER -AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) +AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS) # List of programs and scripts to run in the test suite TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \ testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh \ - testParser.sh testSPHStep test125cells.sh + testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT # List of test programs to compile check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ testSPHStep testPair test27cells test125cells testParser \ - testKernel testInteractions testMaths testSymmetry + testKernel testKernelGrav testFFT testInteractions testMaths testSymmetry # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -37,8 +37,6 @@ testMaths_SOURCES = testMaths.c testReading_SOURCES = testReading.c -testKernel_SOURCES = testKernel.c - testSymmetry_SOURCES = testSymmetry.c testTimeIntegration_SOURCES = testTimeIntegration.c @@ -55,6 +53,12 @@ test125cells_SOURCES = test125cells.c testParser_SOURCES = testParser.c +testKernel_SOURCES = testKernel.c + +testKernelGrav_SOURCES = testKernelGrav.c + +testFFT_SOURCES = testFFT.c + testInteractions_SOURCES = testInteractions.c # Files necessary for distribution diff --git a/tests/testFFT.c b/tests/testFFT.c new file mode 100644 index 0000000000000000000000000000000000000000..7ff29dac3f0e9b7d6aec65cbfc7e4f4b85d08dbb --- /dev/null +++ b/tests/testFFT.c @@ -0,0 +1,189 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Some standard headers. */ +#include <stdlib.h> +#include <string.h> + +#include <fftw3.h> + +/* Includes. */ +#include "swift.h" + +const double G = 1.; + +const size_t N = 16; +const size_t PMGRID = 8; + +// const double asmth = 2. * M_PI * const_gravity_a_smooth / boxSize; +// const double asmth2 = asmth * asmth; +// const double fact = G / (M_PI * boxSize) * (1. / (2. * boxSize / PMGRID)); + +int main() { + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + /* Simulation properties */ + const size_t count = N * N * N; + const double boxSize = 1.; + + /* Create some particles */ + struct gpart* gparts = malloc(count * sizeof(struct gpart)); + bzero(gparts, count * sizeof(struct gpart)); + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + for (size_t k = 0; k < N; ++k) { + + struct gpart* gp = &gparts[i * N * N + j * N + k]; + + gp->x[0] = i * boxSize / N + boxSize / (2 * N); + gp->x[1] = j * boxSize / N + boxSize / (2 * N); + gp->x[2] = k * boxSize / N + boxSize / (2 * N); + + gp->mass = 1. / count; + + gp->id_or_neg_offset = i * N * N + j * N + k; + } + } + } + + /* Properties of the mesh */ + const size_t meshmin[3] = {0, 0, 0}; + const size_t meshmax[3] = {PMGRID - 1, PMGRID - 1, PMGRID - 1}; + + const size_t dimx = meshmax[0] - meshmin[0] + 2; + const size_t dimy = meshmax[1] - meshmin[1] + 2; + const size_t dimz = meshmax[2] - meshmin[2] + 2; + + const double fac = PMGRID / boxSize; + const size_t PMGRID2 = 2 * (PMGRID / 2 + 1); + + /* message("dimx=%zd dimy=%zd dimz=%zd", dimx, dimy, dimz); */ + + /* Allocate and empty the workspace mesh */ + const size_t workspace_size = (dimx + 4) * (dimy + 4) * (dimz + 4); + double* workspace = fftw_malloc(workspace_size * sizeof(double)); + bzero(workspace, workspace_size * sizeof(double)); + + /* Do CIC with the particles */ + for (size_t pid = 0; pid < count; ++pid) { + + const struct gpart* const gp = &gparts[pid]; + + const size_t slab_x = + (fac * gp->x[0] >= PMGRID) ? PMGRID - 1 : fac * gp->x[0]; + const size_t slab_y = + (fac * gp->x[1] >= PMGRID) ? PMGRID - 1 : fac * gp->x[1]; + const size_t slab_z = + (fac * gp->x[2] >= PMGRID) ? PMGRID - 1 : fac * gp->x[2]; + + const double dx = fac * gp->x[0] - (double)slab_x; + const double dy = fac * gp->x[1] - (double)slab_y; + const double dz = fac * gp->x[2] - (double)slab_z; + + const size_t slab_xx = slab_x + 1; + const size_t slab_yy = slab_y + 1; + const size_t slab_zz = slab_z + 1; + + workspace[(slab_x * dimy + slab_y) * dimz + slab_z] += + gp->mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz); + workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] += + gp->mass * (1.0 - dx) * dy * (1.0 - dz); + workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] += + gp->mass * (1.0 - dx) * (1.0 - dy) * dz; + workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] += + gp->mass * (1.0 - dx) * dy * dz; + workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] += + gp->mass * (dx) * (1.0 - dy) * (1.0 - dz); + workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] += + gp->mass * (dx)*dy * (1.0 - dz); + workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] += + gp->mass * (dx) * (1.0 - dy) * dz; + workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] += + gp->mass * (dx)*dy * dz; + } + + /* for(size_t i = 0 ; i < dimx*dimy*dimz; ++i) */ + /* message("workspace[%zd] = %f", i, workspace[i]); */ + + /* Prepare the force grid */ + const size_t fft_size = workspace_size; + double* forcegrid = fftw_malloc(fft_size * sizeof(double)); + bzero(forcegrid, fft_size * sizeof(double)); + + const size_t sendmin = 0, recvmin = 0; + const size_t sendmax = PMGRID, recvmax = PMGRID; + + memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz, + (sendmax - sendmin + 1) * dimy * dimz * sizeof(double)); + + /* for (size_t i = 0; i < fft_size; ++i) */ + /* if (forcegrid[i] != workspace[i]) error("wrong"); */ + + /* Prepare the density grid */ + double* rhogrid = fftw_malloc(fft_size * sizeof(double)); + bzero(rhogrid, fft_size * sizeof(double)); + + /* Now get the density */ + for (size_t slab_x = recvmin; slab_x <= recvmax; slab_x++) { + + const size_t slab_xx = slab_x % PMGRID; + + for (size_t slab_y = recvmin; slab_y <= recvmax; slab_y++) { + + const size_t slab_yy = slab_y % PMGRID; + + for (size_t slab_z = recvmin; slab_z <= recvmax; slab_z++) { + + const size_t slab_zz = slab_z % PMGRID; + + rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] += + forcegrid[((slab_x - recvmin) * dimy + (slab_y - recvmin)) * dimz + + (slab_z - recvmin)]; + } + } + } + + /* for (size_t i = 0; i < 640; i++) { */ + /* if (rhogrid[i] != workspace[i]) { */ + /* message("rhogrid[%zd]= %f workspace[%zd]= %f forcegrid[%zd]= %f", i, */ + /* rhogrid[i], i, workspace[i], i, forcegrid[i]); */ + /* } */ + /* } */ + + /* FFT of the density field */ + fftw_complex* fftgrid = fftw_malloc(fft_size * sizeof(fftw_complex)); + fftw_plan plan_forward = fftw_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid, + fftgrid, FFTW_ESTIMATE); + fftw_execute(plan_forward); + + for (size_t i = 0; i < 640; i++) { + message("workspace[%zd]= %f", i, fftgrid[i][0]); + } + + /* Clean-up */ + fftw_destroy_plan(plan_forward); + fftw_free(forcegrid); + fftw_free(rhogrid); + fftw_free(workspace); + free(gparts); + return 0; +} diff --git a/tests/testKernelGrav.c b/tests/testKernelGrav.c new file mode 100644 index 0000000000000000000000000000000000000000..42ce6066b605c8cc9ac78a6e565dd59a500112f4 --- /dev/null +++ b/tests/testKernelGrav.c @@ -0,0 +1,112 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#include "const.h" +#include "kernel_gravity.h" +#include "kernel_long_gravity.h" + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#define numPoints (1 << 7) + +/** + * @brief The Gadget-2 gravity kernel function + * + * @param r The distance between particles + * @param h The cut-off distance of the kernel + */ +float gadget(float r, float h) { + float fac; + const float r2 = r * r; + if (r >= h) + fac = 1.0f / (r2 * r); + else { + const float h_inv = 1. / h; + const float h_inv3 = h_inv * h_inv * h_inv; + const float u = r * h_inv; + if (u < 0.5) + fac = h_inv3 * (10.666666666667 + u * u * (32.0 * u - 38.4)); + else + fac = + h_inv3 * (21.333333333333 - 48.0 * u + 38.4 * u * u - + 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u)); + } + return fac; +} + +int main() { + + const float h = 3.f; + const float r_max = 6.f; + + for (int k = 1; k < numPoints; ++k) { + + const float r = (r_max * k) / numPoints; + + const float u = r / h; + + const float gadget_w = gadget(r, h); + + float swift_w; + if (u < 1.) { + kernel_grav_eval(u, &swift_w); + swift_w *= (1 / (h * h * h)); + } else { + swift_w = 1 / (r * r * r); + } + + printf("%2d: r= %f h= %f u= %f Wg(r,h)= %f Ws(r,h)= %f\n", k, r, h, u, + gadget_w, swift_w); + + if (fabsf(gadget_w - swift_w) > 2e-7) { + printf("Invalid value ! Gadget= %e, SWIFT= %e\n", gadget_w, swift_w); + return 1; + } + } + + printf("\nAll values are consistent\n"); + + /* Now test the long range function */ + const float a_smooth = const_gravity_a_smooth; + + for (int k = 1; k < numPoints; ++k) { + + const float r = (r_max * k) / numPoints; + + const float u = r / a_smooth; + + float swift_w; + kernel_long_grav_eval(u, &swift_w); + + float gadget_w = erfc(u / 2) + u * exp(-u * u / 4) / sqrt(M_PI); + + printf("%2d: r= %f r_lr= %f u= %f Ws(r)= %f Wg(r)= %f\n", k, r, a_smooth, u, + swift_w, gadget_w); + + if (fabsf(gadget_w - swift_w) > 2e-7) { + printf("Invalid value ! Gadget= %e, SWIFT= %e\n", gadget_w, swift_w); + return 1; + } + } + + return 0; +}