diff --git a/.gitignore b/.gitignore
index dba88cee0989e00baef51d9e75a42b1141d15714..3521b353ec4b1c4dea7192047e197596f083fe99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,6 +56,8 @@ tests/testSingle
tests/testTimeIntegration
tests/testSPHStep
tests/testKernel
+tests/testKernelGrav
+tests/testFFT
tests/testInteractions
tests/testSymmetry
tests/testMaths
diff --git a/configure.ac b/configure.ac
index 63d2ddad064a8cff12320cc52bbea32506bdaaeb..587ff45c688071592a56d9d9ac11605c2c3d7caa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -417,6 +417,17 @@ if test "$ac_cv_func_pthread_setaffinity_np" = "yes"; then
fi
fi
+# Check for FFTW
+have_fftw3="no"
+AC_CHECK_HEADER([fftw3.h])
+if test "$ac_cv_header_fftw3_h" = "yes"; then
+ AC_CHECK_LIB([fftw3],[fftw_malloc], [AC_DEFINE([HAVE_FFTW],
+ [1],[Defined if FFTW 3.x exists.])] )
+ FFTW_LIBS="-lfftw3"
+ have_fftw3="yes"
+fi
+AC_SUBST([FFTW_LIBS])
+
# Check for Intel intrinsics header optionally used by vector.h.
AC_CHECK_HEADERS([immintrin.h])
@@ -500,6 +511,7 @@ AC_MSG_RESULT([
HDF5 enabled : $with_hdf5
- parallel : $have_parallel_hdf5
Metis enabled : $have_metis
+ FFTW3 enabled : $have_fftw3
libNUMA enabled : $have_numa
Using tcmalloc : $have_tcmalloc
CPU profiler : $have_profiler
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 1fedca0c3847c3e4d7942de09cbf409bcd69b8c6..187abcf9898b8024b4e4f5089de7ca51e6dd2e3c 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -24,7 +24,7 @@ AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS)
AM_LDFLAGS = $(HDF5_LDFLAGS)
# Extra libraries.
-EXTRA_LIBS = $(HDF5_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
+EXTRA_LIBS = $(HDF5_LIBS) $(FFTW_LIBS) $(PROFILER_LIBS) $(TCMALLOC_LIBS)
# MPI libraries.
MPI_LIBS = $(METIS_LIBS) $(MPI_THREAD_LIBS)
diff --git a/examples/UniformDMBox/makeIC.py b/examples/UniformDMBox/makeIC.py
index 449d780fb31bc23dd194f772be45d35e6b0bbe3f..2aee89798a5b8bbd425a6b73528779fb1aa7db23 100644
--- a/examples/UniformDMBox/makeIC.py
+++ b/examples/UniformDMBox/makeIC.py
@@ -26,7 +26,7 @@ from numpy import *
# with a density of 1
# Parameters
-periodic= 1 # 1 For periodic box
+periodic= 0 # 1 For periodic box
boxSize = 1.
rho = 1.
L = int(sys.argv[1]) # Number of particles along one axis
diff --git a/examples/main.c b/examples/main.c
index 9cda03c14c364974a18e36c84e60227a01970dd5..30e27cc55a6ec508dc352f1acc7f2eed71d71bdd 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -399,8 +399,8 @@ int main(int argc, char *argv[]) {
/* Initialize the space with these data. */
if (myrank == 0) clocks_gettime(&tic);
struct space s;
- space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic, talking,
- dry_run);
+ space_init(&s, params, dim, parts, gparts, Ngas, Ngpart, periodic,
+ with_self_gravity, talking, dry_run);
if (myrank == 0) {
clocks_gettime(&toc);
message("space_init took %.3f %s.", clocks_diff(&tic, &toc),
diff --git a/src/Makefile.am b/src/Makefile.am
index c8554ce61940650898cfb21e282e0a6d174c79f1..4aa7278ba5162ec106fe25f10529f766f9a703a8 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -19,7 +19,7 @@
AM_CFLAGS = -DTIMER $(HDF5_CPPFLAGS)
# Assign a "safe" version number
-AM_LDFLAGS = $(HDF5_LDFLAGS) -version-info 0:0:0
+AM_LDFLAGS = $(HDF5_LDFLAGS) $(FFTW_LIBS) -version-info 0:0:0
# The git command, if available.
GIT_CMD = @GIT_CMD@
@@ -48,17 +48,18 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
units.c common_io.c single_io.c multipole.c version.c map.c \
- kernel_hydro.c kernel_gravity.c tools.c part.c partition.c clocks.c parser.c \
- physical_constants.c potentials.c hydro_properties.c
+ kernel_hydro.c tools.c part.c partition.c clocks.c parser.c \
+ physical_constants.c potentials.c hydro_properties.c \
+ runner_doiact_fft.c
# Include files for distribution, not installation.
nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel_hydro.h kernel_gravity.h \
- vector.h runner_doiact.h runner_doiact_grav.h units.h intrinsics.h minmax.h kick.h \
- timestep.h drift.h adiabatic_index.h io_properties.h \
+ kernel_long_gravity.h vector.h runner_doiact.h runner_doiact_grav.h runner_doiact_fft.h \
+ units.h intrinsics.h minmax.h kick.h timestep.h drift.h adiabatic_index.h io_properties.h \
gravity.h gravity_io.h \
gravity/Default/gravity.h gravity/Default/gravity_iact.h gravity/Default/gravity_io.h \
gravity/Default/gravity_debug.h gravity/Default/gravity_part.h \
- hydro.h hydro_io.h \
+ hydro.h hydro_io.h \
hydro/Minimal/hydro.h hydro/Minimal/hydro_iact.h hydro/Minimal/hydro_io.h \
hydro/Minimal/hydro_debug.h hydro/Minimal/hydro_part.h \
hydro/Default/hydro.h hydro/Default/hydro_iact.h hydro/Default/hydro_io.h \
diff --git a/src/cell.c b/src/cell.c
index 2796a8933e1fedc2522c833570558adda369140b..7df55ce04ca739da2de6e6061048ad1fe3998a1e 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -709,6 +709,92 @@ void cell_clean_links(struct cell *c, void *data) {
}
/**
+<<<<<<< HEAD
+ * @brief Checks whether the cells are direct neighbours ot not. Both cells have
+ * to be of the same size
+ *
+ * @param ci First #cell.
+ * @param cj Second #cell.
+ *
+ * @todo Deal with periodicity.
+ */
+int cell_are_neighbours(const struct cell *restrict ci,
+ const struct cell *restrict cj) {
+
+#ifdef SWIFT_DEBUG_CHECKS
+ if (ci->width[0] != cj->width[0]) error("Cells of different size !");
+#endif
+
+ /* Maximum allowed distance */
+ const double min_dist =
+ 1.2 * ci->width[0]; /* 1.2 accounts for rounding errors */
+
+ /* (Manhattan) Distance between the cells */
+ for (int k = 0; k < 3; k++) {
+ const double center_i = ci->loc[k];
+ const double center_j = cj->loc[k];
+ if (fabsf(center_i - center_j) > min_dist) return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * @brief Computes the multi-pole brutally and compare to the
+ * recursively computed one.
+ *
+ * @param c Cell to act upon
+ * @param data Unused parameter
+ */
+void cell_check_multipole(struct cell *c, void *data) {
+
+ struct multipole ma;
+
+ if (c->gcount > 0) {
+
+ /* Brute-force calculation */
+ multipole_init(&ma, c->gparts, c->gcount);
+
+ /* Compare with recursive one */
+ struct multipole mb = c->multipole;
+
+ if (fabsf(ma.mass - mb.mass) / fabsf(ma.mass + mb.mass) > 1e-5)
+ error("Multipole masses are different (%12.15e vs. %12.15e)", ma.mass,
+ mb.mass);
+
+ for (int k = 0; k < 3; ++k)
+ if (fabsf(ma.CoM[k] - mb.CoM[k]) / fabsf(ma.CoM[k] + mb.CoM[k]) > 1e-5)
+ error("Multipole CoM are different (%12.15e vs. %12.15e", ma.CoM[k],
+ mb.CoM[k]);
+
+ if (fabsf(ma.I_xx - mb.I_xx) / fabsf(ma.I_xx + mb.I_xx) > 1e-5 &&
+ ma.I_xx > 1e-9)
+ error("Multipole I_xx are different (%12.15e vs. %12.15e)", ma.I_xx,
+ mb.I_xx);
+ if (fabsf(ma.I_yy - mb.I_yy) / fabsf(ma.I_yy + mb.I_yy) > 1e-5 &&
+ ma.I_yy > 1e-9)
+ error("Multipole I_yy are different (%12.15e vs. %12.15e)", ma.I_yy,
+ mb.I_yy);
+ if (fabsf(ma.I_zz - mb.I_zz) / fabsf(ma.I_zz + mb.I_zz) > 1e-5 &&
+ ma.I_zz > 1e-9)
+ error("Multipole I_zz are different (%12.15e vs. %12.15e)", ma.I_zz,
+ mb.I_zz);
+ if (fabsf(ma.I_xy - mb.I_xy) / fabsf(ma.I_xy + mb.I_xy) > 1e-5 &&
+ ma.I_xy > 1e-9)
+ error("Multipole I_xy are different (%12.15e vs. %12.15e)", ma.I_xy,
+ mb.I_xy);
+ if (fabsf(ma.I_xz - mb.I_xz) / fabsf(ma.I_xz + mb.I_xz) > 1e-5 &&
+ ma.I_xz > 1e-9)
+ error("Multipole I_xz are different (%12.15e vs. %12.15e)", ma.I_xz,
+ mb.I_xz);
+ if (fabsf(ma.I_yz - mb.I_yz) / fabsf(ma.I_yz + mb.I_yz) > 1e-5 &&
+ ma.I_yz > 1e-9)
+ error("Multipole I_yz are different (%12.15e vs. %12.15e)", ma.I_yz,
+ mb.I_yz);
+ }
+}
+
+/*
* @brief Frees up the memory allocated for this #cell
*/
void cell_clean(struct cell *c) {
diff --git a/src/cell.h b/src/cell.h
index a14d94d50ce6a4d2d25e7e6005c035cb22a3720d..150718eeb4bd3857e37d517718fe53661033a330 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -197,6 +197,9 @@ void cell_init_parts(struct cell *c, void *data);
void cell_init_gparts(struct cell *c, void *data);
void cell_convert_hydro(struct cell *c, void *data);
void cell_clean_links(struct cell *c, void *data);
+int cell_are_neighbours(const struct cell *restrict ci,
+ const struct cell *restrict cj);
+void cell_check_multipole(struct cell *c, void *data);
void cell_clean(struct cell *c);
#endif /* SWIFT_CELL_H */
diff --git a/src/common_io.c b/src/common_io.c
index d3590ae402487eb2f605a1cf3f5722f86704ad88..3c001d9da106a46ef5033c8cdec9346d68c54ecd 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -374,6 +374,9 @@ void writeCodeDescription(hid_t h_file) {
writeAttribute_s(h_grpcode, "Git Branch", git_branch());
writeAttribute_s(h_grpcode, "Git Revision", git_revision());
writeAttribute_s(h_grpcode, "HDF5 library version", hdf5_version());
+#ifdef HAVE_FFTW
+ writeAttribute_s(h_grpcode, "FFTW library version", fftw3_version());
+#endif
#ifdef WITH_MPI
writeAttribute_s(h_grpcode, "MPI library", mpi_version());
#ifdef HAVE_METIS
diff --git a/src/const.h b/src/const.h
index 50d72e3339d7b8f04e3028f09794aff45f03dc65..736ba0155e03dc0f595ac06edfd0763b38302e9c 100644
--- a/src/const.h
+++ b/src/const.h
@@ -36,11 +36,6 @@
/* Time integration constants. */
#define const_max_u_change 0.1f
-/* Gravity stuff. */
-#define const_theta_max 0.57735f
-#define const_G 6.672e-8f /* Gravitational constant. */
-#define const_epsilon 0.0014f /* Gravity blending distance. */
-
/* Hydrodynamical adiabatic index. */
#define HYDRO_GAMMA_5_3
//#define HYDRO_GAMMA_4_3
@@ -59,7 +54,13 @@
#define GADGET2_SPH
//#define DEFAULT_SPH
-/* External potential properties */
+/* Self gravity stuff. */
+#define const_gravity_multipole_order 2
+#define const_gravity_a_smooth 1.25f
+#define const_gravity_r_cut 4.5f
+#define const_gravity_eta 0.025f
+
+/* External gravity properties */
#define EXTERNAL_POTENTIAL_POINTMASS
//#define EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL
diff --git a/src/drift.h b/src/drift.h
index c71cda49b0ad2b18b227fc2d485bbe76f32d5b94..a6e347cc337385f89bece53bc477bff7d163c202 100644
--- a/src/drift.h
+++ b/src/drift.h
@@ -26,6 +26,7 @@
#include "const.h"
#include "debug.h"
#include "hydro.h"
+#include "part.h"
/**
* @brief Perform the 'drift' operation on a #gpart
diff --git a/src/engine.c b/src/engine.c
index b33850be9415f30f6c0ac38e70a74a7d239b791e..ebd589bbb0af01cec3196c658c4d014daca8df38 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -64,6 +64,7 @@
#include "serial_io.h"
#include "single_io.h"
#include "timers.h"
+#include "tools.h"
#include "units.h"
#include "version.h"
@@ -129,12 +130,10 @@ void engine_make_gravity_hierarchical_tasks(struct engine *e, struct cell *c,
engine_policy_external_gravity;
const int is_fixdt = (e->policy & engine_policy_fixdt) == engine_policy_fixdt;
- /* Am I the super-cell? */
- /* TODO(pedro): Add a condition for gravity tasks as well. */
- if (super == NULL &&
- (c->density != NULL || (!c->split && (c->count > 0 || c->gcount > 0)))) {
+ /* Is this the super-cell? */
+ if (super == NULL && (c->grav != NULL || (!c->split && c->gcount > 0))) {
- /* This is the super cell, i.e. the first with density tasks attached. */
+ /* This is the super cell, i.e. the first with gravity tasks attached. */
super = c;
/* Local tasks only... */
@@ -1182,9 +1181,62 @@ void engine_exchange_strays(struct engine *e, size_t offset_parts,
#endif
}
+/**
+ * @brief Constructs the top-level tasks for the short-range gravity
+ * interactions.
+ *
+ * All top-cells get a self task.
+ * All neighbouring pairs get a pair task.
+ * All non-neighbouring pairs within a range of 6 cells get a M-M task.
+ *
+ * @param e The #engine.
+ */
+void engine_make_gravity_tasks(struct engine *e) {
+
+ struct space *s = e->s;
+ struct scheduler *sched = &e->sched;
+ const int nodeID = e->nodeID;
+ struct cell *cells = s->cells;
+ const int nr_cells = s->nr_cells;
+
+ for (int cid = 0; cid < nr_cells; ++cid) {
+
+ struct cell *ci = &cells[cid];
+
+ /* Skip cells without gravity particles */
+ if (ci->gcount == 0) continue;
+
+ /* Is that neighbour local ? */
+ if (ci->nodeID != nodeID) continue;
+
+ /* If the cells is local build a self-interaction */
+ scheduler_addtask(sched, task_type_self, task_subtype_grav, 0, 0, ci, NULL,
+ 0);
+
+ /* Let's also build a task for all the non-neighbouring pm calculations */
+ scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, 0, 0, ci,
+ NULL, 0);
+
+ for (int cjd = cid + 1; cjd < nr_cells; ++cjd) {
+
+ struct cell *cj = &cells[cjd];
+
+ /* Skip cells without gravity particles */
+ if (cj->gcount == 0) continue;
+
+ /* Is that neighbour local ? */
+ if (cj->nodeID != nodeID) continue;
+
+ if (cell_are_neighbours(ci, cj))
+ scheduler_addtask(sched, task_type_pair, task_subtype_grav, 0, 0, ci,
+ cj, 1);
+ }
+ }
+}
+
/**
* @brief Constructs the top-level pair tasks for the first hydro loop over
- *neighbours
+ * neighbours
*
* Here we construct all the tasks for all possible neighbouring non-empty
* local cells in the hierarchy. No dependencies are being added thus far.
@@ -1316,6 +1368,115 @@ void engine_count_and_link_tasks(struct engine *e) {
}
}
+/**
+ * @brief Creates the dependency network for the gravity tasks of a given cell.
+ *
+ * @param sched The #scheduler.
+ * @param gravity The gravity task to link.
+ * @param c The cell.
+ */
+static inline void engine_make_gravity_dependencies(struct scheduler *sched,
+ struct task *gravity,
+ struct cell *c) {
+
+ /* init --> gravity --> kick */
+ scheduler_addunlock(sched, c->super->init, gravity);
+ scheduler_addunlock(sched, gravity, c->super->kick);
+
+ /* grav_up --> gravity ( --> kick) */
+ scheduler_addunlock(sched, c->super->grav_up, gravity);
+}
+
+/**
+ * @brief Creates all the task dependencies for the gravity
+ *
+ * @param e The #engine
+ */
+void engine_link_gravity_tasks(struct engine *e) {
+
+ struct scheduler *sched = &e->sched;
+ const int nodeID = e->nodeID;
+ const int nr_tasks = sched->nr_tasks;
+
+ /* Add one task gathering all the multipoles */
+ struct task *gather = scheduler_addtask(
+ sched, task_type_grav_gather_m, task_subtype_none, 0, 0, NULL, NULL, 0);
+
+ /* And one task performing the FFT */
+ struct task *fft = scheduler_addtask(sched, task_type_grav_fft,
+ task_subtype_none, 0, 0, NULL, NULL, 0);
+
+ scheduler_addunlock(sched, gather, fft);
+
+ for (int k = 0; k < nr_tasks; k++) {
+
+ /* Get a pointer to the task. */
+ struct task *t = &sched->tasks[k];
+
+ /* Skip? */
+ if (t->skip) continue;
+
+ /* Multipole construction */
+ if (t->type == task_type_grav_up) {
+ scheduler_addunlock(sched, t, gather);
+ }
+
+ /* Long-range interaction */
+ if (t->type == task_type_grav_mm) {
+
+ /* Gather the multipoles --> mm interaction --> kick */
+ scheduler_addunlock(sched, gather, t);
+ scheduler_addunlock(sched, t, t->ci->super->kick);
+
+ /* init --> mm interaction */
+ scheduler_addunlock(sched, t->ci->super->init, t);
+ }
+
+ /* Self-interaction? */
+ if (t->type == task_type_self && t->subtype == task_subtype_grav) {
+
+ engine_make_gravity_dependencies(sched, t, t->ci);
+
+ }
+
+ /* Otherwise, pair interaction? */
+ else if (t->type == task_type_pair && t->subtype == task_subtype_grav) {
+
+ if (t->ci->nodeID == nodeID) {
+
+ engine_make_gravity_dependencies(sched, t, t->ci);
+ }
+
+ if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+
+ engine_make_gravity_dependencies(sched, t, t->cj);
+ }
+
+ }
+
+ /* Otherwise, sub-self interaction? */
+ else if (t->type == task_type_sub_self && t->subtype == task_subtype_grav) {
+
+ if (t->ci->nodeID == nodeID) {
+ engine_make_gravity_dependencies(sched, t, t->ci);
+ }
+ }
+
+ /* Otherwise, sub-pair interaction? */
+ else if (t->type == task_type_sub_pair && t->subtype == task_subtype_grav) {
+
+ if (t->ci->nodeID == nodeID) {
+
+ engine_make_gravity_dependencies(sched, t, t->ci);
+ }
+ if (t->cj->nodeID == nodeID && t->ci->super != t->cj->super) {
+
+ engine_make_gravity_dependencies(sched, t, t->cj);
+ }
+ }
+ }
+}
+
/**
* @brief Creates the dependency network for the hydro tasks of a given cell.
*
@@ -1453,41 +1614,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
}
}
-/**
- * @brief Constructs the top-level pair tasks for the gravity M-M interactions
- *
- * Correct implementation is still lacking here.
- *
- * @param e The #engine.
- */
-void engine_make_gravityinteraction_tasks(struct engine *e) {
-
- struct space *s = e->s;
- struct scheduler *sched = &e->sched;
- const int nr_cells = s->nr_cells;
- struct cell *cells = s->cells;
-
- /* Loop over all cells. */
- for (int i = 0; i < nr_cells; i++) {
-
- /* If it has gravity particles, add a self-task */
- if (cells[i].gcount > 0) {
- scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0,
- &cells[i], NULL, 0);
-
- /* Loop over all remainding cells */
- for (int j = i + 1; j < nr_cells; j++) {
-
- /* If that other cell has gravity parts, add a pair interaction */
- if (cells[j].gcount > 0) {
- scheduler_addtask(sched, task_type_grav_mm, task_subtype_none, -1, 0,
- &cells[i], &cells[j], 0);
- }
- }
- }
- }
-}
-
/**
* @brief Constructs the gravity tasks building the multipoles and propagating
*them to the children
@@ -1513,9 +1639,12 @@ void engine_make_gravityrecursive_tasks(struct engine *e) {
struct task *up =
scheduler_addtask(sched, task_type_grav_up, task_subtype_none, 0, 0,
&cells[k], NULL, 0);
- struct task *down =
- scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0, 0,
- &cells[k], NULL, 0);
+
+ struct task *down = NULL;
+ /* struct task *down = */
+ /* scheduler_addtask(sched, task_type_grav_down, task_subtype_none, 0,
+ * 0, */
+ /* &cells[k], NULL, 0); */
/* Push tasks down the cell hierarchy. */
engine_addtasks_grav(e, &cells[k], up, down);
@@ -1548,11 +1677,10 @@ void engine_maketasks(struct engine *e) {
}
/* Construct the firt hydro loop over neighbours */
- engine_make_hydroloop_tasks(e);
+ if (e->policy & engine_policy_hydro) engine_make_hydroloop_tasks(e);
/* Add the gravity mm tasks. */
- if (e->policy & engine_policy_self_gravity)
- engine_make_gravityinteraction_tasks(e);
+ if (e->policy & engine_policy_self_gravity) engine_make_gravity_tasks(e);
/* Split the tasks. */
scheduler_splittasks(sched);
@@ -1573,7 +1701,7 @@ void engine_maketasks(struct engine *e) {
/* Count the number of tasks associated with each cell and
store the density tasks in each cell, and make each sort
depend on the sorts of its progeny. */
- engine_count_and_link_tasks(e);
+ if (e->policy & engine_policy_hydro) engine_count_and_link_tasks(e);
/* Append hierarchical tasks to each cells */
if (e->policy & engine_policy_hydro)
@@ -1588,7 +1716,12 @@ void engine_maketasks(struct engine *e) {
/* Run through the tasks and make force tasks for each density task.
Each force task depends on the cell ghosts and unlocks the kick task
of its super-cell. */
- engine_make_extra_hydroloop_tasks(e);
+ if (e->policy & engine_policy_hydro) engine_make_extra_hydroloop_tasks(e);
+
+ /* Add the dependencies for the self-gravity stuff */
+ if (e->policy & engine_policy_self_gravity) engine_link_gravity_tasks(e);
+
+#ifdef WITH_MPI
/* Add the communication tasks if MPI is being used. */
if (e->policy & engine_policy_mpi) {
@@ -1611,6 +1744,7 @@ void engine_maketasks(struct engine *e) {
NULL);
}
}
+#endif
/* Set the unlocks per task. */
scheduler_set_unlocks(sched);
@@ -1740,7 +1874,7 @@ int engine_marktasks(struct engine *e) {
continue;
/* Set the sort flags. */
- if (t->type == task_type_pair) {
+ if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
if (!(ci->sorted & (1 << t->flags))) {
ci->sorts->flags |= (1 << t->flags);
ci->sorts->skip = 0;
@@ -2144,6 +2278,7 @@ void engine_collect_drift(struct cell *c) {
c->ang_mom[1] = ang_mom[1];
c->ang_mom[2] = ang_mom[2];
}
+
/**
* @brief Print the conserved quantities statistics to a log file
*
@@ -2315,7 +2450,16 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
/* Add the tasks corresponding to self-gravity to the masks */
if (e->policy & engine_policy_self_gravity) {
- /* Nothing here for now */
+ mask |= 1 << task_type_grav_up;
+ mask |= 1 << task_type_grav_mm;
+ mask |= 1 << task_type_grav_gather_m;
+ mask |= 1 << task_type_grav_fft;
+ mask |= 1 << task_type_self;
+ mask |= 1 << task_type_pair;
+ mask |= 1 << task_type_sub_self;
+ mask |= 1 << task_type_sub_pair;
+
+ submask |= 1 << task_subtype_grav;
}
/* Add the tasks corresponding to external gravity to the masks */
@@ -2326,6 +2470,7 @@ void engine_init_particles(struct engine *e, int flag_entropy_ICs) {
/* Add MPI tasks if need be */
if (e->policy & engine_policy_mpi) {
+
mask |= 1 << task_type_send;
mask |= 1 << task_type_recv;
submask |= 1 << task_subtype_tend;
@@ -2450,7 +2595,16 @@ void engine_step(struct engine *e) {
/* Add the tasks corresponding to self-gravity to the masks */
if (e->policy & engine_policy_self_gravity) {
- /* Nothing here for now */
+ mask |= 1 << task_type_grav_up;
+ mask |= 1 << task_type_grav_mm;
+ mask |= 1 << task_type_grav_gather_m;
+ mask |= 1 << task_type_grav_fft;
+ mask |= 1 << task_type_self;
+ mask |= 1 << task_type_pair;
+ mask |= 1 << task_type_sub_self;
+ mask |= 1 << task_type_sub_pair;
+
+ submask |= 1 << task_subtype_grav;
}
/* Add the tasks corresponding to external gravity to the masks */
@@ -2460,6 +2614,7 @@ void engine_step(struct engine *e) {
/* Add MPI tasks if need be */
if (e->policy & engine_policy_mpi) {
+
mask |= 1 << task_type_send;
mask |= 1 << task_type_recv;
submask |= 1 << task_subtype_tend;
diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h
index 32af9b6c67fb9d78d70ae6f41e8b4a32bfb1d8c8..5903ce1384f616a123c74b579539fe53747d17e4 100644
--- a/src/gravity/Default/gravity.h
+++ b/src/gravity/Default/gravity.h
@@ -53,8 +53,6 @@ gravity_compute_timestep_external(const struct external_potential* potential,
/**
* @brief Computes the gravity time-step of a given particle due to self-gravity
*
- * This function only branches towards the potential chosen by the user.
- *
* @param phys_const The physical constants in internal units.
* @param gp Pointer to the g-particle data.
*/
@@ -62,7 +60,13 @@ __attribute__((always_inline)) INLINE static float
gravity_compute_timestep_self(const struct phys_const* const phys_const,
const struct gpart* const gp) {
- float dt = FLT_MAX;
+ const float ac2 = gp->a_grav[0] * gp->a_grav[0] +
+ gp->a_grav[1] * gp->a_grav[1] +
+ gp->a_grav[2] * gp->a_grav[2];
+
+ const float ac = (ac2 > 0.f) ? sqrtf(ac2) : FLT_MIN;
+
+ const float dt = sqrt(2.f * const_gravity_eta * gp->epsilon / ac);
return dt;
}
@@ -76,7 +80,9 @@ gravity_compute_timestep_self(const struct phys_const* const phys_const,
* @param gp The particle to act upon
*/
__attribute__((always_inline)) INLINE static void gravity_first_init_gpart(
- struct gpart* gp) {}
+ struct gpart* gp) {
+ gp->epsilon = 0.; // MATTHIEU
+}
/**
* @brief Prepares a g-particle for the gravity calculation
@@ -101,9 +107,16 @@ __attribute__((always_inline)) INLINE static void gravity_init_gpart(
* Multiplies the forces and accelerations by the appropiate constants
*
* @param gp The particle to act upon
+ * @param const_G Newton's constant
*/
__attribute__((always_inline)) INLINE static void gravity_end_force(
- struct gpart* gp) {}
+ struct gpart* gp, double const_G) {
+
+ /* Let's get physical... */
+ gp->a_grav[0] *= const_G;
+ gp->a_grav[1] *= const_G;
+ gp->a_grav[2] *= const_G;
+}
/**
* @brief Computes the gravitational acceleration induced by external potentials
diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h
index dd6878841d7d0632ab8e4f218044516d9895cab8..aa285ce3245aeac608aa022924a613a7a7b00375 100644
--- a/src/gravity/Default/gravity_iact.h
+++ b/src/gravity/Default/gravity_iact.h
@@ -23,93 +23,168 @@
/* Includes. */
#include "const.h"
#include "kernel_gravity.h"
+#include "kernel_long_gravity.h"
+#include "multipole.h"
#include "vector.h"
/**
- * @brief Gravity potential
+ * @brief Gravity forces between particles
*/
-__attribute__((always_inline)) INLINE static void runner_iact_grav(
- float r2, float *dx, struct gpart *pi, struct gpart *pj) {
+__attribute__((always_inline)) INLINE static void runner_iact_grav_pp(
+ float rlr_inv, float r2, const float *dx, struct gpart *gpi,
+ struct gpart *gpj) {
- float ir, r;
- float w, acc;
- float mi = pi->mass, mj = pj->mass;
- int k;
+ /* Apply the gravitational acceleration. */
+ const float r = sqrtf(r2);
+ const float ir = 1.f / r;
+ const float mi = gpi->mass;
+ const float mj = gpj->mass;
+ const float hi = gpi->epsilon;
+ const float hj = gpj->epsilon;
+ const float u = r * rlr_inv;
+ float f_lr, fi, fj, W;
- /* Get the absolute distance. */
- ir = 1.0f / sqrtf(r2);
- r = r2 * ir;
+ /* Get long-range correction */
+ kernel_long_grav_eval(u, &f_lr);
- /* Evaluate the gravity kernel. */
- kernel_grav_eval(r, &acc);
+ if (r >= hi) {
- /* Scale the acceleration. */
- acc *= const_G * ir * ir * ir;
+ /* Get Newtonian gravity */
+ fi = mj * ir * ir * ir * f_lr;
- /* Aggregate the accelerations. */
- for (k = 0; k < 3; k++) {
- w = acc * dx[k];
- pi->a_grav[k] -= w * mj;
- pj->a_grav[k] += w * mi;
+ } else {
+
+ const float hi_inv = 1.f / hi;
+ const float hi_inv3 = hi_inv * hi_inv * hi_inv;
+ const float ui = r * hi_inv;
+
+ kernel_grav_eval(ui, &W);
+
+ /* Get softened gravity */
+ fi = mj * hi_inv3 * W * f_lr;
}
+
+ if (r >= hj) {
+
+ /* Get Newtonian gravity */
+ fj = mi * ir * ir * ir * f_lr;
+
+ } else {
+
+ const float hj_inv = 1.f / hj;
+ const float hj_inv3 = hj_inv * hj_inv * hj_inv;
+ const float uj = r * hj_inv;
+
+ kernel_grav_eval(uj, &W);
+
+ /* Get softened gravity */
+ fj = mi * hj_inv3 * W * f_lr;
+ }
+
+ const float fidx[3] = {fi * dx[0], fi * dx[1], fi * dx[2]};
+ gpi->a_grav[0] -= fidx[0];
+ gpi->a_grav[1] -= fidx[1];
+ gpi->a_grav[2] -= fidx[2];
+
+ const float fjdx[3] = {fj * dx[0], fj * dx[1], fj * dx[2]};
+ gpj->a_grav[0] += fjdx[0];
+ gpj->a_grav[1] += fjdx[1];
+ gpj->a_grav[2] += fjdx[2];
}
/**
- * @brief Gravity potential (Vectorized version)
+ * @brief Gravity forces between particles (non-symmetric version)
*/
-__attribute__((always_inline)) INLINE static void runner_iact_vec_grav(
- float *R2, float *Dx, struct gpart **pi, struct gpart **pj) {
-
-#ifdef WITH_VECTORIZATION
-
- vector ir, r, r2, dx[3];
- vector w, acc, ai, aj;
- vector mi, mj;
- int j, k;
-
-#if VEC_SIZE == 8
- mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass,
- pi[4]->mass, pi[5]->mass, pi[6]->mass, pi[7]->mass);
- mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass,
- pj[4]->mass, pj[5]->mass, pj[6]->mass, pj[7]->mass);
- for (k = 0; k < 3; k++)
- dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k], Dx[12 + k],
- Dx[15 + k], Dx[18 + k], Dx[21 + k]);
-#elif VEC_SIZE == 4
- mi.v = vec_set(pi[0]->mass, pi[1]->mass, pi[2]->mass, pi[3]->mass);
- mj.v = vec_set(pj[0]->mass, pj[1]->mass, pj[2]->mass, pj[3]->mass);
- for (k = 0; k < 3; k++)
- dx[k].v = vec_set(Dx[0 + k], Dx[3 + k], Dx[6 + k], Dx[9 + k]);
-#endif
+__attribute__((always_inline)) INLINE static void runner_iact_grav_pp_nonsym(
+ float rlr_inv, float r2, const float *dx, struct gpart *gpi,
+ const struct gpart *gpj) {
- /* Get the radius and inverse radius. */
- r2.v = vec_load(R2);
- ir.v = vec_rsqrt(r2.v);
- ir.v = ir.v - vec_set1(0.5f) * ir.v * (r2.v * ir.v * ir.v - vec_set1(1.0f));
- r.v = r2.v * ir.v;
-
- /* Evaluate the gravity kernel. */
- blender_eval_vec(&r, &acc);
-
- /* Scale the acceleration. */
- acc.v *= vec_set1(const_G) * ir.v * ir.v * ir.v;
-
- /* Aggregate the accelerations. */
- for (k = 0; k < 3; k++) {
- w.v = acc.v * dx[k].v;
- ai.v = w.v * mj.v;
- aj.v = w.v * mi.v;
- for (j = 0; j < VEC_SIZE; j++) {
- pi[j]->a_grav[k] -= ai.f[j];
- pj[j]->a_grav[k] += aj.f[j];
- }
+ /* Apply the gravitational acceleration. */
+ const float r = sqrtf(r2);
+ const float ir = 1.f / r;
+ const float mj = gpj->mass;
+ const float hi = gpi->epsilon;
+ const float u = r * rlr_inv;
+ float f_lr, f, W;
+
+ /* Get long-range correction */
+ kernel_long_grav_eval(u, &f_lr);
+
+ if (r >= hi) {
+
+ /* Get Newtonian gravity */
+ f = mj * ir * ir * ir * f_lr;
+
+ } else {
+
+ const float hi_inv = 1.f / hi;
+ const float hi_inv3 = hi_inv * hi_inv * hi_inv;
+ const float ui = r * hi_inv;
+
+ kernel_grav_eval(ui, &W);
+
+ /* Get softened gravity */
+ f = mj * hi_inv3 * W * f_lr;
}
-#else
+ const float fdx[3] = {f * dx[0], f * dx[1], f * dx[2]};
+
+ gpi->a_grav[0] -= fdx[0];
+ gpi->a_grav[1] -= fdx[1];
+ gpi->a_grav[2] -= fdx[2];
+}
- for (int k = 0; k < VEC_SIZE; k++)
- runner_iact_grav(R2[k], &Dx[3 * k], pi[k], pj[k]);
+/**
+ * @brief Gravity forces between particle and multipole
+ */
+__attribute__((always_inline)) INLINE static void runner_iact_grav_pm(
+ float rlr_inv, float r2, const float *dx, struct gpart *gp,
+ const struct multipole *multi) {
+
+ /* Apply the gravitational acceleration. */
+ const float r = sqrtf(r2);
+ const float ir = 1.f / r;
+ const float u = r * rlr_inv;
+ const float mrinv3 = multi->mass * ir * ir * ir;
+
+ /* Get long-range correction */
+ float f_lr;
+ kernel_long_grav_eval(u, &f_lr);
+
+#if const_gravity_multipole_order < 2
+
+ /* 0th and 1st order terms */
+ gp->a_grav[0] += mrinv3 * f_lr * dx[0];
+ gp->a_grav[1] += mrinv3 * f_lr * dx[1];
+ gp->a_grav[2] += mrinv3 * f_lr * dx[2];
+
+#elif const_gravity_multipole_order == 2
+ /* Terms up to 2nd order (quadrupole) */
+
+ /* Follows the notation in Bonsai */
+ const float mrinv5 = mrinv3 * ir * ir;
+ const float mrinv7 = mrinv5 * ir * ir;
+
+ const float D1 = -mrinv3;
+ const float D2 = 3.f * mrinv5;
+ const float D3 = -15.f * mrinv7;
+
+ const float q = multi->I_xx + multi->I_yy + multi->I_zz;
+ const float qRx =
+ multi->I_xx * dx[0] + multi->I_xy * dx[1] + multi->I_xz * dx[2];
+ const float qRy =
+ multi->I_xy * dx[0] + multi->I_yy * dx[1] + multi->I_yz * dx[2];
+ const float qRz =
+ multi->I_xz * dx[0] + multi->I_yz * dx[1] + multi->I_zz * dx[2];
+ const float qRR = qRx * dx[0] + qRy * dx[1] + qRz * dx[2];
+ const float C = D1 + 0.5f * D2 * q + 0.5f * D3 * qRR;
+
+ gp->a_grav[0] -= f_lr * (C * dx[0] + D2 * qRx);
+ gp->a_grav[1] -= f_lr * (C * dx[1] + D2 * qRy);
+ gp->a_grav[2] -= f_lr * (C * dx[2] + D2 * qRz);
+#else
+#error "Multipoles of order >2 not yet implemented."
#endif
}
diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h
index b6c9e62559207cb25323c8a108e4bffc87ea0fcf..77e6543429993bea355c04dd28fcb5b04eee5519 100644
--- a/src/gravity/Default/gravity_part.h
+++ b/src/gravity/Default/gravity_part.h
@@ -37,16 +37,15 @@ struct gpart {
/* Particle mass. */
float mass;
+ /* Softening length */
+ float epsilon;
+
/* Particle time of beginning of time-step. */
int ti_begin;
/* Particle time of end of time-step. */
int ti_end;
- /* /\* current time of x, and of v_full *\/ */
- /* float tx; */
- /* float tv; */
-
/* Particle ID. If negative, it is the negative offset of the #part with
which this gpart is linked. */
long long id_or_neg_offset;
diff --git a/src/kernel_gravity.c b/src/kernel_gravity.c
deleted file mode 100644
index 639a964c813ef7fd95008857ee17b7dd5ffafb27..0000000000000000000000000000000000000000
--- a/src/kernel_gravity.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*******************************************************************************
- * This file is part of SWIFT.
- * Copyright (c) 2015 Pedro Gonnet (pedro.gonnet@durham.ac.uk),
- * Matthieu Schaller (matthieu.schaller@durham.ac.uk)
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program. If not, see .
- *
- ******************************************************************************/
-
-#include
-#include
-
-#include "kernel_gravity.h"
-
-/**
- * @brief The Gadget-2 gravity kernel function
- *
- * @param r The distance between particles
- */
-float gadget(float r) {
- float fac, h_inv, u, r2 = r * r;
- if (r >= const_epsilon)
- fac = 1.0f / (r2 * r);
- else {
- h_inv = 1. / const_epsilon;
- u = r * h_inv;
- if (u < 0.5)
- fac = const_iepsilon3 * (10.666666666667 + u * u * (32.0 * u - 38.4));
- else
- fac = const_iepsilon3 *
- (21.333333333333 - 48.0 * u + 38.4 * u * u -
- 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
- }
- return const_G * fac;
-}
-
-/**
- * @brief Test the gravity kernel function by dumping it in the interval [0,1].
- *
- * @param r_max The radius up to which the kernel is dumped.
- * @param N number of intervals in [0,1].
- */
-void gravity_kernel_dump(float r_max, int N) {
-
- int k;
- float x, w;
- float x4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- float w4[4] = {0.0f, 0.0f, 0.0f, 0.0f};
- // float dw_dx4[4] __attribute__ ((aligned (16)));
-
- for (k = 1; k <= N; k++) {
- x = (r_max * k) / N;
- x4[3] = x4[2];
- x4[2] = x4[1];
- x4[1] = x4[0];
- x4[0] = x;
- kernel_grav_eval(x, &w);
- w *= const_G / (x * x * x);
- // blender_deval_vec( (vector *)x4 , (vector *)w4 , (vector *)dw_dx4 );
- printf(" %.16e %.16e %.16e %.16e %.16e %.16e %.16e\n", x, w * x, w4[0],
- w4[1], w4[2], w4[3], gadget(x) * x);
- }
-}
diff --git a/src/kernel_gravity.h b/src/kernel_gravity.h
index 0c786f5189b882ed4593ef4a927edbf2a8cdd6e7..b38feb5758debf87add2007ee3684d869f393f7e 100644
--- a/src/kernel_gravity.h
+++ b/src/kernel_gravity.h
@@ -20,203 +20,70 @@
#ifndef SWIFT_KERNEL_GRAVITY_H
#define SWIFT_KERNEL_GRAVITY_H
+#include
+
/* Includes. */
#include "const.h"
#include "inline.h"
#include "vector.h"
-#define const_iepsilon (1. / const_epsilon)
-#define const_iepsilon2 (const_iepsilon * const_iepsilon)
-#define const_iepsilon3 (const_iepsilon2 * const_iepsilon)
-#define const_iepsilon4 (const_iepsilon2 * const_iepsilon2)
-#define const_iepsilon5 (const_iepsilon3 * const_iepsilon2)
-#define const_iepsilon6 (const_iepsilon3 * const_iepsilon3)
-
/* The gravity kernel is defined as a degree 6 polynomial in the distance
r. The resulting value should be post-multiplied with r^-3, resulting
in a polynomial with terms ranging from r^-3 to r^3, which are
sufficient to model both the direct potential as well as the splines
- near the origin. */
-
-/* Coefficients for the gravity kernel. */
-#define kernel_grav_degree 6
-#define kernel_grav_ivals 2
-#define kernel_grav_scale (2 * const_iepsilon)
-static float
- kernel_grav_coeffs[(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)] = {
- 32.0f * const_iepsilon6,
- -192.0f / 5.0f * const_iepsilon5,
- 0.0f,
- 32.0f / 3.0f * const_iepsilon3,
- 0.0f,
- 0.0f,
- 0.0f,
- -32.0f / 3.0f * const_iepsilon6,
- 192.0f / 5.0f * const_iepsilon5,
- -48.0f * const_iepsilon4,
- 64.0f / 3.0f * const_iepsilon3,
- 0.0f,
- 0.0f,
- -1.0f / 15.0f,
- 0.0f,
- 0.0f,
- 0.0f,
- 0.0f,
- 0.0f,
- 0.0f,
- 1.0f};
+ near the origin.
+ As in the hydro case, the 1/h^3 needs to be multiplied in afterwards */
+
+/* Coefficients for the kernel. */
+#define kernel_grav_name "Gadget-2 softening kernel"
+#define kernel_grav_degree 6 /* Degree of the polynomial */
+#define kernel_grav_ivals 2 /* Number of branches */
+static const float
+ kernel_grav_coeffs[(kernel_grav_degree + 1) * (kernel_grav_ivals + 1)]
+ __attribute__((aligned(16))) = {32.f,
+ -38.4f,
+ 0.f,
+ 10.66666667f,
+ 0.f,
+ 0.f,
+ 0.f, /* 0 < u < 0.5 */
+ -10.66666667f,
+ 38.4f,
+ -48.f,
+ 21.3333333,
+ 0.f,
+ 0.f,
+ -0.066666667f, /* 0.5 < u < 1 */
+ 0.f,
+ 0.f,
+ 0.f,
+ 0.f,
+ 0.f,
+ 0.f,
+ 0.f}; /* 1 < u */
/**
- * @brief Computes the gravity cubic spline for a given distance x.
- */
-
-__attribute__((always_inline)) INLINE static void kernel_grav_eval(float x,
- float *W) {
- int ind = fmin(x * kernel_grav_scale, kernel_grav_ivals);
- float *coeffs = &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)];
- float w = coeffs[0] * x + coeffs[1];
- for (int k = 2; k <= kernel_grav_degree; k++) w = x * w + coeffs[k];
- *W = w;
-}
-
-#ifdef WITH_VECTORIZATION
-
-/**
- * @brief Computes the gravity cubic spline for a given distance x (Vectorized
- * version).
- */
-
-__attribute__((always_inline)) INLINE static void kernel_grav_eval_vec(
- vector *x, vector *w) {
-
- vector ind, c[kernel_grav_degree + 1];
- int j, k;
-
- /* Load x and get the interval id. */
- ind.m = vec_ftoi(vec_fmin(x->v * vec_set1(kernel_grav_scale),
- vec_set1((float)kernel_grav_ivals)));
-
- /* load the coefficients. */
- for (k = 0; k < VEC_SIZE; k++)
- for (j = 0; j < kernel_grav_degree + 1; j++)
- c[j].f[k] = kernel_grav_coeffs[ind.i[k] * (kernel_grav_degree + 1) + j];
-
- /* Init the iteration for Horner's scheme. */
- w->v = (c[0].v * x->v) + c[1].v;
-
- /* And we're off! */
- for (int k = 2; k <= kernel_grav_degree; k++) w->v = (x->v * w->v) + c[k].v;
-}
-
-#endif
-
-/* Blending function stuff
- * --------------------------------------------------------------------------------------------
- */
-
-/* Coefficients for the blending function. */
-#define blender_degree 3
-#define blender_ivals 3
-#define blender_scale 4.0f
-static float blender_coeffs[(blender_degree + 1) * (blender_ivals + 1)] = {
- 0.0f, 0.0f, 0.0f, 1.0f, -32.0f, 24.0f, -6.0f, 1.5f,
- -32.0f, 72.0f, -54.0f, 13.5f, 0.0f, 0.0f, 0.0f, 0.0f};
-
-/**
- * @brief Computes the cubic spline blender for a given distance x.
- */
-
-__attribute__((always_inline)) INLINE static void blender_eval(float x,
- float *W) {
- int ind = fmin(x * blender_scale, blender_ivals);
- float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
- float w = coeffs[0] * x + coeffs[1];
- for (int k = 2; k <= blender_degree; k++) w = x * w + coeffs[k];
- *W = w;
-}
-
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x.
- */
-
-__attribute__((always_inline)) INLINE static void blender_deval(float x,
- float *W,
- float *dW_dx) {
- int ind = fminf(x * blender_scale, blender_ivals);
- float *coeffs = &blender_coeffs[ind * (blender_degree + 1)];
- float w = coeffs[0] * x + coeffs[1];
- float dw_dx = coeffs[0];
- for (int k = 2; k <= blender_degree; k++) {
- dw_dx = dw_dx * x + w;
- w = x * w + coeffs[k];
- }
- *W = w;
- *dW_dx = dw_dx;
-}
-
-#ifdef WITH_VECTORIZATION
-
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.
- */
-
-__attribute__((always_inline)) INLINE static void blender_eval_vec(vector *x,
- vector *w) {
-
- vector ind, c[blender_degree + 1];
- int j, k;
-
- /* Load x and get the interval id. */
- ind.m = vec_ftoi(
- vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
-
- /* load the coefficients. */
- for (k = 0; k < VEC_SIZE; k++)
- for (j = 0; j < blender_degree + 1; j++)
- c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
-
- /* Init the iteration for Horner's scheme. */
- w->v = (c[0].v * x->v) + c[1].v;
-
- /* And we're off! */
- for (int k = 2; k <= blender_degree; k++) w->v = (x->v * w->v) + c[k].v;
-}
-
-/**
- * @brief Computes the cubic spline blender and its derivative for a given
- * distance x (Vectorized version). Gives a sensible answer only if x<2.
+ * @brief Computes the gravity softening function.
+ *
+ * @param u The ratio of the distance to the softening length $u = x/h$.
+ * @param W (return) The value of the kernel function $W(x,h)$.
*/
+__attribute__((always_inline)) INLINE static void kernel_grav_eval(
+ float u, float *const W) {
-__attribute__((always_inline)) INLINE static void blender_deval_vec(
- vector *x, vector *w, vector *dw_dx) {
-
- vector ind, c[blender_degree + 1];
- int j, k;
+ /* Pick the correct branch of the kernel */
+ const int ind = (int)fminf(u * (float)kernel_grav_ivals, kernel_grav_ivals);
+ const float *const coeffs =
+ &kernel_grav_coeffs[ind * (kernel_grav_degree + 1)];
- /* Load x and get the interval id. */
- ind.m = vec_ftoi(
- vec_fmin(x->v * vec_set1(blender_scale), vec_set1((float)blender_ivals)));
+ /* First two terms of the polynomial ... */
+ float w = coeffs[0] * u + coeffs[1];
- /* load the coefficients. */
- for (k = 0; k < VEC_SIZE; k++)
- for (j = 0; j < blender_degree + 1; j++)
- c[j].f[k] = blender_coeffs[ind.i[k] * (blender_degree + 1) + j];
+ /* ... and the rest of them */
+ for (int k = 2; k <= kernel_grav_degree; k++) w = u * w + coeffs[k];
- /* Init the iteration for Horner's scheme. */
- w->v = (c[0].v * x->v) + c[1].v;
- dw_dx->v = c[0].v;
-
- /* And we're off! */
- for (int k = 2; k <= blender_degree; k++) {
- dw_dx->v = (dw_dx->v * x->v) + w->v;
- w->v = (x->v * w->v) + c[k].v;
- }
+ /* Return everything */
+ *W = w / (u * u * u);
}
-#endif
-
-void gravity_kernel_dump(float r_max, int N);
-
#endif // SWIFT_KERNEL_GRAVITY_H
diff --git a/src/kernel_long_gravity.h b/src/kernel_long_gravity.h
new file mode 100644
index 0000000000000000000000000000000000000000..d247c7a461d4bd116f30ab106143f6c75e1b941e
--- /dev/null
+++ b/src/kernel_long_gravity.h
@@ -0,0 +1,50 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ *
+ ******************************************************************************/
+#ifndef SWIFT_KERNEL_LONG_GRAVITY_H
+#define SWIFT_KERNEL_LONG_GRAVITY_H
+
+#include
+
+/* Includes. */
+#include "const.h"
+#include "inline.h"
+#include "vector.h"
+
+#define one_over_sqrt_pi ((float)(M_2_SQRTPI * 0.5))
+
+/**
+ * @brief Computes the long-range correction term for the FFT calculation.
+ *
+ * @param u The ratio of the distance to the FFT cell scale $u = x/A$.
+ * @param W (return) The value of the kernel function.
+ */
+__attribute__((always_inline)) INLINE static void kernel_long_grav_eval(
+ float u, float *const W) {
+
+ const float arg1 = u * 0.5f;
+ const float arg2 = u * one_over_sqrt_pi;
+ const float arg3 = -arg1 * arg1;
+
+ const float term1 = erfc(arg1);
+ const float term2 = arg2 * expf(arg3);
+
+ *W = term1 + term2;
+}
+
+#endif // SWIFT_KERNEL_LONG_GRAVITY_H
diff --git a/src/multipole.c b/src/multipole.c
index fc4701fead68e64e8742730325931282f80e8934..f0d0c7084fd9bec366f2185cb24887da40591b17 100644
--- a/src/multipole.c
+++ b/src/multipole.c
@@ -1,6 +1,7 @@
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
@@ -21,149 +22,203 @@
#include "../config.h"
/* Some standard headers. */
-#include
-#include
-#include
-#include
-#include
-#include
-
-/* MPI headers. */
-#ifdef WITH_MPI
-#include
-#endif
+#include
/* This object's header. */
#include "multipole.h"
/**
- * @brief Merge two multipoles.
- *
- * @param ma The #multipole which will contain the merged result.
- * @param mb The other #multipole.
- */
+* @brief Reset the data of a #multipole.
+*
+* @param m The #multipole.
+*/
+void multipole_reset(struct multipole *m) {
-void multipole_merge(struct multipole *ma, struct multipole *mb) {
+ /* Just bzero the struct. */
+ bzero(m, sizeof(struct multipole));
+}
-#if multipole_order == 1
+/**
+* @brief Init a multipole from a set of particles.
+*
+* @param m The #multipole.
+* @param gparts The #gpart.
+* @param gcount The number of particles.
+*/
+void multipole_init(struct multipole *m, const struct gpart *gparts,
+ int gcount) {
+
+#if const_gravity_multipole_order > 2
+#error "Multipoles of order >2 not yet implemented."
+#endif
- /* Correct the position. */
- float mma = ma->coeffs[0], mmb = mb->coeffs[0];
- float w = 1.0f / (mma + mmb);
- for (int k = 0; k < 3; k++) ma->x[k] = (ma->x[k] * mma + mb->x[k] * mmb) * w;
+ /* Zero everything */
+ multipole_reset(m);
- /* Add the particle to the moments. */
- ma->coeffs[0] = mma + mmb;
+ /* Temporary variables */
+ double mass = 0.0;
+ double com[3] = {0.0, 0.0, 0.0};
-#else
-#error( "Multipoles of order %i not yet implemented." , multipole_order )
+#if const_gravity_multipole_order >= 2
+ double I_xx = 0.0, I_yy = 0.0, I_zz = 0.0;
+ double I_xy = 0.0, I_xz = 0.0, I_yz = 0.0;
#endif
-}
-
-/**
- * @brief Add a particle to the given multipole.
- *
- * @param m The #multipole.
- * @param p The #gpart.
- */
-
-void multipole_addpart(struct multipole *m, struct gpart *p) {
-#if multipole_order == 1
+ /* Collect the particle data. */
+ for (int k = 0; k < gcount; k++) {
+ const float w = gparts[k].mass;
- /* Correct the position. */
- float mm = m->coeffs[0], mp = p->mass;
- float w = 1.0f / (mm + mp);
- for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + p->x[k] * mp) * w;
+ mass += w;
+ com[0] += gparts[k].x[0] * w;
+ com[1] += gparts[k].x[1] * w;
+ com[2] += gparts[k].x[2] * w;
+
+#if const_gravity_multipole_order >= 2
+ I_xx += gparts[k].x[0] * gparts[k].x[0] * w;
+ I_yy += gparts[k].x[1] * gparts[k].x[1] * w;
+ I_zz += gparts[k].x[2] * gparts[k].x[2] * w;
+ I_xy += gparts[k].x[0] * gparts[k].x[1] * w;
+ I_xz += gparts[k].x[0] * gparts[k].x[2] * w;
+ I_yz += gparts[k].x[1] * gparts[k].x[2] * w;
+#endif
+ }
- /* Add the particle to the moments. */
- m->coeffs[0] = mm + mp;
+ const double imass = 1.0 / mass;
-#else
-#error( "Multipoles of order %i not yet implemented." , multipole_order )
+ /* Store the data on the multipole. */
+ m->mass = mass;
+ m->CoM[0] = com[0] * imass;
+ m->CoM[1] = com[1] * imass;
+ m->CoM[2] = com[2] * imass;
+
+#if const_gravity_multipole_order >= 2
+ m->I_xx = I_xx - imass * com[0] * com[0];
+ m->I_yy = I_yy - imass * com[1] * com[1];
+ m->I_zz = I_zz - imass * com[2] * com[2];
+ m->I_xy = I_xy - imass * com[0] * com[1];
+ m->I_xz = I_xz - imass * com[0] * com[2];
+ m->I_yz = I_yz - imass * com[1] * com[2];
#endif
}
/**
- * @brief Add a group of particles to the given multipole.
+ * @brief Add the second multipole to the first one.
*
- * @param m The #multipole.
- * @param p The #gpart array.
- * @param N Number of parts to add.
+ * @param ma The #multipole which will contain the sum.
+ * @param mb The #multipole to add.
*/
-void multipole_addparts(struct multipole *m, struct gpart *p, int N) {
+void multipole_add(struct multipole *ma, const struct multipole *mb) {
-#if multipole_order == 1
-
- /* Get the combined mass and positions. */
- double xp[3] = {0.0, 0.0, 0.0};
- float mp = 0.0f, w;
- for (int k = 0; k < N; k++) {
- w = p[k].mass;
- mp += w;
- xp[0] += p[k].x[0] * w;
- xp[1] += p[k].x[1] * w;
- xp[2] += p[k].x[2] * w;
- }
+#if const_gravity_multipole_order > 2
+#error "Multipoles of order >2 not yet implemented."
+#endif
/* Correct the position. */
- float mm = m->coeffs[0];
- w = 1.0f / (mm + mp);
- for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + xp[k]) * w;
-
- /* Add the particle to the moments. */
- m->coeffs[0] = mm + mp;
+ const double ma_mass = ma->mass;
+ const double mb_mass = mb->mass;
+ const double M_tot = ma_mass + mb_mass;
+ const double M_tot_inv = 1.0 / M_tot;
+
+ const double ma_CoM[3] = {ma->CoM[0], ma->CoM[1], ma->CoM[2]};
+ const double mb_CoM[3] = {mb->CoM[0], mb->CoM[1], mb->CoM[2]};
+
+#if const_gravity_multipole_order >= 2
+ const double ma_I_xx = (double)ma->I_xx + ma_mass * ma_CoM[0] * ma_CoM[0];
+ const double ma_I_yy = (double)ma->I_yy + ma_mass * ma_CoM[1] * ma_CoM[1];
+ const double ma_I_zz = (double)ma->I_zz + ma_mass * ma_CoM[2] * ma_CoM[2];
+ const double ma_I_xy = (double)ma->I_xy + ma_mass * ma_CoM[0] * ma_CoM[1];
+ const double ma_I_xz = (double)ma->I_xz + ma_mass * ma_CoM[0] * ma_CoM[2];
+ const double ma_I_yz = (double)ma->I_yz + ma_mass * ma_CoM[1] * ma_CoM[2];
+
+ const double mb_I_xx = (double)mb->I_xx + mb_mass * mb_CoM[0] * mb_CoM[0];
+ const double mb_I_yy = (double)mb->I_yy + mb_mass * mb_CoM[1] * mb_CoM[1];
+ const double mb_I_zz = (double)mb->I_zz + mb_mass * mb_CoM[2] * mb_CoM[2];
+ const double mb_I_xy = (double)mb->I_xy + mb_mass * mb_CoM[0] * mb_CoM[1];
+ const double mb_I_xz = (double)mb->I_xz + mb_mass * mb_CoM[0] * mb_CoM[2];
+ const double mb_I_yz = (double)mb->I_yz + mb_mass * mb_CoM[1] * mb_CoM[2];
+#endif
-#else
-#error( "Multipoles of order %i not yet implemented." , multipole_order )
+ /* New mass */
+ ma->mass = M_tot;
+
+ /* New CoM */
+ ma->CoM[0] = (ma_CoM[0] * ma_mass + mb_CoM[0] * mb_mass) * M_tot_inv;
+ ma->CoM[1] = (ma_CoM[1] * ma_mass + mb_CoM[1] * mb_mass) * M_tot_inv;
+ ma->CoM[2] = (ma_CoM[2] * ma_mass + mb_CoM[2] * mb_mass) * M_tot_inv;
+
+/* New quadrupole */
+#if const_gravity_multipole_order >= 2
+ ma->I_xx = (ma_I_xx + mb_I_xx) - M_tot * ma->CoM[0] * ma->CoM[0];
+ ma->I_yy = (ma_I_yy + mb_I_yy) - M_tot * ma->CoM[1] * ma->CoM[1];
+ ma->I_zz = (ma_I_zz + mb_I_zz) - M_tot * ma->CoM[2] * ma->CoM[2];
+ ma->I_xy = (ma_I_xy + mb_I_xy) - M_tot * ma->CoM[0] * ma->CoM[1];
+ ma->I_xz = (ma_I_xz + mb_I_xz) - M_tot * ma->CoM[0] * ma->CoM[2];
+ ma->I_yz = (ma_I_yz + mb_I_yz) - M_tot * ma->CoM[1] * ma->CoM[2];
#endif
}
/**
- * @brief Init a multipole from a set of particles.
+ * @brief Add a particle to the given multipole.
*
* @param m The #multipole.
- * @param parts The #gpart.
- * @param N The number of particles.
+ * @param p The #gpart.
*/
-void multipole_init(struct multipole *m, struct gpart *parts, int N) {
+void multipole_addpart(struct multipole *m, struct gpart *p) {
-#if multipole_order == 1
+ /* #if const_gravity_multipole_order == 1 */
- float mass = 0.0f, w;
- double x[3] = {0.0, 0.0, 0.0};
- int k;
+ /* /\* Correct the position. *\/ */
+ /* float mm = m->coeffs[0], mp = p->mass; */
+ /* float w = 1.0f / (mm + mp); */
+ /* for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + p->x[k] * mp) * w;
+ */
- /* Collect the particle data. */
- for (k = 0; k < N; k++) {
- w = parts[k].mass;
- mass += w;
- x[0] += parts[k].x[0] * w;
- x[1] += parts[k].x[1] * w;
- x[2] += parts[k].x[2] * w;
- }
-
- /* Store the data on the multipole. */
- m->coeffs[0] = mass;
- m->x[0] = x[0] / mass;
- m->x[1] = x[1] / mass;
- m->x[2] = x[2] / mass;
+ /* /\* Add the particle to the moments. *\/ */
+ /* m->coeffs[0] = mm + mp; */
-#else
-#error( "Multipoles of order %i not yet implemented." , multipole_order )
-#endif
+ /* #else */
+ /* #error( "Multipoles of order %i not yet implemented." ,
+ * const_gravity_multipole_order )
+ */
+ /* #endif */
}
/**
- * @brief Reset the data of a #multipole.
+ * @brief Add a group of particles to the given multipole.
*
* @param m The #multipole.
+ * @param p The #gpart array.
+ * @param N Number of parts to add.
*/
-void multipole_reset(struct multipole *m) {
+void multipole_addparts(struct multipole *m, struct gpart *p, int N) {
- /* Just bzero the struct. */
- bzero(m, sizeof(struct multipole));
+ /* #if const_gravity_multipole_order == 1 */
+
+ /* /\* Get the combined mass and positions. *\/ */
+ /* double xp[3] = {0.0, 0.0, 0.0}; */
+ /* float mp = 0.0f, w; */
+ /* for (int k = 0; k < N; k++) { */
+ /* w = p[k].mass; */
+ /* mp += w; */
+ /* xp[0] += p[k].x[0] * w; */
+ /* xp[1] += p[k].x[1] * w; */
+ /* xp[2] += p[k].x[2] * w; */
+ /* } */
+
+ /* /\* Correct the position. *\/ */
+ /* float mm = m->coeffs[0]; */
+ /* w = 1.0f / (mm + mp); */
+ /* for (int k = 0; k < 3; k++) m->x[k] = (m->x[k] * mm + xp[k]) * w; */
+
+ /* /\* Add the particle to the moments. *\/ */
+ /* m->coeffs[0] = mm + mp; */
+
+ /* #else */
+ /* #error( "Multipoles of order %i not yet implemented." ,
+ * const_gravity_multipole_order )
+ */
+ /* #endif */
}
diff --git a/src/multipole.h b/src/multipole.h
index 85ba44d3ce95d958b721d435ccd26b72e30a79c1..dc1f914dd73969bc63f3beffca7f34d0f889edb6 100644
--- a/src/multipole.h
+++ b/src/multipole.h
@@ -1,6 +1,7 @@
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
@@ -28,31 +29,33 @@
#include "kernel_gravity.h"
#include "part.h"
-/* Some constants. */
-#define multipole_order 1
-
/* Multipole struct. */
struct multipole {
/* Multipole location. */
- double x[3];
+ double CoM[3];
- /* Acceleration on this multipole. */
- float a[3];
+ /* Multipole mass */
+ float mass;
- /* Multipole coefficients. */
- float coeffs[multipole_order * multipole_order];
+#if const_gravity_multipole_order >= 2
+ /* Quadrupole terms */
+ float I_xx, I_yy, I_zz;
+ float I_xy, I_xz, I_yz;
+#endif
};
/* Multipole function prototypes. */
-static void multipole_iact_mm(struct multipole *ma, struct multipole *mb,
- double *shift);
-void multipole_merge(struct multipole *ma, struct multipole *mb);
-void multipole_addpart(struct multipole *m, struct gpart *p);
-void multipole_addparts(struct multipole *m, struct gpart *p, int N);
-void multipole_init(struct multipole *m, struct gpart *parts, int N);
+void multipole_add(struct multipole *m_sum, const struct multipole *m_term);
+void multipole_init(struct multipole *m, const struct gpart *gparts,
+ int gcount);
void multipole_reset(struct multipole *m);
+/* static void multipole_iact_mm(struct multipole *ma, struct multipole *mb, */
+/* double *shift); */
+/* void multipole_addpart(struct multipole *m, struct gpart *p); */
+/* void multipole_addparts(struct multipole *m, struct gpart *p, int N); */
+
/**
* @brief Compute the pairwise interaction between two multipoles.
*
@@ -60,39 +63,39 @@ void multipole_reset(struct multipole *m);
* @param mb The second #multipole.
* @param shift The periodicity correction.
*/
-
__attribute__((always_inline)) INLINE static void multipole_iact_mm(
struct multipole *ma, struct multipole *mb, double *shift) {
-
- float dx[3], ir, r, r2 = 0.0f, acc;
- int k;
-
- /* Compute the multipole distance. */
- for (k = 0; k < 3; k++) {
- dx[k] = ma->x[k] - mb->x[k] - shift[k];
- r2 += dx[k] * dx[k];
- }
-
- /* Compute the normalized distance vector. */
- ir = 1.0f / sqrtf(r2);
- r = r2 * ir;
-
- /* Evaluate the gravity kernel. */
- kernel_grav_eval(r, &acc);
-
- /* Scale the acceleration. */
- acc *= const_G * ir * ir * ir;
-
-/* Compute the forces on both multipoles. */
-#if multipole_order == 1
- float mma = ma->coeffs[0], mmb = mb->coeffs[0];
- for (k = 0; k < 3; k++) {
- ma->a[k] -= dx[k] * acc * mmb;
- mb->a[k] += dx[k] * acc * mma;
- }
-#else
-#error( "Multipoles of order %i not yet implemented." , multipole_order )
-#endif
+ /* float dx[3], ir, r, r2 = 0.0f, acc; */
+ /* int k; */
+
+ /* /\* Compute the multipole distance. *\/ */
+ /* for (k = 0; k < 3; k++) { */
+ /* dx[k] = ma->x[k] - mb->x[k] - shift[k]; */
+ /* r2 += dx[k] * dx[k]; */
+ /* } */
+
+ /* /\* Compute the normalized distance vector. *\/ */
+ /* ir = 1.0f / sqrtf(r2); */
+ /* r = r2 * ir; */
+
+ /* /\* Evaluate the gravity kernel. *\/ */
+ /* kernel_grav_eval(r, &acc); */
+
+ /* /\* Scale the acceleration. *\/ */
+ /* acc *= const_G * ir * ir * ir; */
+
+ /* /\* Compute the forces on both multipoles. *\/ */
+ /* #if const_gravity_multipole_order == 1 */
+ /* float mma = ma->coeffs[0], mmb = mb->coeffs[0]; */
+ /* for (k = 0; k < 3; k++) { */
+ /* ma->a[k] -= dx[k] * acc * mmb; */
+ /* mb->a[k] += dx[k] * acc * mma; */
+ /* } */
+ /* #else */
+ /* #error( "Multipoles of order %i not yet implemented." ,
+ * const_gravity_multipole_order )
+ */
+ /* #endif */
}
/**
@@ -102,35 +105,36 @@ __attribute__((always_inline)) INLINE static void multipole_iact_mm(
* @param p The #gpart.
* @param shift The periodicity correction.
*/
-
__attribute__((always_inline)) INLINE static void multipole_iact_mp(
struct multipole *m, struct gpart *p, double *shift) {
- float dx[3], ir, r, r2 = 0.0f, acc;
- int k;
-
- /* Compute the multipole distance. */
- for (k = 0; k < 3; k++) {
- dx[k] = m->x[k] - p->x[k] - shift[k];
- r2 += dx[k] * dx[k];
- }
-
- /* Compute the normalized distance vector. */
- ir = 1.0f / sqrtf(r2);
- r = r2 * ir;
-
- /* Evaluate the gravity kernel. */
- kernel_grav_eval(r, &acc);
-
- /* Scale the acceleration. */
- acc *= const_G * ir * ir * ir * m->coeffs[0];
-
-/* Compute the forces on both multipoles. */
-#if multipole_order == 1
- for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc;
-#else
-#error( "Multipoles of order %i not yet implemented." , multipole_order )
-#endif
+ /* float dx[3], ir, r, r2 = 0.0f, acc; */
+ /* int k; */
+
+ /* /\* Compute the multipole distance. *\/ */
+ /* for (k = 0; k < 3; k++) { */
+ /* dx[k] = m->x[k] - p->x[k] - shift[k]; */
+ /* r2 += dx[k] * dx[k]; */
+ /* } */
+
+ /* /\* Compute the normalized distance vector. *\/ */
+ /* ir = 1.0f / sqrtf(r2); */
+ /* r = r2 * ir; */
+
+ /* /\* Evaluate the gravity kernel. *\/ */
+ /* kernel_grav_eval(r, &acc); */
+
+ /* /\* Scale the acceleration. *\/ */
+ /* acc *= const_G * ir * ir * ir * m->coeffs[0]; */
+
+ /* /\* Compute the forces on both multipoles. *\/ */
+ /* #if const_gravity_multipole_order == 1 */
+ /* for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc; */
+ /* #else */
+ /* #error( "Multipoles of order %i not yet implemented." ,
+ * const_gravity_multipole_order )
+ */
+ /* #endif */
}
#endif /* SWIFT_MULTIPOLE_H */
diff --git a/src/potentials.c b/src/potentials.c
index 6818d8a234a437ca0164c5c991b92d905fd6041e..5cbe05e008b7de17310b1e738e032af90684c25e 100644
--- a/src/potentials.c
+++ b/src/potentials.c
@@ -47,7 +47,7 @@ void potential_init(const struct swift_params* parameter_file,
potential->point_mass.mass =
parser_get_param_double(parameter_file, "PointMass:mass");
potential->point_mass.timestep_mult =
- parser_get_param_double(parameter_file, "PointMass:timestep_mult");
+ parser_get_param_float(parameter_file, "PointMass:timestep_mult");
#endif /* EXTERNAL_POTENTIAL_POINTMASS */
@@ -61,7 +61,7 @@ void potential_init(const struct swift_params* parameter_file,
parser_get_param_double(parameter_file, "IsothermalPotential:position_z");
potential->isothermal_potential.vrot =
parser_get_param_double(parameter_file, "IsothermalPotential:vrot");
- potential->isothermal_potential.timestep_mult = parser_get_param_double(
+ potential->isothermal_potential.timestep_mult = parser_get_param_float(
parameter_file, "IsothermalPotential:timestep_mult");
#endif /* EXTERNAL_POTENTIAL_ISOTHERMALPOTENTIAL */
diff --git a/src/potentials.h b/src/potentials.h
index 403acf12ad555740b6da803dbc1f2cde2223e266..5373cc1f3f55b0c6d9876cbe6348fdcefbe242aa 100644
--- a/src/potentials.h
+++ b/src/potentials.h
@@ -42,7 +42,7 @@ struct external_potential {
struct {
double x, y, z;
double mass;
- double timestep_mult;
+ float timestep_mult;
} point_mass;
#endif
@@ -50,7 +50,7 @@ struct external_potential {
struct {
double x, y, z;
double vrot;
- double timestep_mult;
+ float timestep_mult;
} isothermal_potential;
#endif
};
@@ -95,6 +95,8 @@ external_gravity_isothermalpotential_timestep(
* @brief Computes the gravitational acceleration of a particle due to a point
* mass
*
+ * Note that the accelerations are multiplied by Newton's G constant later on.
+ *
* @param potential The #external_potential used in the run.
* @param phys_const The physical constants in internal units.
* @param g Pointer to the g-particle data.
@@ -104,15 +106,19 @@ external_gravity_isothermalpotential(const struct external_potential* potential,
const struct phys_const* const phys_const,
struct gpart* g) {
+ const float G_newton = phys_const->const_newton_G;
+
const float dx = g->x[0] - potential->isothermal_potential.x;
const float dy = g->x[1] - potential->isothermal_potential.y;
const float dz = g->x[2] - potential->isothermal_potential.z;
const float rinv2 = 1.f / (dx * dx + dy * dy + dz * dz);
const double vrot = potential->isothermal_potential.vrot;
- g->a_grav[0] += -vrot * vrot * rinv2 * dx;
- g->a_grav[1] += -vrot * vrot * rinv2 * dy;
- g->a_grav[2] += -vrot * vrot * rinv2 * dz;
+ const double term = -vrot * vrot * rinv2 / G_newton;
+
+ g->a_grav[0] += term * dx;
+ g->a_grav[1] += term * dy;
+ g->a_grav[2] += term * dz;
// error(" %f %f %f %f", vrot, rinv2, dx, g->a_grav[0]);
}
@@ -158,6 +164,8 @@ external_gravity_pointmass_timestep(const struct external_potential* potential,
* @brief Computes the gravitational acceleration of a particle due to a point
* mass
*
+ * Note that the accelerations are multiplied by Newton's G constant later on.
+ *
* @param potential The proerties of the external potential.
* @param phys_const The physical constants in internal units.
* @param g Pointer to the g-particle data.
@@ -166,18 +174,15 @@ __attribute__((always_inline)) INLINE static void external_gravity_pointmass(
const struct external_potential* potential,
const struct phys_const* const phys_const, struct gpart* g) {
- const float G_newton = phys_const->const_newton_G;
const float dx = g->x[0] - potential->point_mass.x;
const float dy = g->x[1] - potential->point_mass.y;
const float dz = g->x[2] - potential->point_mass.z;
const float rinv = 1.f / sqrtf(dx * dx + dy * dy + dz * dz);
+ const float rinv3 = rinv * rinv * rinv;
- g->a_grav[0] +=
- -G_newton * potential->point_mass.mass * dx * rinv * rinv * rinv;
- g->a_grav[1] +=
- -G_newton * potential->point_mass.mass * dy * rinv * rinv * rinv;
- g->a_grav[2] +=
- -G_newton * potential->point_mass.mass * dz * rinv * rinv * rinv;
+ g->a_grav[0] += -potential->point_mass.mass * dx * rinv3;
+ g->a_grav[1] += -potential->point_mass.mass * dy * rinv3;
+ g->a_grav[2] += -potential->point_mass.mass * dz * rinv3;
}
#endif /* EXTERNAL_POTENTIAL_POINTMASS */
diff --git a/src/runner.c b/src/runner.c
index 01762dda28aaa8f1fa3e240824761d90c9f8f702..6e99d8cece2cf086b1209ac2f8314301b9779e20 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -87,6 +87,10 @@ const char runner_flip[27] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
#define FUNCTION force
#include "runner_doiact.h"
+/* Import the gravity loop functions. */
+#include "runner_doiact_fft.h"
+#include "runner_doiact_grav.h"
+
/**
* @brief Calculate gravity acceleration from external potential
*
@@ -115,7 +119,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
OUT;
#endif
- /* Loop over the parts in this cell. */
+ /* Loop over the gparts in this cell. */
for (int i = 0; i < gcount; i++) {
/* Get a direct pointer on the part. */
@@ -127,6 +131,7 @@ void runner_do_grav_external(struct runner *r, struct cell *c, int timer) {
external_gravity(potential, constants, g);
}
}
+
if (timer) TIMER_TOC(timer_dograv_external);
}
@@ -424,11 +429,9 @@ void runner_do_init(struct runner *r, struct cell *c, int timer) {
*/
void runner_do_ghost(struct runner *r, struct cell *c) {
- struct part *p, *parts = c->parts;
- struct xpart *xp, *xparts = c->xparts;
- struct cell *finger;
+ struct part *restrict parts = c->parts;
+ struct xpart *restrict xparts = c->xparts;
int redo, count = c->count;
- float h_corr;
const int ti_current = r->e->ti_current;
const double timeBase = r->e->timeBase;
const float target_wcount = r->e->hydro_properties->target_neighbours;
@@ -465,8 +468,8 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
for (int i = 0; i < count; i++) {
/* Get a direct pointer on the part. */
- p = &parts[pid[i]];
- xp = &xparts[pid[i]];
+ struct part *restrict p = &parts[pid[i]];
+ struct xpart *restrict xp = &xparts[pid[i]];
/* Is this part within the timestep? */
if (p->ti_end <= ti_current) {
@@ -474,6 +477,8 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
/* Finish the density calculation */
hydro_end_density(p, ti_current);
+ float h_corr = 0.f;
+
/* If no derivative, double the smoothing length. */
if (p->density.wcount_dh == 0.0f) h_corr = p->h;
@@ -526,7 +531,7 @@ void runner_do_ghost(struct runner *r, struct cell *c) {
if (count > 0) {
/* Climb up the cell hierarchy. */
- for (finger = c; finger != NULL; finger = finger->parent) {
+ for (struct cell *finger = c; finger != NULL; finger = finger->parent) {
/* Run through this cell's density interactions. */
for (struct link *l = finger->density; l != NULL; l = l->next) {
@@ -745,6 +750,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) {
struct part *restrict parts = c->parts;
struct xpart *restrict xparts = c->xparts;
struct gpart *restrict gparts = c->gparts;
+ const double const_G = r->e->physical_constants->const_newton_G;
int updated = 0, g_updated = 0;
int ti_end_min = max_nr_timesteps, ti_end_max = 0;
@@ -771,7 +777,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) {
if (gp->id_or_neg_offset > 0) {
/* First, finish the force calculation */
- gravity_end_force(gp);
+ gravity_end_force(gp, const_G);
/* Kick the g-particle forward */
kick_gpart(gp, new_dti, timeBase);
@@ -796,7 +802,7 @@ void runner_do_kick_fixdt(struct runner *r, struct cell *c, int timer) {
/* First, finish the force loop */
hydro_end_force(p);
- if (p->gpart != NULL) gravity_end_force(p->gpart);
+ if (p->gpart != NULL) gravity_end_force(p->gpart, const_G);
/* Kick the particle forward */
kick_part(p, xp, new_dti, timeBase);
@@ -857,6 +863,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) {
struct part *restrict parts = c->parts;
struct xpart *restrict xparts = c->xparts;
struct gpart *restrict gparts = c->gparts;
+ const double const_G = r->e->physical_constants->const_newton_G;
int updated = 0, g_updated = 0;
int ti_end_min = max_nr_timesteps, ti_end_max = 0;
@@ -882,7 +889,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) {
if (gp->ti_end <= ti_current) {
/* First, finish the force calculation */
- gravity_end_force(gp);
+ gravity_end_force(gp, const_G);
/* Compute the next timestep */
const int new_dti = get_gpart_timestep(gp, e);
@@ -914,7 +921,7 @@ void runner_do_kick(struct runner *r, struct cell *c, int timer) {
/* First, finish the force loop */
hydro_end_force(p);
- if (p->gpart != NULL) gravity_end_force(p->gpart);
+ if (p->gpart != NULL) gravity_end_force(p->gpart, const_G);
/* Compute the next timestep (hydro condition) */
const int new_dti = get_part_timestep(p, xp, e);
@@ -1071,6 +1078,8 @@ void *runner_main(void *data) {
runner_doself1_density(r, ci);
else if (t->subtype == task_subtype_force)
runner_doself2_force(r, ci);
+ else if (t->subtype == task_subtype_grav)
+ runner_doself_grav(r, ci, 1);
else
error("Unknown task subtype.");
break;
@@ -1079,6 +1088,8 @@ void *runner_main(void *data) {
runner_dopair1_density(r, ci, cj);
else if (t->subtype == task_subtype_force)
runner_dopair2_force(r, ci, cj);
+ else if (t->subtype == task_subtype_grav)
+ runner_dopair_grav(r, ci, cj, 1);
else
error("Unknown task subtype.");
break;
@@ -1090,6 +1101,8 @@ void *runner_main(void *data) {
runner_dosub_self1_density(r, ci, 1);
else if (t->subtype == task_subtype_force)
runner_dosub_self2_force(r, ci, 1);
+ else if (t->subtype == task_subtype_grav)
+ runner_dosub_grav(r, ci, cj, 1);
else
error("Unknown task subtype.");
break;
@@ -1098,6 +1111,8 @@ void *runner_main(void *data) {
runner_dosub_pair1_density(r, ci, cj, t->flags, 1);
else if (t->subtype == task_subtype_force)
runner_dosub_pair2_force(r, ci, cj, t->flags, 1);
+ else if (t->subtype == task_subtype_grav)
+ runner_dosub_grav(r, ci, cj, 1);
else
error("Unknown task subtype.");
break;
@@ -1129,6 +1144,17 @@ void *runner_main(void *data) {
runner_do_recv_cell(r, ci, 1);
}
break;
+ case task_type_grav_mm:
+ runner_do_grav_mm(r, t->ci, 1);
+ break;
+ case task_type_grav_up:
+ runner_do_grav_up(r, t->ci);
+ break;
+ case task_type_grav_gather_m:
+ break;
+ case task_type_grav_fft:
+ runner_do_grav_fft(r);
+ break;
case task_type_grav_external:
runner_do_grav_external(r, t->ci, 1);
break;
diff --git a/src/runner_doiact_fft.c b/src/runner_doiact_fft.c
new file mode 100644
index 0000000000000000000000000000000000000000..076ec2578361127266c637cc5ac224609b702c66
--- /dev/null
+++ b/src/runner_doiact_fft.c
@@ -0,0 +1,36 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ *
+ ******************************************************************************/
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Some standard headers. */
+#include
+
+#ifdef HAVE_FFTW
+#include
+#endif
+
+/* This object's header. */
+#include "runner_doiact_fft.h"
+
+/* Local includes. */
+#include "runner.h"
+
+void runner_do_grav_fft(struct runner *r) {}
diff --git a/src/runner_doiact_fft.h b/src/runner_doiact_fft.h
new file mode 100644
index 0000000000000000000000000000000000000000..263662383fb465dcf945e55494a569289b009ff9
--- /dev/null
+++ b/src/runner_doiact_fft.h
@@ -0,0 +1,26 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ *
+ ******************************************************************************/
+#ifndef SWIFT_RUNNER_DOIACT_FFT_H
+#define SWIFT_RUNNER_DOIACT_FFT_H
+
+struct runner;
+
+void runner_do_grav_fft(struct runner *r);
+
+#endif /* SWIFT_RUNNER_DOIACT_FFT_H */
diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h
index 401e48ffbb92218aecafd21a3f70c44c9a7b0a1b..a220ad1794d23999ff16752e797a499071fa2e65 100644
--- a/src/runner_doiact_grav.h
+++ b/src/runner_doiact_grav.h
@@ -1,6 +1,7 @@
/*******************************************************************************
* This file is part of SWIFT.
* Copyright (c) 2013 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
+ * 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
@@ -19,575 +20,502 @@
#ifndef SWIFT_RUNNER_DOIACT_GRAV_H
#define SWIFT_RUNNER_DOIACT_GRAV_H
+/* Includes. */
+#include "cell.h"
+#include "gravity.h"
+#include "part.h"
+
+#define ICHECK -1000
+
/**
- * @brief Compute the sorted gravity interactions between a cell pair.
+ * @brief Compute the recursive upward sweep, i.e. construct the
+ * multipoles in a cell hierarchy.
*
* @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
+ * @param c The top-level #cell.
*/
+void runner_do_grav_up(struct runner *r, struct cell *c) {
+
+ if (c->split) { /* Regular node */
+
+ /* Recurse. */
+ for (int k = 0; k < 8; k++)
+ if (c->progeny[k] != NULL) runner_do_grav_up(r, c->progeny[k]);
+
+ /* Collect the multipoles from the progeny. */
+ multipole_reset(&c->multipole);
+ for (int k = 0; k < 8; k++) {
+ if (c->progeny[k] != NULL)
+ multipole_add(&c->multipole, &c->progeny[k]->multipole);
+ }
-void runner_dopair_grav_new(struct runner *r, struct cell *ci,
- struct cell *cj) {
-
- struct engine *restrict e = r->e;
- int pid, pjd, k, sid;
- double rshift, shift[3] = {0.0, 0.0, 0.0}, nshift[3];
- struct entry *restrict sort_i, *restrict sort_j;
- struct gpart *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
- double pix[3];
- float dx[3], r2, h_max, di, dj;
- int count_i, count_j, cnj, cnj_new;
+ } else { /* Leaf node. */
+ /* Just construct the multipole from the gparts. */
+ multipole_init(&c->multipole, c->gparts, c->gcount);
+ }
+}
+
+/**
+ * @brief Computes the interaction of all the particles in a cell with the
+ * multipole of another cell.
+ *
+ * @param r The #runner.
+ * @param ci The #cell with particles to interct.
+ * @param cj The #cell with the multipole.
+ */
+__attribute__((always_inline)) INLINE static void runner_dopair_grav_pm(
+ const struct runner *r, const struct cell *restrict ci,
+ const struct cell *restrict cj) {
+
+ const struct engine *e = r->e;
+ const int gcount = ci->gcount;
+ struct gpart *restrict gparts = ci->gparts;
+ const struct multipole multi = cj->multipole;
const int ti_current = e->ti_current;
- struct multipole m;
-#ifdef WITH_VECTORIZATION
- int icount = 0;
- float r2q[VEC_SIZE] __attribute__((aligned(16)));
- float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
- struct gpart *piq[VEC_SIZE], *pjq[VEC_SIZE];
+ const float rlr_inv = 1. / (const_gravity_a_smooth * ci->super->width[0]);
+
+ TIMER_TIC;
+
+#ifdef SWIFT_DEBUG_CHECKS
+ if (gcount == 0) error("Empty cell!"); // MATTHIEU sanity check
+
+ if (multi.mass == 0.0) // MATTHIEU sanity check
+ error("Multipole does not seem to have been set.");
#endif
- TIMER_TIC
/* Anything to do here? */
- if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
+ if (ci->ti_end_min > ti_current) return;
- /* Get the sort ID. */
- sid = space_getsid(e->s, &ci, &cj, shift);
-
- /* Make sure the cells are sorted. */
- // runner_do_gsort(r, ci, (1 << sid), 0);
- // runner_do_gsort(r, cj, (1 << sid), 0);
-
- /* Have the cells been sorted? */
- if (!(ci->gsorted & (1 << sid)) || !(cj->gsorted & (1 << sid)))
- error("Trying to interact unsorted cells.");
-
- /* Get the cutoff shift. */
- for (rshift = 0.0, k = 0; k < 3; k++)
- rshift += shift[k] * runner_shift[sid][k];
-
- /* Pick-out the sorted lists. */
- sort_i = &ci->gsort[sid * (ci->count + 1)];
- sort_j = &cj->gsort[sid * (cj->count + 1)];
-
- /* Get some other useful values. */
- h_max = sqrtf(ci->width[0] * ci->width[0] + ci->width[1] * ci->width[1] +
- ci->width[2] * ci->width[2]) *
- const_theta_max;
- count_i = ci->gcount;
- count_j = cj->gcount;
- parts_i = ci->gparts;
- parts_j = cj->gparts;
- cnj = count_j;
- multipole_reset(&m);
- nshift[0] = -shift[0];
- nshift[1] = -shift[1];
- nshift[2] = -shift[2];
-
- /* Loop over the parts in ci. */
- for (pid = count_i - 1; pid >= 0; pid--) {
+#if ICHECK > 0
+ for (int pid = 0; pid < gcount; pid++) {
/* Get a hold of the ith part in ci. */
- pi = &parts_i[sort_i[pid].i];
- if (pi->ti_end > ti_current) continue;
- di = sort_i[pid].d + h_max - rshift;
-
- for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+ struct gpart *restrict gp = &gparts[pid];
- /* Loop over the parts in cj. */
- for (pjd = 0; pjd < cnj && sort_j[pjd].d < di; pjd++) {
+ if (gp->id_or_neg_offset == ICHECK)
+ message("id=%lld loc=[ %f %f %f ] size= %f count= %d",
+ gp->id_or_neg_offset, cj->loc[0], cj->loc[1], cj->loc[2],
+ cj->width[0], cj->gcount);
+ }
+#endif
- /* Get a pointer to the jth particle. */
- pj = &parts_j[sort_j[pjd].i];
+ /* Loop over every particle in leaf. */
+ for (int pid = 0; pid < gcount; pid++) {
- /* Compute the pairwise distance. */
- r2 = 0.0f;
- for (k = 0; k < 3; k++) {
- dx[k] = pix[k] - pj->x[k];
- r2 += dx[k] * dx[k];
- }
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gp = &gparts[pid];
-#ifndef WITH_VECTORIZATION
+ if (gp->ti_end > ti_current) continue;
- // if ( pi->part->id == 3473472412525 || pj->part->id == 3473472412525 )
- // message( "interacting particles pi=%lli and pj=%lli with r=%.3e in
- // cells %lli/%lli." , pi->part->id , pj->part->id , sqrtf(r2) , ((long
- // long int)ci) / sizeof(struct cell) , ((long long int)cj) /
- // sizeof(struct cell) );
+ /* Compute the pairwise distance. */
+ const float dx[3] = {multi.CoM[0] - gp->x[0], // x
+ multi.CoM[1] - gp->x[1], // y
+ multi.CoM[2] - gp->x[2]}; // z
+ const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
- runner_iact_grav(r2, dx, pi, pj);
+ /* Interact !*/
+ runner_iact_grav_pm(rlr_inv, r2, dx, gp, &multi);
+ }
-#else
+ TIMER_TOC(timer_dopair_grav_pm);
+}
- /* Add this interaction to the queue. */
- r2q[icount] = r2;
- dxq[3 * icount + 0] = dx[0];
- dxq[3 * icount + 1] = dx[1];
- dxq[3 * icount + 2] = dx[2];
- piq[icount] = pi;
- pjq[icount] = pj;
- icount += 1;
+/**
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * particles of another cell.
+ *
+ * @param r The #runner.
+ * @param ci The first #cell.
+ * @param cj The other #cell.
+ *
+ * @todo Use a local cache for the particles.
+ */
+__attribute__((always_inline)) INLINE static void runner_dopair_grav_pp(
+ struct runner *r, struct cell *ci, struct cell *cj) {
+
+ const struct engine *e = r->e;
+ const int gcount_i = ci->gcount;
+ const int gcount_j = cj->gcount;
+ struct gpart *restrict gparts_i = ci->gparts;
+ struct gpart *restrict gparts_j = cj->gparts;
+ const int ti_current = e->ti_current;
+ const float rlr_inv = 1. / (const_gravity_a_smooth * ci->super->width[0]);
- /* Flush? */
- if (icount == VEC_SIZE) {
- runner_iact_vec_grav(r2q, dxq, piq, pjq);
- icount = 0;
- }
+ TIMER_TIC;
+#ifdef SWIFT_DEBUG_CHECKS
+ if (ci->width[0] != cj->width[0]) // MATTHIEU sanity check
+ error("Non matching cell sizes !! h_i=%f h_j=%f", ci->width[0],
+ cj->width[0]);
#endif
- } /* loop over the parts in cj. */
+ /* Anything to do here? */
+ if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
+
+#if ICHECK > 0
+ for (int pid = 0; pid < gcount_i; pid++) {
- /* Set the new limit. */
- cnj_new = pjd;
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gp = &gparts_i[pid];
- /* Add trailing parts to the multipole. */
- for (pjd = cnj_new; pjd < cnj; pjd++) {
+ if (gp->id_or_neg_offset == ICHECK)
+ message("id=%lld loc=[ %f %f %f ] size= %f count= %d",
+ gp->id_or_neg_offset, cj->loc[0], cj->loc[1], cj->loc[2],
+ cj->width[0], cj->gcount);
+ }
- /* Add the part to the multipole. */
- multipole_addpart(&m, &parts_j[sort_j[pjd].i]);
+ for (int pid = 0; pid < gcount_j; pid++) {
- } /* add trailing parts to the multipole. */
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gp = &gparts_j[pid];
- /* Set the new cnj. */
- cnj = cnj_new;
+ if (gp->id_or_neg_offset == ICHECK)
+ message("id=%lld loc=[ %f %f %f ] size= %f count=%d",
+ gp->id_or_neg_offset, ci->loc[0], ci->loc[1], ci->loc[2],
+ ci->width[0], ci->gcount);
+ }
+#endif
- /* Interact the ith particle with the multipole. */
- multipole_iact_mp(&m, pi, nshift);
+ /* Loop over all particles in ci... */
+ for (int pid = 0; pid < gcount_i; pid++) {
- } /* loop over the parts in ci. */
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gpi = &gparts_i[pid];
-#ifdef WITH_VECTORIZATION
- /* Pick up any leftovers. */
- if (icount > 0)
- for (k = 0; k < icount; k++)
- runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]);
-#endif
+ /* Loop over every particle in the other cell. */
+ for (int pjd = 0; pjd < gcount_j; pjd++) {
- /* Re-set the multipole. */
- multipole_reset(&m);
+ /* Get a hold of the jth part in cj. */
+ struct gpart *restrict gpj = &gparts_j[pjd];
- /* Loop over the parts in cj and interact with the multipole in ci. */
- for (pid = count_i - 1, pjd = 0; pjd < count_j; pjd++) {
+ /* Compute the pairwise distance. */
+ float dx[3] = {gpi->x[0] - gpj->x[0], // x
+ gpi->x[1] - gpj->x[1], // y
+ gpi->x[2] - gpj->x[2]}; // z
+ const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
- /* Get the position of pj along the axis. */
- dj = sort_j[pjd].d - h_max + rshift;
+ /* Interact ! */
+ if (gpi->ti_end <= ti_current && gpj->ti_end <= ti_current) {
- /* Add any left-over parts in cell_i to the multipole. */
- while (pid >= 0 && sort_i[pid].d < dj) {
+ runner_iact_grav_pp(rlr_inv, r2, dx, gpi, gpj);
- /* Add this particle to the multipole. */
- multipole_addpart(&m, &parts_i[sort_i[pid].i]);
+ } else {
- /* Decrease pid. */
- pid -= 1;
- }
+ if (gpi->ti_end <= ti_current) {
- /* Interact pj with the multipole. */
- multipole_iact_mp(&m, &parts_j[sort_j[pjd].i], shift);
+ runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj);
- } /* loop over the parts in cj and interact with the multipole. */
+ } else if (gpj->ti_end <= ti_current) {
- TIMER_TOC(TIMER_DOPAIR);
+ dx[0] = -dx[0];
+ dx[1] = -dx[1];
+ dx[2] = -dx[2];
+ runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi);
+ }
+ }
+ }
+ }
+
+ TIMER_TOC(timer_dopair_grav_pp);
}
/**
- * @brief Compute the recursive upward sweep, i.e. construct the
- * multipoles in a cell hierarchy.
+ * @brief Computes the interaction of all the particles in a cell directly
*
* @param r The #runner.
- * @param c The top-level #cell.
+ * @param c The #cell.
+ *
+ * @todo Use a local cache for the particles.
*/
+__attribute__((always_inline)) INLINE static void runner_doself_grav_pp(
+ struct runner *r, struct cell *c) {
+
+ const struct engine *e = r->e;
+ const int gcount = c->gcount;
+ struct gpart *restrict gparts = c->gparts;
+ const int ti_current = e->ti_current;
+ const float rlr_inv = 1. / (const_gravity_a_smooth * c->super->width[0]);
-void runner_dograv_up(struct runner *r, struct cell *c) {
+ TIMER_TIC;
- /* Re-set this cell's multipole. */
- multipole_reset(&c->multipole);
+#ifdef SWIFT_DEBUG_CHECKS
+ if (c->gcount == 0) // MATTHIEU sanity check
+ error("Empty cell !");
+#endif
- /* Split? */
- if (c->split) {
+ /* Anything to do here? */
+ if (c->ti_end_min > ti_current) return;
- /* Recurse. */
- for (int k = 0; k < 8; k++)
- if (c->progeny[k] != NULL) runner_dograv_up(r, c->progeny[k]);
+#if ICHECK > 0
+ for (int pid = 0; pid < gcount; pid++) {
- /* Collect the multipoles from the progeny. */
- multipole_reset(&c->multipole);
- for (int k = 0; k < 8; k++)
- if (c->progeny[k] != NULL)
- multipole_merge(&c->multipole, &c->progeny[k]->multipole);
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gp = &gparts[pid];
+ if (gp->id_or_neg_offset == ICHECK)
+ message("id=%lld loc=[ %f %f %f ] size= %f count= %d",
+ gp->id_or_neg_offset, c->loc[0], c->loc[1], c->loc[2],
+ c->width[0], c->gcount);
}
+#endif
- /* No, leaf node. */
- else
+ /* Loop over all particles in ci... */
+ for (int pid = 0; pid < gcount; pid++) {
- /* Just collect the multipole. */
- multipole_init(&c->multipole, c->gparts, c->gcount);
-}
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gpi = &gparts[pid];
-/**
- * @brief Compute the recursive downward sweep, i.e. apply the multipole
- * acceleration on all the particles.
- *
- * @param r The #runner.
- * @param c The top-level #cell.
- */
+ /* Loop over every particle in the other cell. */
+ for (int pjd = pid + 1; pjd < gcount; pjd++) {
+
+ /* Get a hold of the jth part in ci. */
+ struct gpart *restrict gpj = &gparts[pjd];
-void runner_dograv_down(struct runner *r, struct cell *c) {
+ /* Compute the pairwise distance. */
+ float dx[3] = {gpi->x[0] - gpj->x[0], // x
+ gpi->x[1] - gpj->x[1], // y
+ gpi->x[2] - gpj->x[2]}; // z
+ const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
- struct multipole *m = &c->multipole;
+ /* Interact ! */
+ if (gpi->ti_end <= ti_current && gpj->ti_end <= ti_current) {
- /* Split? */
- if (c->split) {
+ runner_iact_grav_pp(rlr_inv, r2, dx, gpi, gpj);
- /* Apply this cell's acceleration on the multipoles below. */
- for (int k = 0; k < 8; k++)
- if (c->progeny[k] != NULL) {
- struct multipole *mp = &c->progeny[k]->multipole;
- mp->a[0] += m->a[0];
- mp->a[1] += m->a[1];
- mp->a[2] += m->a[2];
- }
+ } else {
- /* Recurse. */
- for (int k = 0; k < 8; k++)
- if (c->progeny[k] != NULL) runner_dograv_down(r, c->progeny[k]);
+ if (gpi->ti_end <= ti_current) {
- }
+ runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpi, gpj);
- /* No, leaf node. */
- else {
+ } else if (gpj->ti_end <= ti_current) {
- /* Apply the multipole acceleration to all gparts. */
- for (int k = 0; k < c->gcount; k++) {
- struct gpart *p = &c->gparts[k];
- p->a_grav[0] += m->a[0];
- p->a_grav[1] += m->a[1];
- p->a_grav[2] += m->a[2];
+ dx[0] = -dx[0];
+ dx[1] = -dx[1];
+ dx[2] = -dx[2];
+ runner_iact_grav_pp_nonsym(rlr_inv, r2, dx, gpj, gpi);
+ }
+ }
}
}
+
+ TIMER_TOC(timer_doself_grav_pp);
}
/**
- * @brief Compute the multipole-multipole interaction between two cells.
+ * @brief Computes the interaction of all the particles in a cell with all the
+ * particles of another cell.
*
* @param r The #runner.
* @param ci The first #cell.
- * @param cj The second #cell.
+ * @param cj The other #cell.
+ * @param gettimer Are we timing this ?
+ *
+ * @todo Use a local cache for the particles.
*/
+static void runner_dopair_grav(struct runner *r, struct cell *ci,
+ struct cell *cj, int gettimer) {
-void runner_dograv_mm(struct runner *r, struct cell *restrict ci,
- struct cell *restrict cj) {
-
- struct engine *e = r->e;
- int k;
- double shift[3] = {0.0, 0.0, 0.0};
- float dx[3], theta;
-
- /* Compute the shift between the cells. */
- for (k = 0; k < 3; k++) {
- dx[k] = cj->loc[k] - ci->loc[k];
- if (r->e->s->periodic) {
- if (dx[k] < -e->s->dim[k] / 2)
- shift[k] = e->s->dim[k];
- else if (dx[k] > e->s->dim[k] / 2)
- shift[k] = -e->s->dim[k];
- dx[k] += shift[k];
- }
- }
- theta = sqrt((dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) /
- (ci->width[0] * ci->width[0] + ci->width[1] * ci->width[1] +
- ci->width[2] * ci->width[2]));
+#ifdef SWIFT_DEBUG_CHECKS
- /* Do an MM or an MP/PM? */
- if (theta > const_theta_max * 4) {
+ const int gcount_i = ci->gcount;
+ const int gcount_j = cj->gcount;
- /* Update the multipoles. */
- multipole_iact_mm(&ci->multipole, &cj->multipole, shift);
+ /* Early abort? */
+ if (gcount_i == 0 || gcount_j == 0) error("Empty cell !");
- } else {
+ /* Bad stuff will happen if cell sizes are different */
+ if (ci->width[0] != cj->width[0])
+ error("Non matching cell sizes !! h_i=%f h_j=%f", ci->width[0],
+ cj->width[0]);
- /* Interact the multipoles via their parts. */
- for (k = 0; k < ci->gcount; k++)
- multipole_iact_mp(&cj->multipole, &ci->gparts[k], shift);
- for (k = 0; k < cj->gcount; k++)
- multipole_iact_mp(&ci->multipole, &cj->gparts[k], shift);
- }
-}
+ /* Sanity check */
+ if (ci == cj)
+ error(
+ "The impossible has happened: pair interaction between a cell and "
+ "itself.");
-/**
- * @brief Compute the interactions between a cell pair.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- */
+ /* Are the cells direct neighbours? */
+ if (!cell_are_neighbours(ci, cj))
+ error(
+ "Non-neighbouring cells ! ci->x=[%f %f %f] ci->width=%f cj->loc=[%f %f "
+ "%f] "
+ "cj->width=%f",
+ ci->loc[0], ci->loc[1], ci->loc[2], ci->width[0], cj->loc[0],
+ cj->loc[1], cj->loc[2], cj->width[0]);
-void runner_dopair_grav(struct runner *r, struct cell *restrict ci,
- struct cell *restrict cj) {
-
- struct engine *e = r->e;
- int pid, pjd, k, count_i = ci->gcount, count_j = cj->gcount;
- double shift[3] = {0.0, 0.0, 0.0};
- struct gpart *restrict parts_i = ci->gparts, *restrict parts_j = cj->gparts;
- struct gpart *restrict pi, *restrict pj;
- double pix[3];
- float dx[3], r2;
- const int ti_current = r->e->ti_current;
-#ifdef WITH_VECTORIZATION
- int icount = 0;
- float r2q[VEC_SIZE] __attribute__((aligned(16)));
- float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
- struct gpart *piq[VEC_SIZE], *pjq[VEC_SIZE];
#endif
- TIMER_TIC
- /* Anything to do here? */
- if (ci->ti_end_min > ti_current && cj->ti_end_min > ti_current) return;
-
- /* Get the relative distance between the pairs, wrapping. */
- if (e->s->periodic)
- for (k = 0; k < 3; k++) {
- if (cj->loc[k] - ci->loc[k] < -e->s->dim[k] / 2)
- shift[k] = e->s->dim[k];
- else if (cj->loc[k] - ci->loc[k] > e->s->dim[k] / 2)
- shift[k] = -e->s->dim[k];
- }
-
- /* Loop over the parts in ci. */
- for (pid = 0; pid < count_i; pid++) {
+#if ICHECK > 0
+ for (int pid = 0; pid < ci->gcount; pid++) {
/* Get a hold of the ith part in ci. */
- pi = &parts_i[pid];
- for (k = 0; k < 3; k++) pix[k] = pi->x[k] - shift[k];
+ struct gpart *restrict gp = &ci->gparts[pid];
- /* Loop over the parts in cj. */
- for (pjd = 0; pjd < count_j; pjd++) {
+ if (gp->id_or_neg_offset == ICHECK)
+ message("id=%lld loc=[ %f %f %f ] size= %f count= %d",
+ gp->id_or_neg_offset, cj->loc[0], cj->loc[1], cj->loc[2],
+ cj->width[0], cj->gcount);
+ }
- /* Get a pointer to the jth particle. */
- pj = &parts_j[pjd];
+ for (int pid = 0; pid < cj->gcount; pid++) {
- /* Compute the pairwise distance. */
- r2 = 0.0f;
- for (k = 0; k < 3; k++) {
- dx[k] = pix[k] - pj->x[k];
- r2 += dx[k] * dx[k];
- }
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gp = &cj->gparts[pid];
-/* Compute the interaction. */
-#ifndef WITH_VECTORIZATION
+ if (gp->id_or_neg_offset == ICHECK)
+ message("id=%lld loc=[ %f %f %f ] size= %f count= %d",
+ gp->id_or_neg_offset, ci->loc[0], ci->loc[1], ci->loc[2],
+ ci->width[0], ci->gcount);
+ }
+#endif
- // if ( pi->part->id == 3473472412525 || pj->part->id == 3473472412525 )
- // message( "interacting particles pi=%lli and pj=%lli with r=%.3e in
- // cells %lli/%lli." , pi->part->id , pj->part->id , sqrtf(r2) , ((long
- // long int)ci) / sizeof(struct cell) , ((long long int)cj) /
- // sizeof(struct cell) );
+ TIMER_TIC;
- runner_iact_grav(r2, dx, pi, pj);
+ /* Are both cells split ? */
+ if (ci->split && cj->split) {
-#else
+ for (int j = 0; j < 8; j++) {
+ if (ci->progeny[j] != NULL) {
- /* Add this interaction to the queue. */
- r2q[icount] = r2;
- dxq[3 * icount + 0] = dx[0];
- dxq[3 * icount + 1] = dx[1];
- dxq[3 * icount + 2] = dx[2];
- piq[icount] = pi;
- pjq[icount] = pj;
- icount += 1;
+ for (int k = 0; k < 8; k++) {
+ if (cj->progeny[k] != NULL) {
- /* Flush? */
- if (icount == VEC_SIZE) {
- runner_iact_vec_grav(r2q, dxq, piq, pjq);
- icount = 0;
- }
+ if (cell_are_neighbours(ci->progeny[j], cj->progeny[k])) {
-#endif
+ /* Recurse */
+ runner_dopair_grav(r, ci->progeny[j], cj->progeny[k], 0);
- } /* loop over the parts in cj. */
+ } else {
- } /* loop over the parts in ci. */
+ /* Ok, here we can go for particle-multipole interactions */
+ runner_dopair_grav_pm(r, ci->progeny[j], cj->progeny[k]);
+ runner_dopair_grav_pm(r, cj->progeny[k], ci->progeny[j]);
+ }
+ }
+ }
+ }
+ }
+ } else { /* Not split */
-#ifdef WITH_VECTORIZATION
- /* Pick up any leftovers. */
- if (icount > 0)
- for (k = 0; k < icount; k++)
- runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]);
-#endif
+ /* Compute the interactions at this level directly. */
+ runner_dopair_grav_pp(r, ci, cj);
+ }
- TIMER_TOC(timer_dopair_grav);
+ if (gettimer) TIMER_TOC(timer_dosub_pair_grav);
}
/**
- * @brief Compute the interactions within a cell.
+ * @brief Computes the interaction of all the particles in a cell
*
* @param r The #runner.
- * @param c The #cell.
+ * @param c The first #cell.
+ * @param gettimer Are we timing this ?
+ *
+ * @todo Use a local cache for the particles.
*/
+static void runner_doself_grav(struct runner *r, struct cell *c, int gettimer) {
+
+#ifdef SWIFT_DEBUG_CHECKS
-void runner_doself_grav(struct runner *r, struct cell *restrict c) {
-
- int pid, pjd, k, count = c->gcount;
- struct gpart *restrict parts = c->gparts;
- struct gpart *restrict pi, *restrict pj;
- double pix[3] = {0.0, 0.0, 0.0};
- float dx[3], r2;
- const int ti_current = r->e->ti_current;
-#ifdef WITH_VECTORIZATION
- int icount = 0;
- float r2q[VEC_SIZE] __attribute__((aligned(16)));
- float dxq[3 * VEC_SIZE] __attribute__((aligned(16)));
- struct gpart *piq[VEC_SIZE], *pjq[VEC_SIZE];
+ /* Early abort? */
+ if (c->gcount == 0) error("Empty cell !");
#endif
- TIMER_TIC
- /* Anything to do here? */
- if (c->ti_end_min > ti_current) return;
+ TIMER_TIC;
- /* Loop over every part in c. */
- for (pid = 0; pid < count; pid++) {
+ /* If the cell is split, interact each progeny with itself, and with
+ each of its siblings. */
+ if (c->split) {
- /* Get a hold of the ith part in ci. */
- pi = &parts[pid];
- for (k = 0; k < 3; k++) pix[k] = pi->x[k];
+ for (int j = 0; j < 8; j++) {
+ if (c->progeny[j] != NULL) {
- /* Loop over every other part in c. */
- for (pjd = pid + 1; pjd < count; pjd++) {
+ runner_doself_grav(r, c->progeny[j], 0);
- /* Get a pointer to the jth particle. */
- pj = &parts[pjd];
+ for (int k = j + 1; k < 8; k++) {
+ if (c->progeny[k] != NULL) {
- /* Compute the pairwise distance. */
- r2 = 0.0f;
- for (k = 0; k < 3; k++) {
- dx[k] = pix[k] - pj->x[k];
- r2 += dx[k] * dx[k];
+ runner_dopair_grav(r, c->progeny[j], c->progeny[k], 0);
+ }
+ }
}
+ }
+ }
-/* Compute the interaction. */
-#ifndef WITH_VECTORIZATION
-
- // if ( pi->part->id == 3473472412525 || pj->part->id == 3473472412525 )
- // message( "interacting particles pi=%lli and pj=%lli with r=%.3e." ,
- // pi->part->id , pj->part->id , sqrtf(r2) );
-
- runner_iact_grav(r2, dx, pi, pj);
+ /* If the cell is not split, then just go for it... */
+ else {
-#else
+ runner_doself_grav_pp(r, c);
+ }
- /* Add this interaction to the queue. */
- r2q[icount] = r2;
- dxq[3 * icount + 0] = dx[0];
- dxq[3 * icount + 1] = dx[1];
- dxq[3 * icount + 2] = dx[2];
- piq[icount] = pi;
- pjq[icount] = pj;
- icount += 1;
+ if (gettimer) TIMER_TOC(timer_dosub_self_grav);
+}
- /* Flush? */
- if (icount == VEC_SIZE) {
- runner_iact_vec_grav(r2q, dxq, piq, pjq);
- icount = 0;
- }
+static void runner_dosub_grav(struct runner *r, struct cell *ci,
+ struct cell *cj, int timer) {
-#endif
+ /* Is this a single cell? */
+ if (cj == NULL) {
- } /* loop over the remaining parts in c. */
+ runner_doself_grav(r, ci, 1);
- } /* loop over the parts in c. */
+ } else {
-#ifdef WITH_VECTORIZATION
- /* Pick up any leftovers. */
- if (icount > 0)
- for (k = 0; k < icount; k++)
- runner_iact_grav(r2q[k], &dxq[3 * k], piq[k], pjq[k]);
+#ifdef SWIFT_DEBUG_CHECKS
+ if (!cell_are_neighbours(ci, cj))
+ error("Non-neighbouring cells in pair task !");
#endif
- TIMER_TOC(timer_doself_grav);
+ runner_dopair_grav(r, ci, cj, 1);
+ }
}
-/**
- * @brief Compute a gravity sub-task.
- *
- * @param r The #runner.
- * @param ci The first #cell.
- * @param cj The second #cell.
- * @param gettimer Flag to record timer or not.
- */
-
-void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj,
- int gettimer) {
+static void runner_do_grav_mm(struct runner *r, struct cell *ci, int timer) {
- int j, k, periodic = r->e->s->periodic;
- struct space *s = r->e->s;
-
- TIMER_TIC
-
- /* Self-interaction? */
- if (cj == NULL) {
+#if ICHECK > 0
+ for (int pid = 0; pid < ci->gcount; pid++) {
- /* If the cell is split, recurse. */
- if (ci->split) {
-
- /* Split this task into tasks on its progeny. */
- for (j = 0; j < 8; j++)
- if (ci->progeny[j] != NULL) {
- runner_dosub_grav(r, ci->progeny[j], NULL, 0);
- for (k = j + 1; k < 8; k++)
- if (ci->progeny[k] != NULL)
- runner_dosub_grav(r, ci->progeny[j], ci->progeny[k], 0);
- }
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gp = &ci->gparts[pid];
- }
+ if (gp->id_or_neg_offset == ICHECK)
+ message("id=%lld loc=[ %f %f %f ] size= %f count= %d",
+ gp->id_or_neg_offset, ci->loc[0], ci->loc[1], ci->loc[2],
+ ci->width[0], ci->gcount);
+ }
+#endif
- /* Otherwise, just make a pp task out of it. */
- else
- runner_doself_grav(r, ci);
+ /* Recover the list of top-level cells */
+ const struct engine *e = r->e;
+ struct cell *cells = e->s->cells;
+ const int nr_cells = e->s->nr_cells;
+ const int ti_current = e->ti_current;
+ const double max_d =
+ const_gravity_a_smooth * const_gravity_r_cut * ci->width[0];
+ const double max_d2 = max_d * max_d;
+ const double pos_i[3] = {ci->loc[0], ci->loc[1], ci->loc[2]};
- }
+ /* Anything to do here? */
+ if (ci->ti_end_min > ti_current) return;
- /* Nope, pair. */
- else {
+ /* Loop over all the cells and go for a p-m interaction if far enough but not
+ * too far */
+ for (int i = 0; i < nr_cells; ++i) {
- /* Get the opening angle theta. */
- float dx[3], theta;
- for (k = 0; k < 3; k++) {
- dx[k] = fabs(ci->loc[k] - cj->loc[k]);
- if (periodic && dx[k] > 0.5 * s->dim[k]) dx[k] = -dx[k] + s->dim[k];
- if (dx[k] > 0.0f) dx[k] -= ci->width[k];
- }
- theta = (dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2]) /
- (ci->width[0] * ci->width[0] + ci->width[1] * ci->width[1] +
- ci->width[2] * ci->width[2]);
-
- /* Split the interaction? */
- if (theta < const_theta_max * const_theta_max) {
-
- /* Are both ci and cj split? */
- if (ci->split && cj->split) {
-
- /* Split this task into tasks on its progeny. */
- for (j = 0; j < 8; j++)
- if (ci->progeny[j] != NULL) {
- for (k = 0; k < 8; k++)
- if (cj->progeny[k] != NULL)
- runner_dosub_grav(r, ci->progeny[j], cj->progeny[k], 0);
- }
+ struct cell *cj = &cells[i];
- }
+ if (ci == cj) continue;
- /* Otherwise, make a pp task out of it. */
- else
- runner_dopair_grav(r, ci, cj);
+ const double dx[3] = {cj->loc[0] - pos_i[0], // x
+ cj->loc[1] - pos_i[1], // y
+ cj->loc[2] - pos_i[2]}; // z
+ const double r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
- }
+ if (r2 > max_d2) continue;
- /* Otherwise, mm interaction is fine. */
- else
- runner_dograv_mm(r, ci, cj);
+ if (!cell_are_neighbours(ci, cj)) runner_dopair_grav_pm(r, ci, cj);
}
-
- if (gettimer) TIMER_TOC(timer_dosub_grav);
}
+
#endif /* SWIFT_RUNNER_DOIACT_GRAV_H */
diff --git a/src/scheduler.c b/src/scheduler.c
index 9b389c8b9ee4d4f844c44707d083542b33137ade..6a0d886bd5458028c5c05812f10c204bc8946a1a 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -173,7 +173,8 @@ void scheduler_splittasks(struct scheduler *s) {
if (ci->split) {
/* Make a sub? */
- if (scheduler_dosub && ci->count < space_subsize / ci->count) {
+ if (scheduler_dosub && (ci->count * ci->count < space_subsize ||
+ ci->gcount * ci->gcount < space_subsize)) {
/* convert to a self-subtask. */
t->type = task_type_sub_self;
@@ -204,11 +205,10 @@ void scheduler_splittasks(struct scheduler *s) {
ci->progeny[j], ci->progeny[k], 0);
}
}
-
}
- /* Pair interaction? */
- else if (t->type == task_type_pair) {
+ /* Hydro Pair interaction? */
+ else if (t->type == task_type_pair && t->subtype != task_subtype_grav) {
/* Get a handle on the cells involved. */
struct cell *ci = t->ci;
@@ -234,7 +234,7 @@ void scheduler_splittasks(struct scheduler *s) {
/* Replace by a single sub-task? */
if (scheduler_dosub &&
- ci->count * sid_scale[sid] < space_subsize / cj->count &&
+ ci->count * cj->count * sid_scale[sid] < space_subsize &&
sid != 0 && sid != 2 && sid != 6 && sid != 8) {
/* Make this task a sub task. */
@@ -518,6 +518,17 @@ void scheduler_splittasks(struct scheduler *s) {
} /* pair interaction? */
+ /* Long-range gravity interaction ? */
+ else if (t->type == task_type_grav_mm) {
+
+ /* Get a handle on the cells involved. */
+ struct cell *ci = t->ci;
+
+ /* Safety thing */
+ if (ci->gcount == 0) t->type = task_type_none;
+
+ } /* gravity interaction? */
+
} /* loop over all tasks. */
}
diff --git a/src/space.c b/src/space.c
index ea652f7f9b918e7f4f1ebe0a81e6604765691eb1..8b7e5f81b539fb02da44dac67a3575356c08c45e 100644
--- a/src/space.c
+++ b/src/space.c
@@ -205,6 +205,12 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
"Must have at least 3 cells in each spatial dimension when periodicity "
"is switched on.");
+ /* Check if we have enough cells for gravity. */
+ if (s->gravity && (cdim[0] < 8 || cdim[1] < 8 || cdim[2] < 8))
+ error(
+ "Must have at least 8 cells in each spatial dimension when gravity "
+ "is switched on.");
+
/* In MPI-Land, changing the top-level cell size requires that the
* global partition is recomputed and the particles redistributed.
* Be prepared to do that. */
@@ -1421,7 +1427,8 @@ struct cell *space_getcell(struct space *s) {
* @param Npart The number of Gas particles in the space.
* @param Ngpart The number of Gravity particles in the space.
* @param periodic flag whether the domain is periodic or not.
- * @param verbose Print messages to stdout or not
+ * @param gravity flag whether we are doing gravity or not.
+ * @param verbose Print messages to stdout or not.
* @param dry_run If 1, just initialise stuff, don't do anything with the parts.
*
* Makes a grid of edge length > r_max and fills the particles
@@ -1432,8 +1439,8 @@ struct cell *space_getcell(struct space *s) {
void space_init(struct space *s, const struct swift_params *params,
double dim[3], struct part *parts, struct gpart *gparts,
- size_t Npart, size_t Ngpart, int periodic, int verbose,
- int dry_run) {
+ size_t Npart, size_t Ngpart, int periodic, int gravity,
+ int verbose, int dry_run) {
/* Clean-up everything */
bzero(s, sizeof(struct space));
@@ -1443,6 +1450,7 @@ void space_init(struct space *s, const struct swift_params *params,
s->dim[1] = dim[1];
s->dim[2] = dim[2];
s->periodic = periodic;
+ s->gravity = gravity;
s->nr_parts = Npart;
s->size_parts = Npart;
s->parts = parts;
diff --git a/src/space.h b/src/space.h
index a859ff5cb0497276ba5ae6dcec6a47d0ca5d7398..180759f2af69dd0d77e6e681229353358d554291 100644
--- a/src/space.h
+++ b/src/space.h
@@ -98,6 +98,9 @@ struct space {
/* Is the space periodic? */
int periodic;
+ /* Are we doing gravity? */
+ int gravity;
+
/* General-purpose lock for this space. */
swift_lock_type lock;
@@ -141,8 +144,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
double *shift);
void space_init(struct space *s, const struct swift_params *params,
double dim[3], struct part *parts, struct gpart *gparts,
- size_t Npart, size_t Ngpart, int periodic, int verbose,
- int dry_run);
+ size_t Npart, size_t Ngpart, int periodic, int gravity,
+ int verbose, int dry_run);
void space_map_cells_pre(struct space *s, int full,
void (*fun)(struct cell *c, void *data), void *data);
void space_map_parts(struct space *s,
diff --git a/src/task.c b/src/task.c
index f3c6a8f711df1a0506567216d1d4021c88c10f58..e9404ab00df4f757f49d6d186f28dc40c49cfa01 100644
--- a/src/task.c
+++ b/src/task.c
@@ -47,20 +47,19 @@
/* Task type names. */
const char *taskID_names[task_type_count] = {
- "none", "sort", "self", "pair", "sub_self",
- "sub_pair", "init", "ghost", "drift", "kick",
- "kick_fixdt", "send", "recv", "grav_pp", "grav_mm",
- "grav_up", "grav_down", "grav_external", "part_sort", "gpart_sort",
- "split_cell", "rewait"};
+ "none", "sort", "self", "pair", "sub",
+ "init", "ghost", "drift", "kick", "kick_fixdt",
+ "send", "recv", "grav_gather_m", "grav_fft", "grav_mm",
+ "grav_up", "grav_external", "part_sort", "gpart_sort", "split_cell",
+ "rewait"};
const char *subtaskID_names[task_type_count] = {"none", "density", "force",
- "grav", "t_end"};
+ "grav"};
/**
* @brief Computes the overlap between the parts array of two given cells.
*/
-
-size_t task_cell_overlap(const struct cell *ci, const struct cell *cj) {
+size_t task_cell_overlap_part(const struct cell *ci, const struct cell *cj) {
if (ci == NULL || cj == NULL) return 0;
if (ci->parts <= cj->parts &&
ci->parts + ci->count >= cj->parts + cj->count) {
@@ -72,6 +71,95 @@ size_t task_cell_overlap(const struct cell *ci, const struct cell *cj) {
return 0;
}
+/**
+ * @brief Computes the overlap between the gparts array of two given cells.
+ */
+size_t task_cell_overlap_gpart(const struct cell *ci, const struct cell *cj) {
+ if (ci == NULL || cj == NULL) return 0;
+ if (ci->gparts <= cj->gparts &&
+ ci->gparts + ci->gcount >= cj->gparts + cj->gcount) {
+ return cj->gcount;
+ } else if (cj->gparts <= ci->gparts &&
+ cj->gparts + cj->gcount >= ci->gparts + ci->gcount) {
+ return ci->gcount;
+ }
+ return 0;
+}
+
+/**
+ * @brief Returns the #task_actions for a given task.
+ *
+ * @param t The #task.
+ */
+enum task_actions task_acts_on(const struct task *t) {
+
+ switch (t->type) {
+
+ case task_type_none:
+ return task_action_none;
+ break;
+
+ case task_type_sort:
+ case task_type_ghost:
+ return task_action_part;
+ break;
+
+ case task_type_self:
+ case task_type_pair:
+ case task_type_sub_self:
+ case task_type_sub_pair:
+ switch (t->subtype) {
+
+ case task_subtype_density:
+ case task_subtype_force:
+ return task_action_part;
+ break;
+
+ case task_subtype_grav:
+ return task_action_gpart;
+ break;
+
+ default:
+ error("Unknow task_action for task");
+ return task_action_none;
+ break;
+ }
+ break;
+
+ case task_type_init:
+ case task_type_drift:
+ case task_type_kick:
+ case task_type_kick_fixdt:
+ case task_type_send:
+ case task_type_recv:
+ return task_action_all;
+ break;
+
+ case task_type_grav_gather_m:
+ case task_type_grav_fft:
+ case task_type_grav_mm:
+ case task_type_grav_up:
+ return task_action_multipole;
+ break;
+
+ case task_type_grav_external:
+ return task_action_gpart;
+ break;
+
+ case task_type_part_sort:
+ case task_type_gpart_sort:
+ case task_type_split_cell:
+ case task_type_rewait:
+ return task_action_none;
+ break;
+
+ default:
+ error("Unknow task_action for task");
+ return task_action_none;
+ break;
+ }
+}
+
/**
* @brief Compute the Jaccard similarity of the data used by two
* different tasks.
@@ -79,31 +167,64 @@ size_t task_cell_overlap(const struct cell *ci, const struct cell *cj) {
* @param ta The first #task.
* @param tb The second #task.
*/
-
float task_overlap(const struct task *ta, const struct task *tb) {
+
+ if (ta == NULL || tb == NULL) return 0.f;
+
+ const enum task_actions ta_act = task_acts_on(ta);
+ const enum task_actions tb_act = task_acts_on(tb);
+
/* First check if any of the two tasks are of a type that don't
use cells. */
- if (ta == NULL || tb == NULL || ta->type == task_type_none ||
- ta->type == task_type_part_sort || ta->type == task_type_gpart_sort ||
- ta->type == task_type_split_cell || ta->type == task_type_rewait ||
- tb->type == task_type_none || tb->type == task_type_part_sort ||
- tb->type == task_type_gpart_sort || tb->type == task_type_split_cell ||
- tb->type == task_type_rewait)
- return 0.0f;
-
- /* Compute the union of the cell data. */
- size_t size_union = 0;
- if (ta->ci != NULL) size_union += ta->ci->count;
- if (ta->cj != NULL) size_union += ta->cj->count;
- if (tb->ci != NULL) size_union += tb->ci->count;
- if (tb->cj != NULL) size_union += tb->cj->count;
-
- /* Compute the intersection of the cell data. */
- const size_t size_intersect =
- task_cell_overlap(ta->ci, tb->ci) + task_cell_overlap(ta->ci, tb->cj) +
- task_cell_overlap(ta->cj, tb->ci) + task_cell_overlap(ta->cj, tb->cj);
-
- return ((float)size_intersect) / (size_union - size_intersect);
+ if (ta_act == task_action_none || tb_act == task_action_none) return 0.f;
+
+ const int ta_part = (ta_act == task_action_part || ta_act == task_action_all);
+ const int ta_gpart =
+ (ta_act == task_action_gpart || ta_act == task_action_all);
+ const int tb_part = (tb_act == task_action_part || tb_act == task_action_all);
+ const int tb_gpart =
+ (tb_act == task_action_gpart || tb_act == task_action_all);
+
+ /* In the case where both tasks act on parts */
+ if (ta_part && tb_part) {
+
+ /* Compute the union of the cell data. */
+ size_t size_union = 0;
+ if (ta->ci != NULL) size_union += ta->ci->count;
+ if (ta->cj != NULL) size_union += ta->cj->count;
+ if (tb->ci != NULL) size_union += tb->ci->count;
+ if (tb->cj != NULL) size_union += tb->cj->count;
+
+ /* Compute the intersection of the cell data. */
+ const size_t size_intersect = task_cell_overlap_part(ta->ci, tb->ci) +
+ task_cell_overlap_part(ta->ci, tb->cj) +
+ task_cell_overlap_part(ta->cj, tb->ci) +
+ task_cell_overlap_part(ta->cj, tb->cj);
+
+ return ((float)size_intersect) / (size_union - size_intersect);
+ }
+
+ /* In the case where both tasks act on gparts */
+ else if (ta_gpart && tb_gpart) {
+
+ /* Compute the union of the cell data. */
+ size_t size_union = 0;
+ if (ta->ci != NULL) size_union += ta->ci->gcount;
+ if (ta->cj != NULL) size_union += ta->cj->gcount;
+ if (tb->ci != NULL) size_union += tb->ci->gcount;
+ if (tb->cj != NULL) size_union += tb->cj->gcount;
+
+ /* Compute the intersection of the cell data. */
+ const size_t size_intersect = task_cell_overlap_gpart(ta->ci, tb->ci) +
+ task_cell_overlap_gpart(ta->ci, tb->cj) +
+ task_cell_overlap_gpart(ta->cj, tb->ci) +
+ task_cell_overlap_gpart(ta->cj, tb->cj);
+
+ return ((float)size_intersect) / (size_union - size_intersect);
+ }
+
+ /* Else, no overlap */
+ return 0.f;
}
/**
@@ -111,26 +232,41 @@ float task_overlap(const struct task *ta, const struct task *tb) {
*
* @param t The #task.
*/
-
void task_unlock(struct task *t) {
+ const int type = t->type;
+ const int subtype = t->subtype;
+ struct cell *ci = t->ci, *cj = t->cj;
+
/* Act based on task type. */
- switch (t->type) {
+ switch (type) {
+
+ case task_type_sort:
+ cell_unlocktree(ci);
+ break;
+
case task_type_self:
case task_type_sub_self:
- case task_type_sort:
- cell_unlocktree(t->ci);
+ if (subtype == task_subtype_grav) {
+ cell_gunlocktree(ci);
+ } else {
+ cell_unlocktree(ci);
+ }
break;
+
case task_type_pair:
case task_type_sub_pair:
- cell_unlocktree(t->ci);
- cell_unlocktree(t->cj);
+ if (subtype == task_subtype_grav) {
+ cell_gunlocktree(ci);
+ cell_gunlocktree(cj);
+ } else {
+ cell_unlocktree(ci);
+ cell_unlocktree(cj);
+ }
break;
- case task_type_grav_pp:
+
case task_type_grav_mm:
- case task_type_grav_down:
- cell_gunlocktree(t->ci);
- if (t->cj != NULL) cell_gunlocktree(t->cj);
+ cell_gunlocktree(ci);
break;
default:
break;
@@ -142,7 +278,6 @@ void task_unlock(struct task *t) {
*
* @param t the #task.
*/
-
int task_lock(struct task *t) {
const int type = t->type;
@@ -205,6 +340,10 @@ int task_lock(struct task *t) {
}
break;
+ case task_type_grav_mm:
+ cell_glocktree(ci);
+ break;
+
default:
break;
}
diff --git a/src/task.h b/src/task.h
index 8dea503e42cdba9a188112cfa668c464c69f9959..ee49568b143282b9e3025f6d2bc81ded04ffee41 100644
--- a/src/task.h
+++ b/src/task.h
@@ -46,10 +46,10 @@ enum task_types {
task_type_kick_fixdt,
task_type_send,
task_type_recv,
- task_type_grav_pp,
+ task_type_grav_gather_m,
+ task_type_grav_fft,
task_type_grav_mm,
task_type_grav_up,
- task_type_grav_down,
task_type_grav_external,
task_type_part_sort,
task_type_gpart_sort,
@@ -70,6 +70,16 @@ enum task_subtypes {
task_subtype_count
};
+/* The kind of action the task perform */
+enum task_actions {
+ task_action_none,
+ task_action_part,
+ task_action_gpart,
+ task_action_all,
+ task_action_multipole,
+ task_action_count
+};
+
extern const char *subtaskID_names[];
/* Data of a task. */
@@ -102,5 +112,6 @@ int task_lock(struct task *t);
void task_print_mask(unsigned int mask);
void task_print_submask(unsigned int submask);
void task_do_rewait(struct task *t);
+enum task_actions task_acts_on(const struct task *t);
#endif /* SWIFT_TASK_H */
diff --git a/src/timers.h b/src/timers.h
index c48961b39737f23eb936d7283f76651d33892991..aa8455397daf1e88b709a8332e3ae63694991e94 100644
--- a/src/timers.h
+++ b/src/timers.h
@@ -37,10 +37,11 @@ enum {
timer_dosort,
timer_doself_density,
timer_doself_force,
- timer_doself_grav,
+ timer_doself_grav_pp,
timer_dopair_density,
timer_dopair_force,
- timer_dopair_grav,
+ timer_dopair_grav_pm,
+ timer_dopair_grav_pp,
timer_dograv_external,
timer_dosub_self_density,
timer_dosub_self_force,
diff --git a/src/tools.c b/src/tools.c
index 50ad48af38285062738c7c8ce12e0f4535086dd5..b4a37328ff7df9de76adceebb5ed6801fa417851 100644
--- a/src/tools.c
+++ b/src/tools.c
@@ -426,7 +426,7 @@ void pairs_single_grav(double *dim, long long int pid,
fdx[i] = dx[i];
}
r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2];
- runner_iact_grav(r2, fdx, &pi, &pj);
+ runner_iact_grav_pp(0.f, r2, fdx, &pi, &pj);
a[0] += pi.a_grav[0];
a[1] += pi.a_grav[1];
a[2] += pi.a_grav[2];
@@ -600,3 +600,65 @@ void shuffle_particles(struct part *parts, const int count) {
} else
error("Array not big enough to shuffle!");
}
+
+/**
+ * @brief Computes the forces between all g-particles using the N^2 algorithm
+ *
+ * Overwrites the accelerations of the gparts with the values.
+ * Do not use for actual runs.
+ *
+ * @brief gparts The array of particles.
+ * @brief gcount The number of particles.
+ */
+void gravity_n2(struct gpart *gparts, const int gcount,
+ const struct phys_const *constants, float rlr) {
+
+ const float rlr_inv = 1. / rlr;
+ const float max_d = const_gravity_r_cut * rlr;
+ const float max_d2 = max_d * max_d;
+
+ message("rlr_inv= %f", rlr_inv);
+ message("max_d: %f", max_d);
+
+ /* Reset everything */
+ for (int pid = 0; pid < gcount; pid++) {
+ struct gpart *restrict gpi = &gparts[pid];
+ gpi->a_grav[0] = 0.f;
+ gpi->a_grav[1] = 0.f;
+ gpi->a_grav[2] = 0.f;
+ }
+
+ /* Loop over all particles in ci... */
+ for (int pid = 0; pid < gcount; pid++) {
+
+ /* Get a hold of the ith part in ci. */
+ struct gpart *restrict gpi = &gparts[pid];
+
+ for (int pjd = pid + 1; pjd < gcount; pjd++) {
+
+ /* Get a hold of the jth part in ci. */
+ struct gpart *restrict gpj = &gparts[pjd];
+
+ /* Compute the pairwise distance. */
+ const float dx[3] = {gpi->x[0] - gpj->x[0], // x
+ gpi->x[1] - gpj->x[1], // y
+ gpi->x[2] - gpj->x[2]}; // z
+ const float r2 = dx[0] * dx[0] + dx[1] * dx[1] + dx[2] * dx[2];
+
+ if (r2 < max_d2 || 1) {
+
+ /* Apply the gravitational acceleration. */
+ runner_iact_grav_pp(rlr_inv, r2, dx, gpi, gpj);
+ }
+ }
+ }
+
+ /* Multiply by Newton's constant */
+ const double const_G = constants->const_newton_G;
+ for (int pid = 0; pid < gcount; pid++) {
+ struct gpart *restrict gpi = &gparts[pid];
+ gpi->a_grav[0] *= const_G;
+ gpi->a_grav[1] *= const_G;
+ gpi->a_grav[2] *= const_G;
+ }
+}
diff --git a/src/tools.h b/src/tools.h
index ba5b2f8c7f118edb042c825ff00907ad807b3d55..43ddd946c3e8cdf53139bb917135dffd8a8acd12 100644
--- a/src/tools.h
+++ b/src/tools.h
@@ -22,12 +22,9 @@
#ifndef SWIFT_TOOL_H
#define SWIFT_TOOL_H
-/* Config parameters. */
-#include "../config.h"
-
-/* Includes. */
#include "cell.h"
#include "part.h"
+#include "physical_constants.h"
#include "runner.h"
void factor(int value, int *f1, int *f2);
@@ -47,5 +44,7 @@ void pairs_n2(double *dim, struct part *restrict parts, int N, int periodic);
double random_uniform(double a, double b);
void shuffle_particles(struct part *parts, const int count);
+void gravity_n2(struct gpart *gparts, const int gcount,
+ const struct phys_const *constants, float rlr);
#endif /* SWIFT_TOOL_H */
diff --git a/src/version.c b/src/version.c
index ab22c0d9ab8841dedd09aa83aa8803e468e69ce9..68a051fa08c53c68319c1e785c0c6503afe18f4d 100644
--- a/src/version.c
+++ b/src/version.c
@@ -33,6 +33,10 @@
#include
#endif
+#ifdef HAVE_FFTW
+#include
+#endif
+
/* Some standard headers. */
#include
#include
@@ -238,6 +242,22 @@ const char *metis_version(void) {
return version;
}
+/**
+ * @brief return the FFTW version used when SWIFT was built.
+ *
+ * @result description of the FFTW version.
+ */
+const char *fftw3_version(void) {
+
+ static char version[256] = {0};
+#if defined(HAVE_FFTW)
+ sprintf(version, "%s", "3.x (details not available)");
+#else
+ sprintf(version, "Unknown version");
+#endif
+ return version;
+}
+
/**
* @brief Prints a greeting message to the standard output containing code
* version and revision number
@@ -259,6 +279,9 @@ void greetings(void) {
#ifdef HAVE_HDF5
printf(" HDF5 library version: %s\n", hdf5_version());
#endif
+#ifdef HAVE_FFTW
+ printf(" FFTW library version: %s\n", fftw3_version());
+#endif
#ifdef WITH_MPI
printf(" MPI library: %s\n", mpi_version());
#ifdef HAVE_METIS
diff --git a/src/version.h b/src/version.h
index 4b6b38d220b36e5cb340571cd89e6f52a7b21a8e..0d568f312cf775cf932d580a49da7c19c9e14b21 100644
--- a/src/version.h
+++ b/src/version.h
@@ -27,8 +27,9 @@ const char* git_branch(void);
const char* compiler_name(void);
const char* compiler_version(void);
const char* mpi_version(void);
-const char* hdf5_version(void);
const char* metis_version(void);
+const char* hdf5_version(void);
+const char* fftw3_version(void);
void greetings(void);
#endif /* SWIFT_VERSION_H */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 4d6cb65ca6971d30fd6e39f52ce582c59cecb8b8..2adce2c90985b537a668adfccec0d5241113314e 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -18,17 +18,17 @@
AM_CFLAGS = -I$(top_srcdir)/src $(HDF5_CPPFLAGS) -DTIMER
-AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS)
+AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) $(FFTW_LIBS)
# List of programs and scripts to run in the test suite
TESTS = testGreetings testMaths testReading.sh testSingle testKernel testSymmetry \
testPair.sh testPairPerturbed.sh test27cells.sh test27cellsPerturbed.sh \
- testParser.sh testSPHStep test125cells.sh
+ testParser.sh testSPHStep test125cells.sh testKernelGrav testFFT
# List of test programs to compile
check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \
testSPHStep testPair test27cells test125cells testParser \
- testKernel testInteractions testMaths testSymmetry
+ testKernel testKernelGrav testFFT testInteractions testMaths testSymmetry
# Sources for the individual programs
testGreetings_SOURCES = testGreetings.c
@@ -37,8 +37,6 @@ testMaths_SOURCES = testMaths.c
testReading_SOURCES = testReading.c
-testKernel_SOURCES = testKernel.c
-
testSymmetry_SOURCES = testSymmetry.c
testTimeIntegration_SOURCES = testTimeIntegration.c
@@ -55,6 +53,12 @@ test125cells_SOURCES = test125cells.c
testParser_SOURCES = testParser.c
+testKernel_SOURCES = testKernel.c
+
+testKernelGrav_SOURCES = testKernelGrav.c
+
+testFFT_SOURCES = testFFT.c
+
testInteractions_SOURCES = testInteractions.c
# Files necessary for distribution
diff --git a/tests/testFFT.c b/tests/testFFT.c
new file mode 100644
index 0000000000000000000000000000000000000000..7ff29dac3f0e9b7d6aec65cbfc7e4f4b85d08dbb
--- /dev/null
+++ b/tests/testFFT.c
@@ -0,0 +1,189 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ *
+ ******************************************************************************/
+
+/* Some standard headers. */
+#include
+#include
+
+#include
+
+/* Includes. */
+#include "swift.h"
+
+const double G = 1.;
+
+const size_t N = 16;
+const size_t PMGRID = 8;
+
+// const double asmth = 2. * M_PI * const_gravity_a_smooth / boxSize;
+// const double asmth2 = asmth * asmth;
+// const double fact = G / (M_PI * boxSize) * (1. / (2. * boxSize / PMGRID));
+
+int main() {
+
+ /* Initialize CPU frequency, this also starts time. */
+ unsigned long long cpufreq = 0;
+ clocks_set_cpufreq(cpufreq);
+
+ /* Simulation properties */
+ const size_t count = N * N * N;
+ const double boxSize = 1.;
+
+ /* Create some particles */
+ struct gpart* gparts = malloc(count * sizeof(struct gpart));
+ bzero(gparts, count * sizeof(struct gpart));
+ for (size_t i = 0; i < N; ++i) {
+ for (size_t j = 0; j < N; ++j) {
+ for (size_t k = 0; k < N; ++k) {
+
+ struct gpart* gp = &gparts[i * N * N + j * N + k];
+
+ gp->x[0] = i * boxSize / N + boxSize / (2 * N);
+ gp->x[1] = j * boxSize / N + boxSize / (2 * N);
+ gp->x[2] = k * boxSize / N + boxSize / (2 * N);
+
+ gp->mass = 1. / count;
+
+ gp->id_or_neg_offset = i * N * N + j * N + k;
+ }
+ }
+ }
+
+ /* Properties of the mesh */
+ const size_t meshmin[3] = {0, 0, 0};
+ const size_t meshmax[3] = {PMGRID - 1, PMGRID - 1, PMGRID - 1};
+
+ const size_t dimx = meshmax[0] - meshmin[0] + 2;
+ const size_t dimy = meshmax[1] - meshmin[1] + 2;
+ const size_t dimz = meshmax[2] - meshmin[2] + 2;
+
+ const double fac = PMGRID / boxSize;
+ const size_t PMGRID2 = 2 * (PMGRID / 2 + 1);
+
+ /* message("dimx=%zd dimy=%zd dimz=%zd", dimx, dimy, dimz); */
+
+ /* Allocate and empty the workspace mesh */
+ const size_t workspace_size = (dimx + 4) * (dimy + 4) * (dimz + 4);
+ double* workspace = fftw_malloc(workspace_size * sizeof(double));
+ bzero(workspace, workspace_size * sizeof(double));
+
+ /* Do CIC with the particles */
+ for (size_t pid = 0; pid < count; ++pid) {
+
+ const struct gpart* const gp = &gparts[pid];
+
+ const size_t slab_x =
+ (fac * gp->x[0] >= PMGRID) ? PMGRID - 1 : fac * gp->x[0];
+ const size_t slab_y =
+ (fac * gp->x[1] >= PMGRID) ? PMGRID - 1 : fac * gp->x[1];
+ const size_t slab_z =
+ (fac * gp->x[2] >= PMGRID) ? PMGRID - 1 : fac * gp->x[2];
+
+ const double dx = fac * gp->x[0] - (double)slab_x;
+ const double dy = fac * gp->x[1] - (double)slab_y;
+ const double dz = fac * gp->x[2] - (double)slab_z;
+
+ const size_t slab_xx = slab_x + 1;
+ const size_t slab_yy = slab_y + 1;
+ const size_t slab_zz = slab_z + 1;
+
+ workspace[(slab_x * dimy + slab_y) * dimz + slab_z] +=
+ gp->mass * (1.0 - dx) * (1.0 - dy) * (1.0 - dz);
+ workspace[(slab_x * dimy + slab_yy) * dimz + slab_z] +=
+ gp->mass * (1.0 - dx) * dy * (1.0 - dz);
+ workspace[(slab_x * dimy + slab_y) * dimz + slab_zz] +=
+ gp->mass * (1.0 - dx) * (1.0 - dy) * dz;
+ workspace[(slab_x * dimy + slab_yy) * dimz + slab_zz] +=
+ gp->mass * (1.0 - dx) * dy * dz;
+ workspace[(slab_xx * dimy + slab_y) * dimz + slab_z] +=
+ gp->mass * (dx) * (1.0 - dy) * (1.0 - dz);
+ workspace[(slab_xx * dimy + slab_yy) * dimz + slab_z] +=
+ gp->mass * (dx)*dy * (1.0 - dz);
+ workspace[(slab_xx * dimy + slab_y) * dimz + slab_zz] +=
+ gp->mass * (dx) * (1.0 - dy) * dz;
+ workspace[(slab_xx * dimy + slab_yy) * dimz + slab_zz] +=
+ gp->mass * (dx)*dy * dz;
+ }
+
+ /* for(size_t i = 0 ; i < dimx*dimy*dimz; ++i) */
+ /* message("workspace[%zd] = %f", i, workspace[i]); */
+
+ /* Prepare the force grid */
+ const size_t fft_size = workspace_size;
+ double* forcegrid = fftw_malloc(fft_size * sizeof(double));
+ bzero(forcegrid, fft_size * sizeof(double));
+
+ const size_t sendmin = 0, recvmin = 0;
+ const size_t sendmax = PMGRID, recvmax = PMGRID;
+
+ memcpy(forcegrid, workspace + (sendmin - meshmin[0]) * dimy * dimz,
+ (sendmax - sendmin + 1) * dimy * dimz * sizeof(double));
+
+ /* for (size_t i = 0; i < fft_size; ++i) */
+ /* if (forcegrid[i] != workspace[i]) error("wrong"); */
+
+ /* Prepare the density grid */
+ double* rhogrid = fftw_malloc(fft_size * sizeof(double));
+ bzero(rhogrid, fft_size * sizeof(double));
+
+ /* Now get the density */
+ for (size_t slab_x = recvmin; slab_x <= recvmax; slab_x++) {
+
+ const size_t slab_xx = slab_x % PMGRID;
+
+ for (size_t slab_y = recvmin; slab_y <= recvmax; slab_y++) {
+
+ const size_t slab_yy = slab_y % PMGRID;
+
+ for (size_t slab_z = recvmin; slab_z <= recvmax; slab_z++) {
+
+ const size_t slab_zz = slab_z % PMGRID;
+
+ rhogrid[PMGRID * PMGRID2 * slab_xx + PMGRID2 * slab_yy + slab_zz] +=
+ forcegrid[((slab_x - recvmin) * dimy + (slab_y - recvmin)) * dimz +
+ (slab_z - recvmin)];
+ }
+ }
+ }
+
+ /* for (size_t i = 0; i < 640; i++) { */
+ /* if (rhogrid[i] != workspace[i]) { */
+ /* message("rhogrid[%zd]= %f workspace[%zd]= %f forcegrid[%zd]= %f", i, */
+ /* rhogrid[i], i, workspace[i], i, forcegrid[i]); */
+ /* } */
+ /* } */
+
+ /* FFT of the density field */
+ fftw_complex* fftgrid = fftw_malloc(fft_size * sizeof(fftw_complex));
+ fftw_plan plan_forward = fftw_plan_dft_r2c_3d(PMGRID, PMGRID, PMGRID, rhogrid,
+ fftgrid, FFTW_ESTIMATE);
+ fftw_execute(plan_forward);
+
+ for (size_t i = 0; i < 640; i++) {
+ message("workspace[%zd]= %f", i, fftgrid[i][0]);
+ }
+
+ /* Clean-up */
+ fftw_destroy_plan(plan_forward);
+ fftw_free(forcegrid);
+ fftw_free(rhogrid);
+ fftw_free(workspace);
+ free(gparts);
+ return 0;
+}
diff --git a/tests/testKernelGrav.c b/tests/testKernelGrav.c
new file mode 100644
index 0000000000000000000000000000000000000000..42ce6066b605c8cc9ac78a6e565dd59a500112f4
--- /dev/null
+++ b/tests/testKernelGrav.c
@@ -0,0 +1,112 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk)
+ * James Willis (james.s.willis@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see .
+ *
+ ******************************************************************************/
+#include "const.h"
+#include "kernel_gravity.h"
+#include "kernel_long_gravity.h"
+
+#include
+#include
+#include
+#include
+
+#define numPoints (1 << 7)
+
+/**
+ * @brief The Gadget-2 gravity kernel function
+ *
+ * @param r The distance between particles
+ * @param h The cut-off distance of the kernel
+ */
+float gadget(float r, float h) {
+ float fac;
+ const float r2 = r * r;
+ if (r >= h)
+ fac = 1.0f / (r2 * r);
+ else {
+ const float h_inv = 1. / h;
+ const float h_inv3 = h_inv * h_inv * h_inv;
+ const float u = r * h_inv;
+ if (u < 0.5)
+ fac = h_inv3 * (10.666666666667 + u * u * (32.0 * u - 38.4));
+ else
+ fac =
+ h_inv3 * (21.333333333333 - 48.0 * u + 38.4 * u * u -
+ 10.666666666667 * u * u * u - 0.066666666667 / (u * u * u));
+ }
+ return fac;
+}
+
+int main() {
+
+ const float h = 3.f;
+ const float r_max = 6.f;
+
+ for (int k = 1; k < numPoints; ++k) {
+
+ const float r = (r_max * k) / numPoints;
+
+ const float u = r / h;
+
+ const float gadget_w = gadget(r, h);
+
+ float swift_w;
+ if (u < 1.) {
+ kernel_grav_eval(u, &swift_w);
+ swift_w *= (1 / (h * h * h));
+ } else {
+ swift_w = 1 / (r * r * r);
+ }
+
+ printf("%2d: r= %f h= %f u= %f Wg(r,h)= %f Ws(r,h)= %f\n", k, r, h, u,
+ gadget_w, swift_w);
+
+ if (fabsf(gadget_w - swift_w) > 2e-7) {
+ printf("Invalid value ! Gadget= %e, SWIFT= %e\n", gadget_w, swift_w);
+ return 1;
+ }
+ }
+
+ printf("\nAll values are consistent\n");
+
+ /* Now test the long range function */
+ const float a_smooth = const_gravity_a_smooth;
+
+ for (int k = 1; k < numPoints; ++k) {
+
+ const float r = (r_max * k) / numPoints;
+
+ const float u = r / a_smooth;
+
+ float swift_w;
+ kernel_long_grav_eval(u, &swift_w);
+
+ float gadget_w = erfc(u / 2) + u * exp(-u * u / 4) / sqrt(M_PI);
+
+ printf("%2d: r= %f r_lr= %f u= %f Ws(r)= %f Wg(r)= %f\n", k, r, a_smooth, u,
+ swift_w, gadget_w);
+
+ if (fabsf(gadget_w - swift_w) > 2e-7) {
+ printf("Invalid value ! Gadget= %e, SWIFT= %e\n", gadget_w, swift_w);
+ return 1;
+ }
+ }
+
+ return 0;
+}