From 0f6b13c71e06d4aa503778ac24e9de98d807d419 Mon Sep 17 00:00:00 2001
From: Matthieu Schaller <schaller@strw.leidenuniv.nl>
Date: Thu, 13 Sep 2018 22:33:43 +0200
Subject: [PATCH] Use the threaded version of FFTW to speed-up the PM part of
 the code.

---
 configure.ac       |  6 +++---
 examples/main.c    |  2 +-
 src/mesh_gravity.c | 18 +++++++++++++++++-
 src/mesh_gravity.h |  5 ++++-
 4 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 3d8f313a92..82fd3b156d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -588,7 +588,7 @@ AH_VERBATIM([__STDC_FORMAT_MACROS],
 
 # Check for FFTW. We test for this in the standard directories by default,
 # and only disable if using --with-fftw=no or --without-fftw. When a value
-# is given GSL must be found.
+# is given FFTW must be found.
 have_fftw="no"
 AC_ARG_WITH([fftw],
     [AS_HELP_STRING([--with-fftw=PATH],
@@ -599,10 +599,10 @@ AC_ARG_WITH([fftw],
 )
 if test "x$with_fftw" != "xno"; then
    if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then
-      FFTW_LIBS="-L$with_fftw/lib -lfftw3"
+      FFTW_LIBS="-L$with_fftw/lib -lfftw3_threads -lfftw3"
       FFTW_INCS="-I$with_fftw/include"
    else
-      FFTW_LIBS="-lfftw3"
+      FFTW_LIBS="-lfftw3_threads -lfftw3"
       FFTW_INCS=""
    fi
    #  FFTW is not specified, so just check if we have it.
diff --git a/examples/main.c b/examples/main.c
index 36e1f6d8a2..ffbb569b8d 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -801,7 +801,7 @@ int main(int argc, char *argv[]) {
     /* Initialise the long-range gravity mesh */
     if (with_self_gravity && periodic) {
 #ifdef HAVE_FFTW
-      pm_mesh_init(&mesh, &gravity_properties, s.dim);
+      pm_mesh_init(&mesh, &gravity_properties, s.dim, nr_threads);
 #else
       /* Need the FFTW library if periodic and self gravity. */
       error(
diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c
index 3b3f495e78..cee9d1828a 100644
--- a/src/mesh_gravity.c
+++ b/src/mesh_gravity.c
@@ -596,9 +596,10 @@ void pm_mesh_interpolate_forces(const struct pm_mesh* mesh,
  * @param mesh The #pm_mesh to initialise.
  * @param props The propoerties of the gravity scheme.
  * @param dim The (comoving) side-lengths of the simulation volume.
+ * @param nr_threads The number of threads on this MPI rank.
  */
 void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
-                  double dim[3]) {
+                  double dim[3], int nr_threads) {
 
 #ifdef HAVE_FFTW
 
@@ -608,6 +609,7 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
   const int N = props->mesh_size;
   const double box_size = dim[0];
 
+  mesh->nr_threads = nr_threads;
   mesh->periodic = 1;
   mesh->N = N;
   mesh->dim[0] = dim[0];
@@ -622,6 +624,12 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
   if (2. * mesh->r_cut_max > box_size)
     error("Mesh too small or r_cut_max too big for this box size");
 
+  /* Initialise the thread-parallel FFTW version */
+  if (N >= 64) {
+    fftw_init_threads();
+    fftw_plan_with_nthreads(nr_threads);
+  }
+
   /* Allocate the memory for the combined density and potential array */
   mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N);
   if (mesh->potential == NULL)
@@ -660,6 +668,8 @@ void pm_mesh_init_no_mesh(struct pm_mesh* mesh, double dim[3]) {
  */
 void pm_mesh_clean(struct pm_mesh* mesh) {
 
+  fftw_cleanup_threads();
+
   if (mesh->potential) free(mesh->potential);
   mesh->potential = 0;
 }
@@ -692,6 +702,12 @@ void pm_mesh_struct_restore(struct pm_mesh* mesh, FILE* stream) {
 #ifdef HAVE_FFTW
     const int N = mesh->N;
 
+   /* Initialise the thread-parallel FFTW version */
+   if (N >= 64) {
+      fftw_init_threads();
+      fftw_plan_with_nthreads(mesh->nr_threads);
+    }
+
     /* Allocate the memory for the combined density and potential array */
     mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N);
     if (mesh->potential == NULL)
diff --git a/src/mesh_gravity.h b/src/mesh_gravity.h
index abff77afbd..1b2d997398 100644
--- a/src/mesh_gravity.h
+++ b/src/mesh_gravity.h
@@ -39,6 +39,9 @@ struct pm_mesh {
   /*! Is the calculation using periodic BCs? */
   int periodic;
 
+  /*! The number of threads used by the FFTW library */
+  int nr_threads;
+
   /*! Side-length of the mesh */
   int N;
 
@@ -65,7 +68,7 @@ struct pm_mesh {
 };
 
 void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props,
-                  double dim[3]);
+                  double dim[3], int nr_threads);
 void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]);
 void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s,
                                struct threadpool *tp, int verbose);
-- 
GitLab