diff --git a/configure.ac b/configure.ac index 3d8f313a92e4fc97d2c372fccd9468d10a08fdb4..f5228ebc023d2168176f9ef5d029aeb1d53162e2 100644 --- a/configure.ac +++ b/configure.ac @@ -588,7 +588,8 @@ AH_VERBATIM([__STDC_FORMAT_MACROS], # Check for FFTW. We test for this in the standard directories by default, # and only disable if using --with-fftw=no or --without-fftw. When a value -# is given GSL must be found. +# is given FFTW must be found. +# If FFTW is found, we check whether this is the threaded version. have_fftw="no" AC_ARG_WITH([fftw], [AS_HELP_STRING([--with-fftw=PATH], @@ -598,6 +599,8 @@ AC_ARG_WITH([fftw], [with_fftw="test"] ) if test "x$with_fftw" != "xno"; then + + # Was FFTW's location specifically given? if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then FFTW_LIBS="-L$with_fftw/lib -lfftw3" FFTW_INCS="-I$with_fftw/include" @@ -605,22 +608,51 @@ if test "x$with_fftw" != "xno"; then FFTW_LIBS="-lfftw3" FFTW_INCS="" fi + # FFTW is not specified, so just check if we have it. if test "x$with_fftw" = "xtest"; then AC_CHECK_LIB([fftw3],[fftw_malloc],[have_fftw="yes"],[have_fftw="no"],$FFTW_LIBS) if test "x$have_fftw" != "xno"; then AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]) fi + # FFTW was specified, check that it was a valid location. else AC_CHECK_LIB([fftw3],[fftw_malloc], AC_DEFINE([HAVE_FFTW],1,[The FFTW library appears to be present.]), AC_MSG_ERROR(something is wrong with the FFTW library!), $FFTW_LIBS) have_fftw="yes" fi + + # FFTW was requested not to be used. if test "$have_fftw" = "no"; then FFTW_LIBS="" FFTW_INCS="" fi + + # Now, check whether we have the threaded version of FFTW + if test "x$have_fftw" = "xyes"; then + + # Was FFTW's location specifically given? + if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then + FFTW_THREADED_LIBS="-L$with_fftw/lib -lfftw3_threads -lfftw3" + FFTW_THREADED_INCS="-I$with_fftw/include" + else + FFTW_THREADED_LIBS="-lfftw3_threads -lfftw3" + FFTW_THREADED_INCS="" + fi + + # Verify that the library is threaded + AC_CHECK_LIB([fftw3],[fftw_init_threads],[have_threaded_fftw="yes"], + [have_threaded_fftw="no"], $FFTW_THREADED_LIBS) + + # If found, update things + if test "x$have_threaded_fftw" = "xyes"; then + AC_DEFINE([HAVE_THREADED_FFTW],1,[The threaded FFTW library appears to be present.]) + FFTW_LIBS=$FFTW_THREADED_LIBS + FFTW_INCS=$FFTW_THREADED_INCS + have_fftw="yes - threaded" + fi + fi fi AC_SUBST([FFTW_LIBS]) AC_SUBST([FFTW_INCS]) diff --git a/examples/main.c b/examples/main.c index 36e1f6d8a277a153eb7bd21c1a79498267f484e2..ffbb569b8dc49dec936de4c2d92f558693d9e886 100644 --- a/examples/main.c +++ b/examples/main.c @@ -801,7 +801,7 @@ int main(int argc, char *argv[]) { /* Initialise the long-range gravity mesh */ if (with_self_gravity && periodic) { #ifdef HAVE_FFTW - pm_mesh_init(&mesh, &gravity_properties, s.dim); + pm_mesh_init(&mesh, &gravity_properties, s.dim, nr_threads); #else /* Need the FFTW library if periodic and self gravity. */ error( diff --git a/src/mesh_gravity.c b/src/mesh_gravity.c index 3b3f495e780a2e9595ea17a4cd84d6ababfef03f..2f2f1628e7077ba301322c7af6b858e979b313ad 100644 --- a/src/mesh_gravity.c +++ b/src/mesh_gravity.c @@ -596,9 +596,10 @@ void pm_mesh_interpolate_forces(const struct pm_mesh* mesh, * @param mesh The #pm_mesh to initialise. * @param props The propoerties of the gravity scheme. * @param dim The (comoving) side-lengths of the simulation volume. + * @param nr_threads The number of threads on this MPI rank. */ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, - double dim[3]) { + double dim[3], int nr_threads) { #ifdef HAVE_FFTW @@ -608,6 +609,7 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, const int N = props->mesh_size; const double box_size = dim[0]; + mesh->nr_threads = nr_threads; mesh->periodic = 1; mesh->N = N; mesh->dim[0] = dim[0]; @@ -622,6 +624,14 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, if (2. * mesh->r_cut_max > box_size) error("Mesh too small or r_cut_max too big for this box size"); +#ifdef HAVE_THREADED_FFTW + /* Initialise the thread-parallel FFTW version */ + if (N >= 64) { + fftw_init_threads(); + fftw_plan_with_nthreads(nr_threads); + } +#endif + /* Allocate the memory for the combined density and potential array */ mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); if (mesh->potential == NULL) @@ -660,6 +670,10 @@ void pm_mesh_init_no_mesh(struct pm_mesh* mesh, double dim[3]) { */ void pm_mesh_clean(struct pm_mesh* mesh) { +#ifdef HAVE_THREADED_FFTW + fftw_cleanup_threads(); +#endif + if (mesh->potential) free(mesh->potential); mesh->potential = 0; } @@ -692,6 +706,14 @@ void pm_mesh_struct_restore(struct pm_mesh* mesh, FILE* stream) { #ifdef HAVE_FFTW const int N = mesh->N; +#ifdef HAVE_THREADED_FFTW + /* Initialise the thread-parallel FFTW version */ + if (N >= 64) { + fftw_init_threads(); + fftw_plan_with_nthreads(mesh->nr_threads); + } +#endif + /* Allocate the memory for the combined density and potential array */ mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); if (mesh->potential == NULL) diff --git a/src/mesh_gravity.h b/src/mesh_gravity.h index abff77afbd9c29547a781cdfbf6fed745e1ca37f..1b2d997398ee6f3f665340cedb790c241e641cfa 100644 --- a/src/mesh_gravity.h +++ b/src/mesh_gravity.h @@ -39,6 +39,9 @@ struct pm_mesh { /*! Is the calculation using periodic BCs? */ int periodic; + /*! The number of threads used by the FFTW library */ + int nr_threads; + /*! Side-length of the mesh */ int N; @@ -65,7 +68,7 @@ struct pm_mesh { }; void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props, - double dim[3]); + double dim[3], int nr_threads); void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]); void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s, struct threadpool *tp, int verbose);