Commit 0f6b13c7 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

Use the threaded version of FFTW to speed-up the PM part of the code.

parent daa00858
...@@ -588,7 +588,7 @@ AH_VERBATIM([__STDC_FORMAT_MACROS], ...@@ -588,7 +588,7 @@ AH_VERBATIM([__STDC_FORMAT_MACROS],
# Check for FFTW. We test for this in the standard directories by default, # Check for FFTW. We test for this in the standard directories by default,
# and only disable if using --with-fftw=no or --without-fftw. When a value # and only disable if using --with-fftw=no or --without-fftw. When a value
# is given GSL must be found. # is given FFTW must be found.
have_fftw="no" have_fftw="no"
AC_ARG_WITH([fftw], AC_ARG_WITH([fftw],
[AS_HELP_STRING([--with-fftw=PATH], [AS_HELP_STRING([--with-fftw=PATH],
...@@ -599,10 +599,10 @@ AC_ARG_WITH([fftw], ...@@ -599,10 +599,10 @@ AC_ARG_WITH([fftw],
) )
if test "x$with_fftw" != "xno"; then if test "x$with_fftw" != "xno"; then
if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then if test "x$with_fftw" != "xyes" -a "x$with_fftw" != "xtest" -a "x$with_fftw" != "x"; then
FFTW_LIBS="-L$with_fftw/lib -lfftw3" FFTW_LIBS="-L$with_fftw/lib -lfftw3_threads -lfftw3"
FFTW_INCS="-I$with_fftw/include" FFTW_INCS="-I$with_fftw/include"
else else
FFTW_LIBS="-lfftw3" FFTW_LIBS="-lfftw3_threads -lfftw3"
FFTW_INCS="" FFTW_INCS=""
fi fi
# FFTW is not specified, so just check if we have it. # FFTW is not specified, so just check if we have it.
......
...@@ -801,7 +801,7 @@ int main(int argc, char *argv[]) { ...@@ -801,7 +801,7 @@ int main(int argc, char *argv[]) {
/* Initialise the long-range gravity mesh */ /* Initialise the long-range gravity mesh */
if (with_self_gravity && periodic) { if (with_self_gravity && periodic) {
#ifdef HAVE_FFTW #ifdef HAVE_FFTW
pm_mesh_init(&mesh, &gravity_properties, s.dim); pm_mesh_init(&mesh, &gravity_properties, s.dim, nr_threads);
#else #else
/* Need the FFTW library if periodic and self gravity. */ /* Need the FFTW library if periodic and self gravity. */
error( error(
......
...@@ -596,9 +596,10 @@ void pm_mesh_interpolate_forces(const struct pm_mesh* mesh, ...@@ -596,9 +596,10 @@ void pm_mesh_interpolate_forces(const struct pm_mesh* mesh,
* @param mesh The #pm_mesh to initialise. * @param mesh The #pm_mesh to initialise.
* @param props The propoerties of the gravity scheme. * @param props The propoerties of the gravity scheme.
* @param dim The (comoving) side-lengths of the simulation volume. * @param dim The (comoving) side-lengths of the simulation volume.
* @param nr_threads The number of threads on this MPI rank.
*/ */
void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
double dim[3]) { double dim[3], int nr_threads) {
#ifdef HAVE_FFTW #ifdef HAVE_FFTW
...@@ -608,6 +609,7 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, ...@@ -608,6 +609,7 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
const int N = props->mesh_size; const int N = props->mesh_size;
const double box_size = dim[0]; const double box_size = dim[0];
mesh->nr_threads = nr_threads;
mesh->periodic = 1; mesh->periodic = 1;
mesh->N = N; mesh->N = N;
mesh->dim[0] = dim[0]; mesh->dim[0] = dim[0];
...@@ -622,6 +624,12 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props, ...@@ -622,6 +624,12 @@ void pm_mesh_init(struct pm_mesh* mesh, const struct gravity_props* props,
if (2. * mesh->r_cut_max > box_size) if (2. * mesh->r_cut_max > box_size)
error("Mesh too small or r_cut_max too big for this box size"); error("Mesh too small or r_cut_max too big for this box size");
/* Initialise the thread-parallel FFTW version */
if (N >= 64) {
fftw_init_threads();
fftw_plan_with_nthreads(nr_threads);
}
/* Allocate the memory for the combined density and potential array */ /* Allocate the memory for the combined density and potential array */
mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N);
if (mesh->potential == NULL) if (mesh->potential == NULL)
...@@ -660,6 +668,8 @@ void pm_mesh_init_no_mesh(struct pm_mesh* mesh, double dim[3]) { ...@@ -660,6 +668,8 @@ void pm_mesh_init_no_mesh(struct pm_mesh* mesh, double dim[3]) {
*/ */
void pm_mesh_clean(struct pm_mesh* mesh) { void pm_mesh_clean(struct pm_mesh* mesh) {
fftw_cleanup_threads();
if (mesh->potential) free(mesh->potential); if (mesh->potential) free(mesh->potential);
mesh->potential = 0; mesh->potential = 0;
} }
...@@ -692,6 +702,12 @@ void pm_mesh_struct_restore(struct pm_mesh* mesh, FILE* stream) { ...@@ -692,6 +702,12 @@ void pm_mesh_struct_restore(struct pm_mesh* mesh, FILE* stream) {
#ifdef HAVE_FFTW #ifdef HAVE_FFTW
const int N = mesh->N; const int N = mesh->N;
/* Initialise the thread-parallel FFTW version */
if (N >= 64) {
fftw_init_threads();
fftw_plan_with_nthreads(mesh->nr_threads);
}
/* Allocate the memory for the combined density and potential array */ /* Allocate the memory for the combined density and potential array */
mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N); mesh->potential = (double*)fftw_malloc(sizeof(double) * N * N * N);
if (mesh->potential == NULL) if (mesh->potential == NULL)
......
...@@ -39,6 +39,9 @@ struct pm_mesh { ...@@ -39,6 +39,9 @@ struct pm_mesh {
/*! Is the calculation using periodic BCs? */ /*! Is the calculation using periodic BCs? */
int periodic; int periodic;
/*! The number of threads used by the FFTW library */
int nr_threads;
/*! Side-length of the mesh */ /*! Side-length of the mesh */
int N; int N;
...@@ -65,7 +68,7 @@ struct pm_mesh { ...@@ -65,7 +68,7 @@ struct pm_mesh {
}; };
void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props, void pm_mesh_init(struct pm_mesh *mesh, const struct gravity_props *props,
double dim[3]); double dim[3], int nr_threads);
void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]); void pm_mesh_init_no_mesh(struct pm_mesh *mesh, double dim[3]);
void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s, void pm_mesh_compute_potential(struct pm_mesh *mesh, const struct space *s,
struct threadpool *tp, int verbose); struct threadpool *tp, int verbose);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment