diff --git a/src/runner.c b/src/runner.c index b4dbe7b377255b27520b611e0ca9e24289b38ba7..8d54dc07c89754f14caf39583f1123f620b03547 100644 --- a/src/runner.c +++ b/src/runner.c @@ -53,6 +53,7 @@ #include "hydro_properties.h" #include "kick.h" #include "minmax.h" +#include "runner_doiact_fft.h" #include "runner_doiact_vec.h" #include "scheduler.h" #include "sort_part.h" @@ -1902,14 +1903,11 @@ void *runner_main(void *data) { } break; #endif - case task_type_grav_mm: - // runner_do_grav_mm(r, t->ci, 1); - break; case task_type_grav_down: runner_do_grav_down(r, t->ci, 1); break; case task_type_grav_top_level: - // runner_do_grav_top_level(r); + runner_do_grav_fft(r, 1); break; case task_type_grav_long_range: runner_do_grav_long_range(r, t->ci, 1); diff --git a/src/runner_doiact_fft.c b/src/runner_doiact_fft.c index af31698784667be399460d0258f94620cccf7bd5..1e9a4d3be684a6fdec921a4b1ef345895a41d5a9 100644 --- a/src/runner_doiact_fft.c +++ b/src/runner_doiact_fft.c @@ -37,8 +37,17 @@ #include "space.h" #include "timers.h" -__attribute__((always_inline)) INLINE int id(int i, int j, int k, int N) { - return ((i % N) * N * N + (j % N) * N + (k % N)); +/** + * @brief Returns 1D index of a 3D NxNxN array using row-major style. + * + * @param i Index along x. + * @param j Index along y. + * @param k Index along z. + * @param N Size of the array along one axis. + */ +__attribute__((always_inline)) INLINE static int row_major_id(int i, int j, + int k, int N) { + return (i * N * N + j * N + k); } /** @@ -49,33 +58,33 @@ __attribute__((always_inline)) INLINE int id(int i, int j, int k, int N) { * @param N the size of the mesh along one axis. * @param fac The width of a mesh cell. */ -__attribute__((always_inline)) INLINE void multipole_to_mesh_CIC( +__attribute__((always_inline)) INLINE static void multipole_to_mesh_CIC( const struct gravity_tensors* m, double* rho, int N, double fac) { int i = (int)(fac * m->CoM[0]); - if(i >= N) i = N-1; + if (i >= N) i = N - 1; const double dx = fac * m->CoM[0] - i; const double tx = 1. - dx; int j = (int)(fac * m->CoM[1]); - if(j >= N) j = N-1; + if (j >= N) j = N - 1; const double dy = fac * m->CoM[1] - j; const double ty = 1. - dy; int k = (int)(fac * m->CoM[2]); - if(k >= N) k = N-1; + if (k >= N) k = N - 1; const double dz = fac * m->CoM[2] - k; const double tz = 1. - dz; /* CIC ! */ - rho[id(i + 0, j + 0, k + 0, N)] += m->m_pole.M_000 * tx * ty * tz; - rho[id(i + 0, j + 0, k + 1, N)] += m->m_pole.M_000 * tx * ty * dz; - rho[id(i + 0, j + 1, k + 0, N)] += m->m_pole.M_000 * tx * dy * tz; - rho[id(i + 0, j + 1, k + 1, N)] += m->m_pole.M_000 * tx * dy * dz; - rho[id(i + 1, j + 0, k + 0, N)] += m->m_pole.M_000 * dx * ty * tz; - rho[id(i + 1, j + 0, k + 1, N)] += m->m_pole.M_000 * dx * ty * dz; - rho[id(i + 1, j + 1, k + 0, N)] += m->m_pole.M_000 * dx * dy * tz; - rho[id(i + 1, j + 1, k + 1, N)] += m->m_pole.M_000 * dx * dy * dz; + rho[row_major_id(i + 0, j + 0, k + 0, N)] += m->m_pole.M_000 * tx * ty * tz; + rho[row_major_id(i + 0, j + 0, k + 1, N)] += m->m_pole.M_000 * tx * ty * dz; + rho[row_major_id(i + 0, j + 1, k + 0, N)] += m->m_pole.M_000 * tx * dy * tz; + rho[row_major_id(i + 0, j + 1, k + 1, N)] += m->m_pole.M_000 * tx * dy * dz; + rho[row_major_id(i + 1, j + 0, k + 0, N)] += m->m_pole.M_000 * dx * ty * tz; + rho[row_major_id(i + 1, j + 0, k + 1, N)] += m->m_pole.M_000 * dx * ty * dz; + rho[row_major_id(i + 1, j + 1, k + 0, N)] += m->m_pole.M_000 * dx * dy * tz; + rho[row_major_id(i + 1, j + 1, k + 1, N)] += m->m_pole.M_000 * dx * dy * dz; } /** @@ -87,60 +96,33 @@ __attribute__((always_inline)) INLINE void multipole_to_mesh_CIC( * @param N the size of the mesh along one axis. * @param fac width of a mesh cell. */ -__attribute__((always_inline)) INLINE void mesh_to_multipole_CIC( +__attribute__((always_inline)) INLINE static void mesh_to_multipole_CIC( struct gravity_tensors* m, double* pot, int N, double fac) { - + int i = (int)(fac * m->CoM[0]); - if(i >= N) i = N-1; + if (i >= N) i = N - 1; const double dx = fac * m->CoM[0] - i; const double tx = 1. - dx; int j = (int)(fac * m->CoM[1]); - if(j >= N) j = N-1; + if (j >= N) j = N - 1; const double dy = fac * m->CoM[1] - j; const double ty = 1. - dy; int k = (int)(fac * m->CoM[2]); - if(k >= N) k = N-1; + if (k >= N) k = N - 1; const double dz = fac * m->CoM[2] - k; const double tz = 1. - dz; /* CIC ! */ - m->pot.F_000 += pot[id(i + 0, j + 0, k + 0, N)] * tx * ty * tz; - m->pot.F_000 += pot[id(i + 0, j + 0, k + 1, N)] * tx * ty * dz; - m->pot.F_000 += pot[id(i + 0, j + 1, k + 0, N)] * tx * dy * tz; - m->pot.F_000 += pot[id(i + 0, j + 1, k + 1, N)] * tx * dy * dz; - m->pot.F_000 += pot[id(i + 1, j + 0, k + 0, N)] * dx * ty * tz; - m->pot.F_000 += pot[id(i + 1, j + 0, k + 1, N)] * dx * ty * dz; - m->pot.F_000 += pot[id(i + 1, j + 1, k + 0, N)] * dx * dy * tz; - m->pot.F_000 += pot[id(i + 1, j + 1, k + 1, N)] * dx * dy * dz; -} - -void print_array(double* array, int N) { - - for (int k = N - 1; k >= 0; --k) { - printf("--- z = %d ---------\n", k); - for (int j = N - 1; j >= 0; --j) { - for (int i = 0; i < N; ++i) { - printf("%f ", array[i * N * N + j * N + k]); - } - printf("\n"); - } - } -} - -void print_carray(fftw_complex* array, int N) { - - for (int k = N - 1; k >= 0; --k) { - printf("--- z = %d ---------\n", k); - for (int j = N - 1; j >= 0; --j) { - for (int i = 0; i < N; ++i) { - printf("(%f %f) ", array[i * N * N + j * N + k][0], - array[i * N * N + j * N + k][1]); - } - printf("\n"); - } - } + m->pot.F_000 += pot[row_major_id(i + 0, j + 0, k + 0, N)] * tx * ty * tz; + m->pot.F_000 += pot[row_major_id(i + 0, j + 0, k + 1, N)] * tx * ty * dz; + m->pot.F_000 += pot[row_major_id(i + 0, j + 1, k + 0, N)] * tx * dy * tz; + m->pot.F_000 += pot[row_major_id(i + 0, j + 1, k + 1, N)] * tx * dy * dz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 0, k + 0, N)] * dx * ty * tz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 0, k + 1, N)] * dx * ty * dz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 1, k + 0, N)] * dx * dy * tz; + m->pot.F_000 += pot[row_major_id(i + 1, j + 1, k + 1, N)] * dx * dy * dz; } /** @@ -152,7 +134,7 @@ void print_carray(fftw_complex* array, int N) { void runner_do_grav_fft(struct runner* r, int timer) { #ifdef HAVE_FFTW - + const struct engine* e = r->e; const struct space* s = e->s; const integertime_t ti_current = e->ti_current; @@ -168,10 +150,11 @@ void runner_do_grav_fft(struct runner* r, int timer) { const int N = cdim[0]; const int N_half = N / 2; const int Ncells = N * N * N; + const double cell_fac = N / box_size; /* Recover the list of top-level multipoles */ const int nr_cells = s->nr_cells; - struct gravity_tensors* multipoles = s->multipoles_top; + struct gravity_tensors* restrict multipoles = s->multipoles_top; struct cell* cells = s->cells_top; /* Make sure everything has been drifted to the current point */ @@ -181,31 +164,31 @@ void runner_do_grav_fft(struct runner* r, int timer) { } /* Allocates some memory for the density mesh */ - double* rho = fftw_alloc_real(Ncells); + double* restrict rho = fftw_alloc_real(Ncells); if (rho == NULL) error("Error allocating memory for density mesh"); /* Allocates some memory for the mesh in Fourier space */ - fftw_complex* frho = fftw_alloc_complex(N * N *(N_half+1)); + fftw_complex* restrict frho = fftw_alloc_complex(N * N * (N_half + 1)); if (frho == NULL) error("Error allocating memory for transform of density mesh"); /* Prepare the FFT library */ - fftw_plan forward_plan = - fftw_plan_dft_r2c_3d(N, N, N, rho, frho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); - fftw_plan inverse_plan = - fftw_plan_dft_c2r_3d(N, N, N, frho, rho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); + fftw_plan forward_plan = fftw_plan_dft_r2c_3d( + N, N, N, rho, frho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); + fftw_plan inverse_plan = fftw_plan_dft_c2r_3d( + N, N, N, frho, rho, FFTW_ESTIMATE | FFTW_DESTROY_INPUT); /* Do a CIC mesh assignment of the multipoles */ bzero(rho, Ncells * sizeof(double)); for (int i = 0; i < nr_cells; ++i) - multipole_to_mesh_CIC(&multipoles[i], rho, N, N / box_size); + multipole_to_mesh_CIC(&multipoles[i], rho, N, cell_fac); /* Fourier transform to go to magic-land */ fftw_execute(forward_plan); /* frho now contains the Fourier transform of the density field */ /* frho contains NxNx(N/2+1) complex numbers */ - + /* Some common factors */ const double green_fac = -1. / (M_PI * box_size); const double a_smooth2 = 4. * M_PI * a_smooth * a_smooth / ((double)(N * N)); @@ -233,7 +216,7 @@ void runner_do_grav_fft(struct runner* r, int timer) { /* kz component of vector in Fourier space and 1/sinc(kz) */ const int kz = (k > N_half ? k - N : k); const double kz_d = (double)kz; - const double fz = k_fac * kz_d; + const double fz = k_fac * kz_d; const double sinc_kz_inv = (kz != 0) ? fz / sin(fz) : 1.; /* Norm of vector in Fourier space */ @@ -273,7 +256,7 @@ void runner_do_grav_fft(struct runner* r, int timer) { /* Get the potential from the mesh using CIC */ for (int i = 0; i < nr_cells; ++i) - mesh_to_multipole_CIC(&multipoles[i], rho, N, N/box_size); + mesh_to_multipole_CIC(&multipoles[i], rho, N, cell_fac); /* Clean-up the mess */ fftw_destroy_plan(forward_plan); @@ -288,3 +271,30 @@ void runner_do_grav_fft(struct runner* r, int timer) { error("No FFTW library found. Cannot compute periodic long-range forces."); #endif } + +void print_array(double* array, int N) { + + for (int k = N - 1; k >= 0; --k) { + printf("--- z = %d ---------\n", k); + for (int j = N - 1; j >= 0; --j) { + for (int i = 0; i < N; ++i) { + printf("%f ", array[i * N * N + j * N + k]); + } + printf("\n"); + } + } +} + +void print_carray(fftw_complex* array, int N) { + + for (int k = N - 1; k >= 0; --k) { + printf("--- z = %d ---------\n", k); + for (int j = N - 1; j >= 0; --j) { + for (int i = 0; i < N; ++i) { + printf("(%f %f) ", array[i * N * N + j * N + k][0], + array[i * N * N + j * N + k][1]); + } + printf("\n"); + } + } +}