diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index 0d9d2b8c4af40556634e0cf67e9cab45e511636d..811d6fc8f902530840bcce4cf378c72ce25d0f4f 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -23,6 +23,7 @@ /* Includes. */ #include "kernel_gravity.h" #include "kernel_long_gravity.h" +#include "multipole.h" /** * @brief Computes the intensity of the force at a point generated by a @@ -35,7 +36,7 @@ * @param h2 Square of the softening length. * @param h_inv Inverse of the softening length. * @param h_inv3 Cube of the inverse of the softening length. - * @param mass of the point-mass. + * @param mass Mass of the point-mass. * @param f_ij (return) The force intensity. */ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_full( @@ -74,7 +75,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_full( * @param h2 Square of the softening length. * @param h_inv Inverse of the softening length. * @param h_inv3 Cube of the inverse of the softening length. - * @param mass of the point-mass. + * @param mass Mass of the point-mass. * @param rlr_inv Inverse of the mesh smoothing scale. * @param f_ij (return) The force intensity. */ @@ -111,4 +112,51 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav_pp_truncated( *f_ij *= corr_lr; } +/** + * @brief Computes the force at a point generated by a multipole. + * + * This uses the quadrupole terms only and defaults to the monopole if + * the code is compiled with low-order gravity only. + * + * @param r_x x-component of the distance vector to the multipole. + * @param r_y y-component of the distance vector to the multipole. + * @param r_z z-component of the distance vector to the multipole. + * @param r2 Square of the distance vector to the multipole. + * @param h The softening length. + * @param h_inv Inverse of the softening length. + * @param m The multipole. + * @param f_x (return) The x-component of the acceleration. + * @param f_y (return) The y-component of the acceleration. + * @param f_z (return) The z-component of the acceleration. + */ +__attribute__((always_inline)) INLINE static void runner_iact_grav_pm( + float r_x, float r_y, float r_z, float r2, float h, float h_inv, + const struct multipole *m, float *f_x, float *f_y, float *f_z) { + +#if SELF_GRAVITY_MULTIPOLE_ORDER < 3 + runner_iact_grav_pp_full(r2, h * h, h_inv, h_inv3, m->M_000, f_ij); +#else + + /* Get the inverse distance */ + const float r_inv = 1.f / sqrtf(r2); + + struct potential_derivatives_M2P pot; + compute_potential_derivatives_M2P(r_x, r_y, r_z, r2, r_inv, h, h_inv, &pot); + + /* 1st order terms (monopole) */ + *f_x = m->M_000 * pot.D_100; + *f_y = m->M_000 * pot.D_010; + *f_z = m->M_000 * pot.D_001; + + /* 3rd order terms (quadrupole) */ + *f_x += m->M_200 * pot.D_300 + m->M_020 * pot.D_120 + m->M_002 * pot.D_102; + *f_y += m->M_200 * pot.D_210 + m->M_020 * pot.D_030 + m->M_002 * pot.D_012; + *f_z += m->M_200 * pot.D_201 + m->M_020 * pot.D_021 + m->M_002 * pot.D_003; + *f_x += m->M_110 * pot.D_210 + m->M_101 * pot.D_201 + m->M_011 * pot.D_111; + *f_y += m->M_110 * pot.D_120 + m->M_101 * pot.D_111 + m->M_011 * pot.D_021; + *f_z += m->M_110 * pot.D_111 + m->M_101 * pot.D_102 + m->M_011 * pot.D_012; + +#endif +} + #endif /* SWIFT_DEFAULT_GRAVITY_IACT_H */ diff --git a/src/gravity_cache.h b/src/gravity_cache.h index 14b672233aa9958ec39af32a87baead98c0bae04..194ef837396b3e72441c700bd92b27712178ef6e 100644 --- a/src/gravity_cache.h +++ b/src/gravity_cache.h @@ -134,16 +134,11 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate( int gcount_padded, const double shift[3]) { /* Make the compiler understand we are in happy vectorization land */ - float *restrict x = c->x; - float *restrict y = c->y; - float *restrict z = c->z; - float *restrict m = c->m; - float *restrict epsilon = c->epsilon; - swift_align_information(x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT); - swift_align_information(m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Fill the input caches */ @@ -183,16 +178,11 @@ __attribute__((always_inline)) INLINE void gravity_cache_populate_no_shift( int gcount_padded) { /* Make the compiler understand we are in happy vectorization land */ - float *restrict x = c->x; - float *restrict y = c->y; - float *restrict z = c->z; - float *restrict m = c->m; - float *restrict epsilon = c->epsilon; - swift_align_information(x, SWIFT_CACHE_ALIGNMENT); - swift_align_information(y, SWIFT_CACHE_ALIGNMENT); - swift_align_information(z, SWIFT_CACHE_ALIGNMENT); - swift_align_information(epsilon, SWIFT_CACHE_ALIGNMENT); - swift_align_information(m, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, x, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, y, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, z, c->x, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, epsilon, c->epsilon, SWIFT_CACHE_ALIGNMENT); + swift_declare_aligned_ptr(float, m, c->m, SWIFT_CACHE_ALIGNMENT); swift_assume_size(gcount_padded, VEC_SIZE); /* Fill the input caches */ diff --git a/src/gravity_derivatives.h b/src/gravity_derivatives.h index 27aa4f12ae41c74ab5a9319aeedea850a0716766..cf8aa54338b2e87e8bf5f2cc453ad7417eea5804 100644 --- a/src/gravity_derivatives.h +++ b/src/gravity_derivatives.h @@ -34,7 +34,11 @@ #include "inline.h" #include "kernel_gravity.h" -struct potential_derivatives { +/** + * @brief Structure containing all the derivatives of the potential field + * required for the M2L kernel + */ +struct potential_derivatives_M2L { /* 0th order terms */ float D_000; @@ -85,9 +89,26 @@ struct potential_derivatives { #endif }; +/** + * @brief Structure containing all the derivatives of the potential field + * required for the M2P kernel + */ +struct potential_derivatives_M2P { + + /* 1st order terms */ + float D_100, D_010, D_001; + + /* 3rd order terms */ + float D_300, D_030, D_003; + float D_210, D_201; + float D_120, D_021; + float D_102, D_012; + float D_111; +}; + /** * @brief Compute all the relevent derivatives of the softened and truncated - * gravitational potential. + * gravitational potential for the M2L kernel. * * @param r_x x-component of distance vector * @param r_y y-component of distance vector @@ -95,13 +116,13 @@ struct potential_derivatives { * @param r2 Square norm of distance vector * @param r_inv Inverse norm of distance vector * @param eps Softening length. - * @param eps2 Square of softening length. * @param eps_inv Inverse of softening length. * @param pot (return) The structure containing all the derivatives. */ -__attribute__((always_inline)) INLINE static void compute_potential_derivatives( - float r_x, float r_y, float r_z, float r2, float r_inv, float eps, - float eps2, float eps_inv, struct potential_derivatives *pot) { +__attribute__((always_inline)) INLINE static void +compute_potential_derivatives_M2L(float r_x, float r_y, float r_z, float r2, + float r_inv, float eps, float eps_inv, + struct potential_derivatives_M2L *pot) { float Dt_1; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 @@ -121,7 +142,7 @@ __attribute__((always_inline)) INLINE static void compute_potential_derivatives( #endif /* Un-softened case */ - if (r2 > eps2) { + if (r2 > eps * eps) { Dt_1 = r_inv; #if SELF_GRAVITY_MULTIPOLE_ORDER > 0 @@ -290,4 +311,79 @@ __attribute__((always_inline)) INLINE static void compute_potential_derivatives( #endif } +/** + * @brief Compute all the relevent derivatives of the softened and truncated + * gravitational potential for the M2P kernel. + * + * @param r_x x-component of distance vector + * @param r_y y-component of distance vector + * @param r_z z-component of distance vector + * @param r2 Square norm of distance vector + * @param r_inv Inverse norm of distance vector + * @param eps Softening length. + * @param eps_inv Inverse of softening length. + * @param pot (return) The structure containing all the derivatives. + */ +__attribute__((always_inline)) INLINE static void +compute_potential_derivatives_M2P(float r_x, float r_y, float r_z, float r2, + float r_inv, float eps, float eps_inv, + struct potential_derivatives_M2P *pot) { + + float Dt_1; + float Dt_3; + float Dt_5; + float Dt_7; + + /* Un-softened case */ + if (r2 > eps * eps) { + + const float r_inv2 = r_inv * r_inv; + + Dt_1 = r_inv; + Dt_3 = -1.f * Dt_1 * r_inv2; /* -1 / r^3 */ + Dt_5 = -3.f * Dt_3 * r_inv2; /* 3 / r^5 */ + Dt_7 = -5.f * Dt_5 * r_inv2; /* -15 / r^7 */ + + } else { + + const float r = r2 * r_inv; + const float u = r * eps_inv; + const float u_inv = r_inv * eps; + const float eps_inv2 = eps_inv * eps_inv; + const float eps_inv3 = eps_inv * eps_inv2; + const float eps_inv5 = eps_inv3 * eps_inv2; + const float eps_inv7 = eps_inv5 * eps_inv2; + + Dt_1 = eps_inv * D_soft_1(u, u_inv); + Dt_3 = -eps_inv3 * D_soft_3(u, u_inv); + Dt_5 = eps_inv5 * D_soft_5(u, u_inv); + Dt_7 = -eps_inv7 * D_soft_7(u, u_inv); + } + + /* Compute some powers of r_x, r_y and r_z */ + const float r_x2 = r_x * r_x; + const float r_y2 = r_y * r_y; + const float r_z2 = r_z * r_z; + const float r_x3 = r_x2 * r_x; + const float r_y3 = r_y2 * r_y; + const float r_z3 = r_z2 * r_z; + + /* 1st order derivatives */ + pot->D_100 = r_x * Dt_3; + pot->D_010 = r_y * Dt_3; + pot->D_001 = r_z * Dt_3; + + /* 3rd order derivatives */ + pot->D_300 = r_x3 * Dt_7 + 3.f * r_x * Dt_5; + pot->D_030 = r_y3 * Dt_7 + 3.f * r_y * Dt_5; + pot->D_003 = r_z3 * Dt_7 + 3.f * r_z * Dt_5; + pot->D_210 = r_x2 * r_y * Dt_7 + r_y * Dt_5; + pot->D_201 = r_x2 * r_z * Dt_7 + r_z * Dt_5; + pot->D_120 = r_y2 * r_x * Dt_7 + r_x * Dt_5; + pot->D_021 = r_y2 * r_z * Dt_7 + r_z * Dt_5; + pot->D_102 = r_z2 * r_x * Dt_7 + r_x * Dt_5; + pot->D_012 = r_z2 * r_y * Dt_7 + r_y * Dt_5; + pot->D_111 = r_x * r_y * r_z * Dt_7; +} + #endif /* SWIFT_GRAVITY_DERIVATIVE_H */ diff --git a/src/multipole.h b/src/multipole.h index d0dfb4ecd76e0017df01d7b5f4bf860d918f0639..2e3bab65c314cdcf94e6eabdac097ca113150e3a 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -1517,7 +1517,6 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, /* Recover some constants */ const float eps = props->epsilon; - const float eps2 = props->epsilon2; const float eps_inv = props->epsilon_inv; /* Compute distance vector */ @@ -1537,9 +1536,8 @@ INLINE static void gravity_M2L(struct grav_tensor *l_b, const float r_inv = 1. / sqrtf(r2); /* Compute all derivatives */ - struct potential_derivatives pot; - compute_potential_derivatives(dx, dy, dz, r2, r_inv, eps, eps2, eps_inv, - &pot); + struct potential_derivatives_M2L pot; + compute_potential_derivatives_M2L(dx, dy, dz, r2, r_inv, eps, eps_inv, &pot); #ifdef SWIFT_DEBUG_CHECKS /* Count interactions */ diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 906e9ad35572b643a5c2126f3dc25f67532e30a4..0dd6b2cdfd430e471fc13087afb0b2fcde947b97 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1039,7 +1039,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, const float hj = pj->h; const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; if (dj - rshift > di_max) continue; - + double pjx[3]; for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; const float hjg2 = hj * hj * kernel_gamma2; @@ -1451,7 +1451,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { const float hj = pj->h; const double dj = sort_j[pjd].d - hj * kernel_gamma - dx_max + rshift; if (dj - rshift > di_max) continue; - + double pjx[3]; for (int k = 0; k < 3; k++) pjx[k] = pj->x[k] + shift[k]; const float hjg2 = hj * hj * kernel_gamma2; diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index ab71ba20ee08c330ec5799717dabc8a6147b2553..b14ab074ee8c70c9ac2dddcec5e825a6bf5c22ce 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -209,8 +209,8 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci, /* Recover the multipole info and shift the CoM locations */ const float rmax_i = ci->multipole->r_max; const float rmax_j = cj->multipole->r_max; - const float multi_mass_i = ci->multipole->m_pole.M_000; - const float multi_mass_j = cj->multipole->m_pole.M_000; + const struct multipole *multi_i = &ci->multipole->m_pole; + const struct multipole *multi_j = &cj->multipole->m_pole; const float CoM_i[3] = {ci->multipole->CoM[0] - loc_mean[0], ci->multipole->CoM[1] - loc_mean[1], ci->multipole->CoM[2] - loc_mean[2]}; @@ -248,14 +248,14 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci, if (gravity_multipole_accept(0., rmax_j, theta_crit2, r2)) { /* Interact! */ - float f_ij; - runner_iact_grav_pp_full(r2, h2_i, h_inv_i, h_inv3_i, multi_mass_j, - &f_ij); + float f_x, f_y, f_z; + runner_iact_grav_pm(dx, dy, dz, r2, h_i, h_inv_i, multi_j, &f_x, &f_y, + &f_z); /* Store it back */ - ci_cache->a_x[pid] = -f_ij * dx; - ci_cache->a_y[pid] = -f_ij * dy; - ci_cache->a_z[pid] = -f_ij * dz; + ci_cache->a_x[pid] = f_x; + ci_cache->a_y[pid] = f_y; + ci_cache->a_z[pid] = f_z; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */ @@ -353,14 +353,14 @@ void runner_dopair_grav_pp_full(struct runner *r, struct cell *ci, if (gravity_multipole_accept(0., rmax_i, theta_crit2, r2)) { /* Interact! */ - float f_ji; - runner_iact_grav_pp_full(r2, h2_j, h_inv_j, h_inv3_j, multi_mass_i, - &f_ji); + float f_x, f_y, f_z; + runner_iact_grav_pm(dx, dy, dz, r2, h_j, h_inv_j, multi_i, &f_x, &f_y, + &f_z); /* Store it back */ - cj_cache->a_x[pjd] = -f_ji * dx; - cj_cache->a_y[pjd] = -f_ji * dy; - cj_cache->a_z[pjd] = -f_ji * dz; + cj_cache->a_x[pjd] = f_x; + cj_cache->a_y[pjd] = f_y; + cj_cache->a_z[pjd] = f_z; #ifdef SWIFT_DEBUG_CHECKS /* Update the interaction counter */