Commit d53edf05 authored by Matthieu Schaller's avatar Matthieu Schaller
Browse files

First attempt at a hand-written faster version of the M2L kernel.

parent 090a5955
......@@ -33,6 +33,163 @@
/* Local headers. */
#include "inline.h"
struct potential_derivatives {
/* 0th order terms */
float D_000;
#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
/* 1st order terms */
float D_100, D_010, D_001;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
/* 2nd order terms */
float D_200, D_020, D_002;
float D_110, D_101, D_011;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
/* 3rd order terms */
float D_300, D_030, D_003;
float D_210, D_201;
float D_120, D_021;
float D_102, D_012;
float D_111;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
/* 4th order terms */
float D_400, D_040, D_004;
float D_310, D_301;
float D_130, D_031;
float D_103, D_013;
float D_220, D_202, D_022;
float D_211, D_121, D_112;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
/* 5th order terms */
float D_005, D_014, D_023;
float D_032, D_041, D_050;
float D_104, D_113, D_122;
float D_131, D_140, D_203;
float D_212, D_221, D_230;
float D_302, D_311, D_320;
float D_401, D_410, D_500;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
#error "Missing implementation for order >5"
#endif
};
__attribute__((always_inline)) INLINE static void compute_potential_derivatives(
float r_x, float r_y, float r_z, float r_inv,
struct potential_derivatives *pot) {
/* Compute some (odd) powers of 1/r */
#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
const float r_inv2 = r_inv * r_inv;
const float r_inv3 = -1.f * r_inv * r_inv2; /* -1 / r^3 */
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
const float r_inv5 = -3.f * r_inv3 * r_inv2; /* 3 / r^5 */
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
const float r_inv7 = -5.f * r_inv5 * r_inv2; /* -15 / r^7 */
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
const float r_inv9 = -7.f * r_inv7 * r_inv2; /* 105 / r^9 */
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
const float r_inv11 = -9.f * r_inv9 * r_inv2; /* -945 / r^11 */
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
#error "Missing implementation for order >5"
#endif
/* Compute some powers of r_x, r_y and r_z */
#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
const float r_x2 = r_x * r_x;
const float r_y2 = r_y * r_y;
const float r_z2 = r_z * r_z;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
const float r_x3 = r_x2 * r_x;
const float r_y3 = r_y2 * r_y;
const float r_z3 = r_z2 * r_z;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
const float r_x4 = r_x3 * r_x;
const float r_y4 = r_y3 * r_y;
const float r_z4 = r_z3 * r_z;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 4
const float r_x5 = r_x4 * r_x;
const float r_y5 = r_y4 * r_y;
const float r_z5 = r_z4 * r_z;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
#error "Missing implementation for order >5"
#endif
/* Get the 0th order term */
pot->D_000 = r_inv;
#if SELF_GRAVITY_MULTIPOLE_ORDER > 0
/* 1st order derivatives */
pot->D_100 = r_x * r_inv3;
pot->D_010 = r_y * r_inv3;
pot->D_001 = r_z * r_inv3;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 1
/* 2nd order derivatives */
pot->D_200 = r_x2 * r_inv5 + r_inv3;
pot->D_020 = r_y2 * r_inv5 + r_inv3;
pot->D_002 = r_z2 * r_inv5 + r_inv3;
pot->D_110 = r_x * r_y * r_inv5;
pot->D_101 = r_x * r_z * r_inv5;
pot->D_011 = r_y * r_z * r_inv5;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 2
/* 3rd order derivatives */
pot->D_300 = r_x3 * r_inv7 + 3.f * r_x * r_inv5;
pot->D_030 = r_y3 * r_inv7 + 3.f * r_y * r_inv5;
pot->D_003 = r_z3 * r_inv7 + 3.f * r_z * r_inv5;
pot->D_210 = r_x2 * r_y * r_inv7 + r_y * r_inv5;
pot->D_201 = r_x2 * r_z * r_inv7 + r_z * r_inv5;
pot->D_120 = r_y2 * r_x * r_inv7 + r_x * r_inv5;
pot->D_021 = r_y2 * r_z * r_inv7 + r_z * r_inv5;
pot->D_102 = r_z2 * r_x * r_inv7 + r_x * r_inv5;
pot->D_012 = r_z2 * r_y * r_inv7 + r_y * r_inv5;
pot->D_111 = r_x * r_y * r_z * r_inv7;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 3
/* 4th order derivatives */
pot->D_400 = r_x4 * r_inv9 + 6.f * r_x2 * r_inv7 + 3.f * r_inv5;
pot->D_040 = r_y4 * r_inv9 + 6.f * r_y2 * r_inv7 + 3.f * r_inv5;
pot->D_004 = r_z4 * r_inv9 + 6.f * r_z2 * r_inv7 + 3.f * r_inv5;
pot->D_310 = r_x3 * r_y * r_inv9 + 3.f * r_x * r_y * r_inv7;
pot->D_301 = r_x3 * r_z * r_inv9 + 3.f * r_x * r_z * r_inv7;
pot->D_130 = r_y3 * r_x * r_inv9 + 3.f * r_y * r_x * r_inv7;
pot->D_031 = r_y3 * r_z * r_inv9 + 3.f * r_y * r_z * r_inv7;
pot->D_103 = r_z3 * r_x * r_inv9 + 3.f * r_z * r_x * r_inv7;
pot->D_013 = r_z3 * r_y * r_inv9 + 3.f * r_z * r_y * r_inv7;
pot->D_220 = r_x2 * r_y2 * r_inv9 + r_x2 * r_inv7 + r_y2 * r_inv7 + r_inv5;
pot->D_202 = r_x2 * r_z2 * r_inv9 + r_x2 * r_inv7 + r_z2 * r_inv7 + r_inv5;
pot->D_022 = r_y2 * r_z2 * r_inv9 + r_y2 * r_inv7 + r_z2 * r_inv7 + r_inv5;
pot->D_211 = r_x2 * r_y * r_z * r_inv9 + r_y * r_z * r_inv5;
pot->D_121 = r_y2 * r_x * r_z * r_inv9 + r_x * r_z * r_inv5;
pot->D_112 = r_z2 * r_x * r_z * r_inv9 + r_x * r_y * r_inv5;
#endif
#if SELF_GRAVITY_MULTIPOLE_ORDER > 5
#error "Missing implementation for order >5"
#endif
}
#if 0
/*************************/
/* 0th order derivatives */
/*************************/
......@@ -1084,4 +1241,6 @@ __attribute__((always_inline)) INLINE static double D_500(double r_x,
/* 26 zero-valued terms not written out */
}
#endif
#endif /* SWIFT_GRAVITY_DERIVATIVE_H */
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment