diff --git a/theory/Multipoles/fmm_summary.tex b/theory/Multipoles/fmm_summary.tex index dd191d6c99da9f73c8fcac40facf4425adcbff8c..1ff9ab88ada6836d6118c7cfd74e39f4d1c504b3 100644 --- a/theory/Multipoles/fmm_summary.tex +++ b/theory/Multipoles/fmm_summary.tex @@ -6,7 +6,7 @@ evaluate for each particle in a system the potential and associated forces generated by all the other particles. Mathematically, this means evaluate \begin{equation} - \phi(\mathbf{x}_a) = \sum_{b \neq a} G m_b\phi(\mathbf{x}_a - + \phi(\mathbf{x}_a) = \sum_{b \neq a} G m_b\varphi(\mathbf{x}_a - \mathbf{x}_b)\qquad \forall~a\in N \label{eq:fmm:n_body} \end{equation} @@ -23,10 +23,11 @@ arising from nearby particles. \\ In what follows, we use the compact multi-index notation of \cite{Dehnen2014} (repeated in appendix \ref{sec:multi_index_notation} -for completeness) to simplify expressions. $\mathbf{k}$, $\mathbf{m}$ -and $\mathbf{n}$ are multi-indices and $\mathbf{r}$, $\mathbf{R}$, -$\mathbf{x}$, $\mathbf{y}$ and $\mathbf{z}$ are vectors, whilst $a$ -and $b$ are particle indices.\\ +for completeness) to simplify expressions and ease +comparisons. $\mathbf{k}$, $\mathbf{m}$ and $\mathbf{n}$ are +multi-indices and $\mathbf{r}$, $\mathbf{R}$, $\mathbf{x}$, +$\mathbf{y}$ and $\mathbf{z}$ are vectors, whilst $a$ and $b$ are +particle indices.\\ \begin{figure} \includegraphics[width=\columnwidth]{cells.pdf} @@ -46,34 +47,34 @@ with centres of mass $\mathbf{z}_A$ and $\mathbf{z}_B$ respectively, as shown on Fig.~\ref{fig:fmm:cells}, the potential generated by $b$ at the location of $a$ can be rewritten as \begin{align} - \phi(\mathbf{x}_a - \mathbf{x}_b) - &= \phi\left(\mathbf{x}_a - \mathbf{z}_A - \mathbf{x}_b + + \varphi(\mathbf{x}_a - \mathbf{x}_b) + &= \varphi\left(\mathbf{x}_a - \mathbf{z}_A - \mathbf{x}_b + \mathbf{z}_B + \mathbf{z}_A - \mathbf{z}_B\right) \nonumber \\ - &= \phi\left(\mathbf{r}_a - \mathbf{r}_b + \mathbf{R}\right) + &= \varphi\left(\mathbf{r}_a - \mathbf{r}_b + \mathbf{R}\right) \nonumber \\ &= \sum_\mathbf{k} \frac{1}{\mathbf{k}!} \left(\mathbf{r}_a - - \mathbf{r}_b\right)^{\mathbf{k}} \nabla^{\mathbf{k}}\phi(\mathbf{R}) + \mathbf{r}_b\right)^{\mathbf{k}} \nabla^{\mathbf{k}}\varphi(\mathbf{R}) \nonumber \\ &= \sum_\mathbf{k} \frac{1}{\mathbf{k}!} \sum_{\mathbf{n} < \mathbf{k}} \binom{\mathbf{k}}{\mathbf{n}} \mathbf{r}_a^{\mathbf{n}} \left(-\mathbf{r}_b\right)^{\mathbf{k} - \mathbf{n}} - \nabla^{\mathbf{k}}\phi(\mathbf{R})\nonumber \\ + \nabla^{\mathbf{k}}\varphi(\mathbf{R})\nonumber \\ &= \sum_\mathbf{n} \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} \sum_\mathbf{m} \frac{1}{\mathbf{m}!} - \left(-\mathbf{r}_b\right)^\mathbf{m} \nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R}), + \left(-\mathbf{r}_b\right)^\mathbf{m} \nabla^{\mathbf{n}+\mathbf{m}} \varphi(\mathbf{R}), \end{align} -where we used the Taylor expansion of $\phi$ around $\mathbf{R} \equiv +where we used the Taylor expansion of $\varphi$ around $\mathbf{R} \equiv \mathbf{z}_A - \mathbf{z}_B$ on the third line, used $\mathbf{r}_a \equiv \mathbf{x}_a - \mathbf{z}_A$, $\mathbf{r}_b \equiv \mathbf{x}_b - \mathbf{z}_B$ throughout and defined $\mathbf{m} \equiv \mathbf{k}-\mathbf{n}$ on the last line. Expanding the series only up to order $p$, we get \begin{equation} - \phi(\mathbf{x}_a - \mathbf{x}_b) \approx \sum_{\mathbf{n}}^{p} + \varphi(\mathbf{x}_a - \mathbf{x}_b) \approx \sum_{\mathbf{n}}^{p} \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} \sum_{\mathbf{m}}^{p -|\mathbf{n}|} \frac{1}{\mathbf{m}!} \left(-\mathbf{r}_b\right)^\mathbf{m} - \nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R}), + \nabla^{\mathbf{n}+\mathbf{m}} \varphi(\mathbf{R}), \label{eq:fmm:fmm_one_part} \end{equation} with the approximation converging as $p\rightarrow\infty$ towards the @@ -82,13 +83,13 @@ correct value provided $|\mathbf{R}|<|\mathbf{r}_a + combine their contributions to the potential at location $\mathbf{x}_a$ in cell $A$, we get \begin{align} - \phi_{BA}(\mathbf{x}_a) &= \sum_{b\in B} m_b\phi(\mathbf{x}_a - + \phi_{BA}(\mathbf{x}_a) &= \sum_{b\in B}G m_b\varphi(\mathbf{x}_a - \mathbf{x}_b) \label{eq:fmm:fmm_one_cell} \\ - &\approx \sum_{\mathbf{n}}^{p} + &\approx G\sum_{\mathbf{n}}^{p} \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} \sum_{\mathbf{m}} ^{p -|\mathbf{n}|} \frac{1}{\mathbf{m}!} \sum_{b\in B} m_b\left(-\mathbf{r}_b\right)^\mathbf{m} - \nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R}) \nonumber. + \nabla^{\mathbf{n}+\mathbf{m}} \varphi(\mathbf{R}) \nonumber. \end{align} This last equation forms the basis of the FMM. The algorithm decomposes the equation into three separated sums evaluated at @@ -106,12 +107,12 @@ compute the second kernel, \textsc{M2L} (multipole to local expansion), which corresponds to the interaction of a cell with another one: \begin{equation} - \mathsf{F}_{\mathbf{n}}(\mathbf{z}_A) = \sum_{\mathbf{m}}^{p -|\mathbf{n}|} + \mathsf{F}_{\mathbf{n}}(\mathbf{z}_A) = G\sum_{\mathbf{m}}^{p -|\mathbf{n}|} \mathsf{M}_{\mathbf{m}}(\mathbf{z}_B) \mathsf{D}_{\mathbf{n}+\mathbf{m}}(\mathbf{R}), \label{eq:fmm:M2L} \end{equation} where $\mathsf{D}_{\mathbf{n}+\mathbf{m}}(\mathbf{R}) \equiv -\nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R})$ is an order $n+m$ +\nabla^{\mathbf{n}+\mathbf{m}} \varphi(\mathbf{R})$ is an order $n+m$ derivative of the potential. This is the computationally expensive step of the FMM algorithm as the number of operations in a naive implementation using cartesian coordinates scales as @@ -165,8 +166,8 @@ multiplications (provided $\mathsf{D}$ can be evaluated quickly, see Sec.~\ref{ssec:grav_derivatives}), which are extremely efficient instructions on modern architectures. However, the fully expanded sums can lead to rather large and prone to typo expressions. To avoid any -misshap, we use a \texttt{python} to generate C code in which all the -sums are unrolled and correct by construction. In \swift, we +mishaps, we use a \texttt{python} script to generate C code in which +all the sums are unrolled and correct by construction. In \swift, we implemented the kernels up to order $p=5$, as it proved to be accurate enough for our purpose, but this could be extended to higher order easily. This implies storing $56$ numbers per cell for each diff --git a/theory/Multipoles/gravity_derivatives.tex b/theory/Multipoles/gravity_derivatives.tex index 4d9cf6c918c2214ddce432a0067cf4e2f9aaf682..e4569ef960fae5e92343f1d99902a5c14fd6ee5c 100644 --- a/theory/Multipoles/gravity_derivatives.tex +++ b/theory/Multipoles/gravity_derivatives.tex @@ -2,36 +2,36 @@ \label{ssec:grav_derivatives} The calculation of all the -$\mathsf{D}_\mathbf{n}(x,y,z) \equiv \nabla^{\mathbf{n}}\phi(x,y,z)$ terms up +$\mathsf{D}_\mathbf{n}(x,y,z) \equiv \nabla^{\mathbf{n}}\varphi(x,y,z)$ terms up to the relevent order can be quite tedious and it is beneficial to automatize the whole setup. Ideally, one would like to have an expression for each of these terms that is only made of multiplications and additions of each of the coordinates and the inverse distance. We -achieve this by writing $\phi$ as a composition of functions -$\phi(u(x,y,z))$ and apply the \textit{Fa\`a di Bruno} +achieve this by writing $\varphi$ as a composition of functions +$\varphi(u(x,y,z))$ and apply the \textit{Fa\`a di Bruno} formula \citep[i.e. the ``chain rule'' for higher order derivatives, see e.g.][]{Hardy2006} to construct our terms: \begin{equation} \label{eq:faa_di_bruno} -\frac{\partial^n}{\partial x_1 \cdots \partial x_n} \phi(u) -= \sum_{A} \phi^{(|A|)}(u) \prod_{B \in +\frac{\partial^n}{\partial x_1 \cdots \partial x_n} \varphi(u) += \sum_{A} \varphi^{(|A|)}(u) \prod_{B \in A} \frac{\partial^{|B|}}{\prod_{c\in B}\partial x_c} u(x,y,z), \end{equation} where $A$ is the set of all partitions of $\lbrace1,\cdots, n\rbrace$, $B$ is a block of a partition in the set $A$ and $|\cdot|$ denotes the -cardinality of a set. For generic functions $\phi$ and $u$ this +cardinality of a set. For generic functions $\varphi$ and $u$ this formula yields an untracktable number of terms; an 8th-order derivative will have $4140$ (!) terms in the sum\footnote{The number of terms in the sum is given by the Bell number of the same order.}. \\ For the un-softened gravitational potential, we choose to write \begin{align} - \phi(x,y,z) &= 1 / \sqrt{u(x,y,z)}, \\ + \varphi(x,y,z) &= 1 / \sqrt{u(x,y,z)}, \\ u(x,y,z) &= x^2 + y^2 + z^2. \end{align} -This choice allows to have derivatives of any order of $\phi(u)$ that +This choice allows to have derivatives of any order of $\varphi(u)$ that can be easily expressed and only depend on powers of $u$: \begin{equation} -\phi^{(n)}(u) = (-1)^n\cdot\frac{(2n-1)!!}{2^n}\cdot\frac{1}{u^{n+\frac{1}{2}}}, +\varphi^{(n)}(u) = (-1)^n\cdot\frac{(2n-1)!!}{2^n}\cdot\frac{1}{u^{n+\frac{1}{2}}}, \end{equation} where $!!$ denotes the semi-factorial. More importantly, this choice of decomposition allows us to have non-zero derivatives of $u$ @@ -39,7 +39,7 @@ only up to second order in $x$, $y$ or $z$. The number of non-zero terms in eq. \ref{eq:faa_di_bruno} is hence drastically reduced. For instance, when computing $\mathsf{D}_{(4,1,3)}(\mathbf{r}) \equiv \frac{\partial^8}{\partial x^4 \partial y \partial z^3} -\phi(u(x,y,z))$, $4100$ of the $4140$ terms will involve at least one +\varphi(u(x,y,z))$, $4100$ of the $4140$ terms will involve at least one zero-valued derivative (e.g. $\partial^3/\partial x^3$ or $\partial^2/\partial x\partial y$) of $u$. Furthermore, among the 40 remaining terms, many will involve the same combination of derivatives diff --git a/theory/Multipoles/plot_potential.py b/theory/Multipoles/plot_potential.py index 620cc5712c546fe1fc7a22dd9b3fb5483f70226e..8761314572cdbda1304cdf882f920651b58be08e 100644 --- a/theory/Multipoles/plot_potential.py +++ b/theory/Multipoles/plot_potential.py @@ -141,7 +141,7 @@ plot([epsilon, epsilon], [-10, 10], 'k-', alpha=0.5, lw=0.5) plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) ylim(0, 2.3) -ylabel("$\\phi(r)$", labelpad=1) +ylabel("$\\varphi(r)$", labelpad=1) #yticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0.*epsilon), "$%.1f$"%(0.5*epsilon), "$%.1f$"%(1.*epsilon), "$%.1f$"%(1.5*epsilon), "$%.1f$"%(2.*epsilon)]) xlim(0,r_max_plot) @@ -163,6 +163,6 @@ xticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0./epsilon), "", "$%.1f$"%(1./eps xlabel("$r/H$", labelpad=-7) ylim(0, 0.95) -ylabel("$|\\overrightarrow{\\nabla}\\phi(r)|$", labelpad=0) +ylabel("$|\\overrightarrow{\\nabla}\\varphi(r)|$", labelpad=0) savefig("potential.pdf") diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex index 9010f06e91e8e0df0d36b539dee09d7b5a67bede..5c7b1e6566d7d51b5d27ea3c24d785571e1ad692 100644 --- a/theory/Multipoles/potential_derivatives.tex +++ b/theory/Multipoles/potential_derivatives.tex @@ -7,7 +7,7 @@ the notation $\mathbf{r}=(r_x, r_y, r_z)$, $r = |\mathbf{r}|$ and $u=r/H$. Starting from the potential (Eq. \ref{eq:fmm:potential}, reproduced here for clarity), \begin{align} -\mathsf{D}_{000}(\mathbf{r}) = \phi (\mathbf{r},H) = +\mathsf{D}_{000}(\mathbf{r}) = \varphi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} \frac{1}{H} \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right) & \mbox{if} & u < 1,\\ \frac{1}{r} & \mbox{if} & u \geq 1, @@ -19,7 +19,7 @@ we can construct the higher order terms by successively applying the relevant ones here split by order. \begin{align} -\mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \phi (\mathbf{r},H) = +\mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \varphi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_x}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ -\frac{r_x}{r^3} & \mbox{if} & u \geq 1, @@ -30,7 +30,7 @@ relevant ones here split by order. \noindent\rule{6cm}{0.4pt} \begin{align} -\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \phi (\mathbf{r},H) = +\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \varphi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} \frac{r_x^2}{H^5}\left(-105u^3+360u^2-420u+168\right) - \frac{1}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ @@ -40,7 +40,7 @@ relevant ones here split by order. \end{align} \begin{align} -\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \phi (\mathbf{r},H) = +\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \varphi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} \frac{r_xr_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ 3\frac{r_xr_y}{r^5} & \mbox{if} & u \geq 1, @@ -51,7 +51,7 @@ relevant ones here split by order. \noindent\rule{6cm}{0.4pt} \begin{align} -\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = +\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_x^3}{H^7} \left(315u - 720 + 420u^{-1}\right) + \frac{3r_x}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ @@ -61,7 +61,7 @@ relevant ones here split by order. \end{align} \begin{align} -\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = +\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \varphi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_x^2r_y}{H^7} \left(315u - 720 + 420u^{-1}\right) + \frac{r_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ @@ -72,7 +72,7 @@ relevant ones here split by order. \begin{align} -\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \phi (\mathbf{r},H) = +\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \varphi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_xr_yr_z}{H^7} \left(315u - 720 + 420u^{-1}\right) & \mbox{if} & u < 1,\\ -15\frac{r_xr_yr_z}{r^7} & \mbox{if} & u \geq 1, diff --git a/theory/Multipoles/potential_softening.tex b/theory/Multipoles/potential_softening.tex index bdfc27da290d23a8c5fbd1423c442f3948616001..aa9ee12340a3492a19dcf9048548952ef7e141e1 100644 --- a/theory/Multipoles/potential_softening.tex +++ b/theory/Multipoles/potential_softening.tex @@ -10,9 +10,10 @@ $\epsilon$. Instead of the commonly used spline kernel of is cheaper to compute and has a very similar overall shape. The C2 kernel has the advantage of being branch-free leading to an expression which is faster to evaluate using vector units available on modern -architectures; it also does not require any division. We set -$\tilde\delta(\mathbf{x}) = \rho(|\mathbf{x}|) = W(|\mathbf{x}|, -3\epsilon_{\rm Plummer})$, with $W(r, H)$ given by +architectures; it also does not require any divisions to evaluate the +softened forces. We set $\tilde\delta(\mathbf{x}) = +\rho(|\mathbf{x}|) = W(|\mathbf{x}|, 3\epsilon_{\rm Plummer})$, with +$W(r, H)$ given by \begin{align} W(r,H) &= \frac{21}{2\pi H^3} \times \nonumber \\ @@ -22,9 +23,9 @@ W(r,H) &= \frac{21}{2\pi H^3} \times \nonumber \\ \end{array} \right. \end{align} -and $u = r/H$. The potential $\phi(r,H)$ corresponding to this density distribution reads +and $u = r/H$. The potential $\varphi(r,H)$ corresponding to this density distribution reads \begin{align} -\phi = +\varphi = \left\lbrace\begin{array}{rcl} \frac{1}{H} (-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3) & \mbox{if} & u < 1,\\ \frac{1}{r} & \mbox{if} & u \geq 1.