diff --git a/configure.ac b/configure.ac index 75a333a3d8f6554752dc70978c3ca45aecb09dc9..91ee913c03d5559ed496137c5e3b9422f0808848 100644 --- a/configure.ac +++ b/configure.ac @@ -829,10 +829,10 @@ esac # Gravity multipole order AC_ARG_WITH([multipole-order], [AS_HELP_STRING([--with-multipole-order=<order>], - [order of the multipole and gravitational field expansion @<:@ default: 3@:>@] + [order of the multipole and gravitational field expansion @<:@ default: 4@:>@] )], [with_multipole_order="$withval"], - [with_multipole_order="3"] + [with_multipole_order="4"] ) AC_DEFINE_UNQUOTED([SELF_GRAVITY_MULTIPOLE_ORDER], [$with_multipole_order], [Multipole order]) diff --git a/theory/Multipoles/bibliography.bib b/theory/Multipoles/bibliography.bib index 12e274dd63093ba1e14750249f2538c092e5268a..193db42ea4947e49930b79cbd663562d971ec2d4 100644 --- a/theory/Multipoles/bibliography.bib +++ b/theory/Multipoles/bibliography.bib @@ -96,3 +96,69 @@ doi="10.1007/BF02123482", url="http://dx.doi.org/10.1007/BF02123482" } + + +@article{Greengard1987, +title = "A fast algorithm for particle simulations", +journal = "Journal of Computational Physics", +volume = "73", +number = "2", +pages = "325 - 348", +year = "1987", +note = "", +issn = "0021-9991", +doi = "http://dx.doi.org/10.1016/0021-9991(87)90140-9", +url = "http://www.sciencedirect.com/science/article/pii/0021999187901409", +author = "L Greengard and V Rokhlin", +} + +@article{Cheng1999, +title = "A Fast Adaptive Multipole Algorithm in Three Dimensions", +journal = "Journal of Computational Physics", +volume = "155", +number = "2", +pages = "468 - 498", +year = "1999", +note = "", +issn = "0021-9991", +doi = "http://dx.doi.org/10.1006/jcph.1999.6355", +url = "http://www.sciencedirect.com/science/article/pii/S0021999199963556", +author = "H. Cheng and L. Greengard and V. Rokhlin", +keywords = "Laplace equation", +keywords = "translation operators", +keywords = "fast multipole method", +keywords = "adaptive algorithms" +} + + +@ARTICLE{Dehnen2000, + author = {{Dehnen}, W.}, + title = "{A Very Fast and Momentum-conserving Tree Code}", + journal = {\apjl}, + eprint = {astro-ph/0003209}, + keywords = {Celestial Mechanics, Stellar Dynamics, Methods: n-Body Simulations, Methods: Numerical}, + year = 2000, + month = jun, + volume = 536, + pages = {L39-L42}, + doi = {10.1086/312724}, + adsurl = {http://adsabs.harvard.edu/abs/2000ApJ...536L..39D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + +@ARTICLE{Dehnen2002, + author = {{Dehnen}, W.}, + title = "{A Hierarchical $\lt$E10$\gt$O$\lt$/E10$\gt$(N) Force Calculation Algorithm}", + journal = {Journal of Computational Physics}, + eprint = {astro-ph/0202512}, + year = 2002, + month = jun, + volume = 179, + pages = {27-42}, + doi = {10.1006/jcph.2002.7026}, + adsurl = {http://adsabs.harvard.edu/abs/2002JCoPh.179...27D}, + adsnote = {Provided by the SAO/NASA Astrophysics Data System} +} + + + diff --git a/theory/Multipoles/cells.odg b/theory/Multipoles/cells.odg new file mode 100644 index 0000000000000000000000000000000000000000..ada8fd7a1a6e746fca93f2b1ed04b78a6b7f9097 Binary files /dev/null and b/theory/Multipoles/cells.odg differ diff --git a/theory/Multipoles/cells.pdf b/theory/Multipoles/cells.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b1c144ce83e44b42f3fb7dab48b675cca19a288e Binary files /dev/null and b/theory/Multipoles/cells.pdf differ diff --git a/theory/Multipoles/fmm_standalone.tex b/theory/Multipoles/fmm_standalone.tex index fcd727a89abe95bba69b23c58ce5067c8cc53211..dc4266a23110873ff38ccbec4d71345e2780d6b2 100644 --- a/theory/Multipoles/fmm_standalone.tex +++ b/theory/Multipoles/fmm_standalone.tex @@ -4,27 +4,45 @@ \usepackage{times} \newcommand{\swift}{{\sc Swift}\xspace} +\newcommand{\nbody}{$N$-body\xspace} %opening \title{FMM in SWIFT} \author{Matthieu Schaller} - \begin{document} +\date{\today} + +\pagerange{\pageref{firstpage}--\pageref{lastpage}} \pubyear{2014} + \maketitle -We use the multi-index notation of \cite{Dehnen2014} to simplify expressions. +\label{firstpage} + +\begin{abstract} +Making gravity great again. +\end{abstract} + +\begin{keywords} +\end{keywords} + +\section{Gravity in \swift} +\label{sec:gravity} \input{potential_softening} +\input{fmm_summary} \input{gravity_derivatives} +\input{mesh_summary} \bibliographystyle{mnras} \bibliography{./bibliography.bib} \appendix +\input{vector_notation} \onecolumn \input{potential_derivatives} +\label{lastpage} \end{document} diff --git a/theory/Multipoles/fmm_summary.tex b/theory/Multipoles/fmm_summary.tex new file mode 100644 index 0000000000000000000000000000000000000000..dd191d6c99da9f73c8fcac40facf4425adcbff8c --- /dev/null +++ b/theory/Multipoles/fmm_summary.tex @@ -0,0 +1,181 @@ +\subsection{Evaluating the forces using the Fast Multipole Method} +\label{ssec:fmm_summary} + +The algorithmically challenging aspect of the \nbody problem is to +evaluate for each particle in a system the potential and associated +forces generated by all the other particles. Mathematically, this means +evaluate +\begin{equation} + \phi(\mathbf{x}_a) = \sum_{b \neq a} G m_b\phi(\mathbf{x}_a - + \mathbf{x}_b)\qquad \forall~a\in N + \label{eq:fmm:n_body} +\end{equation} +efficiently for large numbers of particles $N$. In the case of collisionless +dynamics, the particles are a mere Monte-Carlo sampling of the +underlying coarse-grained phase-space distribution which justifies the +use of approximate method to evaluate Eq.~\ref{eq:fmm:n_body}. The +\emph{Fast Multipole Method} (FMM) \citep{Greengard1987, Cheng1999}, +popularized in the field and adapted specifically for gravity solvers +by \cite{Dehnen2000, Dehnen2002}, is an $\mathcal{O}(N)$ method +designed to solve Eq.~\ref{eq:fmm:n_body} by expanding the potential both +around $\mathbf{x}_i$ and $\mathbf{x}_j$ and grouping similar terms +arising from nearby particles. \\ + +In what follows, we use the compact multi-index notation of +\cite{Dehnen2014} (repeated in appendix \ref{sec:multi_index_notation} +for completeness) to simplify expressions. $\mathbf{k}$, $\mathbf{m}$ +and $\mathbf{n}$ are multi-indices and $\mathbf{r}$, $\mathbf{R}$, +$\mathbf{x}$, $\mathbf{y}$ and $\mathbf{z}$ are vectors, whilst $a$ +and $b$ are particle indices.\\ + +\begin{figure} +\includegraphics[width=\columnwidth]{cells.pdf} +\caption{The basics of the FMM: The potential generated by a particle + at position $\mathbf{x}_b$ on a particle at position at location + $\mathbf{x}_a$ is replaced by a Taylor expansion of the potential + around the distance vector $\mathbf{R}$ linking the two centres of mass + ($\mathbf{z}_A$ and $\mathbf{z}_B$) of cell $A$ and $B$. The + expansion converges towards the exact expression provided + $|\mathbf{R}|<|\mathbf{r}_a + \mathbf{r}_b|$.} +\label{fig:fmm:cells} +\end{figure} + + +For a single pair of particles $a$ and $b$ located in cell $A$ and $B$ +with centres of mass $\mathbf{z}_A$ and $\mathbf{z}_B$ +respectively, as shown on Fig.~\ref{fig:fmm:cells}, the potential +generated by $b$ at the location of $a$ can be rewritten as +\begin{align} + \phi(\mathbf{x}_a - \mathbf{x}_b) + &= \phi\left(\mathbf{x}_a - \mathbf{z}_A - \mathbf{x}_b + + \mathbf{z}_B + \mathbf{z}_A - \mathbf{z}_B\right) \nonumber \\ + &= \phi\left(\mathbf{r}_a - \mathbf{r}_b + \mathbf{R}\right) + \nonumber \\ + &= \sum_\mathbf{k} \frac{1}{\mathbf{k}!} \left(\mathbf{r}_a - + \mathbf{r}_b\right)^{\mathbf{k}} \nabla^{\mathbf{k}}\phi(\mathbf{R}) + \nonumber \\ + &= \sum_\mathbf{k} \frac{1}{\mathbf{k}!} \sum_{\mathbf{n} < + \mathbf{k}} \binom{\mathbf{k}}{\mathbf{n}} \mathbf{r}_a^{\mathbf{n}} + \left(-\mathbf{r}_b\right)^{\mathbf{k} - \mathbf{n}} + \nabla^{\mathbf{k}}\phi(\mathbf{R})\nonumber \\ + &= \sum_\mathbf{n} \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} + \sum_\mathbf{m} \frac{1}{\mathbf{m}!} + \left(-\mathbf{r}_b\right)^\mathbf{m} \nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R}), +\end{align} +where we used the Taylor expansion of $\phi$ around $\mathbf{R} \equiv +\mathbf{z}_A - \mathbf{z}_B$ on the third line, used $\mathbf{r}_a +\equiv \mathbf{x}_a - \mathbf{z}_A$, $\mathbf{r}_b \equiv \mathbf{x}_b +- \mathbf{z}_B$ throughout and defined $\mathbf{m} \equiv +\mathbf{k}-\mathbf{n}$ on the last line. Expanding the series only up +to order $p$, we get +\begin{equation} + \phi(\mathbf{x}_a - \mathbf{x}_b) \approx \sum_{\mathbf{n}}^{p} + \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} \sum_{\mathbf{m}}^{p + -|\mathbf{n}|} + \frac{1}{\mathbf{m}!} \left(-\mathbf{r}_b\right)^\mathbf{m} + \nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R}), + \label{eq:fmm:fmm_one_part} +\end{equation} +with the approximation converging as $p\rightarrow\infty$ towards the +correct value provided $|\mathbf{R}|<|\mathbf{r}_a + +\mathbf{r}_b|$. If we now consider all the particles within $B$ and +combine their contributions to the potential at location +$\mathbf{x}_a$ in cell $A$, we get +\begin{align} + \phi_{BA}(\mathbf{x}_a) &= \sum_{b\in B} m_b\phi(\mathbf{x}_a - + \mathbf{x}_b) \label{eq:fmm:fmm_one_cell} \\ + &\approx \sum_{\mathbf{n}}^{p} + \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} \sum_{\mathbf{m}} + ^{p -|\mathbf{n}|} + \frac{1}{\mathbf{m}!} \sum_{b\in B} m_b\left(-\mathbf{r}_b\right)^\mathbf{m} + \nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R}) \nonumber. +\end{align} +This last equation forms the basis of the FMM. The algorithm +decomposes the equation into three separated sums evaluated at +different stages.\\ + +In a first step, multipoles are constructed from the +innermost sum. For each cell, we compute all the terms +\begin{equation} + \mathsf{M}_{\mathbf{m}}(\mathbf{z}_B) = \frac{1}{\mathbf{m}!} + \sum_{b\in B} m_b\left(-\mathbf{r}_b\right)^\mathbf{m} \label{eq:fmm:P2M} +\end{equation} +up to order $p$. This is the first kernel of the method, commonly +labelled as \textsc{P2M} (particle to multipole). In a second step, we +compute the second kernel, \textsc{M2L} (multipole to local +expansion), which corresponds to the interaction of a cell with +another one: +\begin{equation} + \mathsf{F}_{\mathbf{n}}(\mathbf{z}_A) = \sum_{\mathbf{m}}^{p -|\mathbf{n}|} + \mathsf{M}_{\mathbf{m}}(\mathbf{z}_B) + \mathsf{D}_{\mathbf{n}+\mathbf{m}}(\mathbf{R}), \label{eq:fmm:M2L} +\end{equation} +where $\mathsf{D}_{\mathbf{n}+\mathbf{m}}(\mathbf{R}) \equiv +\nabla^{\mathbf{n}+\mathbf{m}} \phi(\mathbf{R})$ is an order $n+m$ +derivative of the potential. This is the computationally expensive +step of the FMM algorithm as the number of operations in a naive +implementation using cartesian coordinates scales as +$\mathcal{O}(p^6)$. More advanced techniques +\citep[e.g.][]{Dehnen2014} can bring the cost down to +$\mathcal{O}(p^3)$, albeit at a considerable algebraic cost. For +collisionless dynamics, high accuracy is not required and low values +of $p$ are sufficient, which maintains the computational cost of the +M2L kernel at a reasonable level. +Finally, in the last step, the potential is propagated from the local +expansion centre to the particles (L2P kernel) using +\begin{equation} + \phi_{BA}(\mathbf{x}_a) = \sum_{\mathbf{n}}^{p} + \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} + \mathsf{F}_{\mathbf{n}}(\mathbf{z}_A). \label{eq:fmm:L2P} +\end{equation} +In summary, the potential generated by a cell $B$ on the particles in +cell $A$ is obtained by the successive application of the P2M, M2L and +L2P kernels. The P2M and L2P kernels are applied only once per +particle, whilst one M2L calculation has to be performed for each pair +of cells. The forces applied to the particles are obtained by the same +procedure using an extra order in the Taylor expansion. For instance, +for the acceleration along $x$, we have: +\begin{equation} + a_x(\mathbf{x}_a) = \sum_{\mathbf{n}}^{p-1} + \frac{1}{\mathbf{n}!} \mathbf{r}_a^{\mathbf{n}} + \mathsf{F}_{\mathbf{n}+\left(1,0,0\right)}(\mathbf{z}_A). \label{eq:fmm:L2P_force} +\end{equation} + +In practice, the multipoles can be constructed recursively from the +leaves of the tree to the root and the local expansions from the root +to the leaves by shifting the $\mathsf{M}$ and $\mathsf{F}$ tensors +and adding their contributions to their parent or daughter cell's +tensors respecitvely. The shifting formulas (M2M and L2L kernels) +read: + +\begin{align} + \mathsf{M}_{\mathbf{m}}(\mathbf{x} + \mathbf{y}) &= + \sum_{\mathbf{n}}^{\mathbf{m}} + \frac{\mathbf{y}^\mathbf{n}}{\mathbf{n}!}\mathsf{M}_{\mathbf{m} - + \mathbf{n}}(\mathbf{x}), \label{eq:fmm:M2M} \\ + \mathsf{F}_{\mathbf{n}}(\mathbf{x} + \mathbf{y}) &= + \sum_{\mathbf{m}}^{p-|\mathbf{n}|} + \frac{\mathbf{y}^\mathbf{m}}{\mathbf{m}!}\mathsf{F}_{\mathbf{m} + + \mathbf{n}}(\mathbf{x}). \label{eq:fmm:L2L} +\end{align} + +All the kernels (Eqs.~\ref{eq:fmm:P2M}-\ref{eq:fmm:L2L}) are rather +straightforward to evaluate as they are only made of additions and +multiplications (provided $\mathsf{D}$ can be evaluated quickly, see +Sec.~\ref{ssec:grav_derivatives}), which are extremely efficient +instructions on modern architectures. However, the fully expanded sums +can lead to rather large and prone to typo expressions. To avoid any +misshap, we use a \texttt{python} to generate C code in which all the +sums are unrolled and correct by construction. In \swift, we +implemented the kernels up to order $p=5$, as it proved to be accurate +enough for our purpose, but this could be extended to higher order +easily. This implies storing $56$ numbers per cell for each +$\textsf{M}$ and $\textsf{F}$ plus three numbers for the location of +the centre of mass. For leaf-cells with large numbers of particles, as +in \swift, this is a small memory overhead. One further small +improvement consists in choosing $\mathbf{z}_A$ to be the centre of +mass of cell $A$ rather than its geometrical centre. The first order +multipoles ($\mathsf{M}_{100},\mathsf{M}_{010},\mathsf{M}_{001}$) then +vanish by construction. This allows us to simplify some of the +expressions and helps reduce, albeit by a small fraction, the memory +footprint of the tree structure. diff --git a/theory/Multipoles/gravity_derivatives.py b/theory/Multipoles/generate_multipoles/gravity_derivatives.py similarity index 100% rename from theory/Multipoles/gravity_derivatives.py rename to theory/Multipoles/generate_multipoles/gravity_derivatives.py diff --git a/theory/Multipoles/multipoles.py b/theory/Multipoles/generate_multipoles/multipoles.py similarity index 100% rename from theory/Multipoles/multipoles.py rename to theory/Multipoles/generate_multipoles/multipoles.py diff --git a/theory/Multipoles/vector_powers.py b/theory/Multipoles/generate_multipoles/vector_powers.py similarity index 100% rename from theory/Multipoles/vector_powers.py rename to theory/Multipoles/generate_multipoles/vector_powers.py diff --git a/theory/Multipoles/gravity_derivatives.tex b/theory/Multipoles/gravity_derivatives.tex index e4c7b1565ab6c82de5623d5a643c3a8bd1fa513f..4d9cf6c918c2214ddce432a0067cf4e2f9aaf682 100644 --- a/theory/Multipoles/gravity_derivatives.tex +++ b/theory/Multipoles/gravity_derivatives.tex @@ -1,16 +1,16 @@ -\subsection{Derivatives of the gravitational potential} +\subsection{Notes on the derivatives of the gravitational potential} +\label{ssec:grav_derivatives} The calculation of all the -$D_\mathbf{n}(x,y,z) \equiv \nabla^{\mathbf{n}}\phi(x,y,z)$ terms up +$\mathsf{D}_\mathbf{n}(x,y,z) \equiv \nabla^{\mathbf{n}}\phi(x,y,z)$ terms up to the relevent order can be quite tedious and it is beneficial to automatize the whole setup. Ideally, one would like to have an -expression for each of this term that is only made of multiplications +expression for each of these terms that is only made of multiplications and additions of each of the coordinates and the inverse distance. We achieve this by writing $\phi$ as a composition of functions $\phi(u(x,y,z))$ and apply the \textit{Fa\`a di Bruno} formula \citep[i.e. the ``chain rule'' for higher order derivatives, -e.g.][]{Hardy2006} to construct our terms: - + see e.g.][]{Hardy2006} to construct our terms: \begin{equation} \label{eq:faa_di_bruno} \frac{\partial^n}{\partial x_1 \cdots \partial x_n} \phi(u) @@ -18,35 +18,38 @@ e.g.][]{Hardy2006} to construct our terms: A} \frac{\partial^{|B|}}{\prod_{c\in B}\partial x_c} u(x,y,z), \end{equation} where $A$ is the set of all partitions of $\lbrace1,\cdots, n\rbrace$, -$B$ is a block of a partition $A$ and $|\cdot|$ denotes the +$B$ is a block of a partition in the set $A$ and $|\cdot|$ denotes the cardinality of a set. For generic functions $\phi$ and $u$ this formula yields an untracktable number of terms; an 8th-order derivative will have $4140$ (!) terms in the sum\footnote{The number -of terms in the sum is given by the Bell number of the same order}. \\ -We choose to write + of terms in the sum is given by the Bell number of the same + order.}. \\ For the un-softened gravitational potential, we choose to write \begin{align} \phi(x,y,z) &= 1 / \sqrt{u(x,y,z)}, \\ u(x,y,z) &= x^2 + y^2 + z^2. \end{align} This choice allows to have derivatives of any order of $\phi(u)$ that -only depend on powers of $u$: - +can be easily expressed and only depend on powers of $u$: \begin{equation} -f^{(n)}(u) = \frac{\Gamma(\frac{1}{2})}{\Gamma(\frac{1}{2} - -n)}\frac{1}{u^{n+\frac{1}{2}}}. +\phi^{(n)}(u) = (-1)^n\cdot\frac{(2n-1)!!}{2^n}\cdot\frac{1}{u^{n+\frac{1}{2}}}, \end{equation} -More importantly, this choice of decomposition allows us to have -derivatives of $u$ only up to second order in $x$, $y$ or $z$. The -number of non-zero terms in eq. \ref{eq:faa_di_bruno} is hence -drastically reduced. For instance, when computing -$D_{(4,1,3)} \equiv \frac{\partial^8}{\partial x^4 \partial y \partial -z^3} \phi$, $4100$ of the $4140$ terms will involve at least one +where $!!$ denotes the semi-factorial. More importantly, this +choice of decomposition allows us to have non-zero derivatives of $u$ +only up to second order in $x$, $y$ or $z$. The number of non-zero +terms in eq. \ref{eq:faa_di_bruno} is hence drastically reduced. For +instance, when computing $\mathsf{D}_{(4,1,3)}(\mathbf{r}) \equiv +\frac{\partial^8}{\partial x^4 \partial y \partial z^3} +\phi(u(x,y,z))$, $4100$ of the $4140$ terms will involve at least one zero-valued derivative (e.g. $\partial^3/\partial x^3$ or $\partial^2/\partial x\partial y$) of $u$. Furthermore, among the 40 -remaining terms, many will involve the same derivatives and can be -grouped together, leaving us with a sum of six products of $x$,$y$ and -$z$. This is generally the case for most of the $D_\mathbf{n}$'s and -figuring out which terms are identical in a given set of partitions of -$\lbrace1,\cdots, n\rbrace$ is an interesting exercise in -combinatorics left for the reader \citep[see also][]{Hardy2006}. +remaining terms, many will involve the same combination of derivatives +of $u$ and can be grouped together, leaving us with a sum of six +products of $x$,$y$ and $z$. This is generally the case for most of +the $\mathsf{D}_\mathbf{n}$'s and figuring out which terms are identical in a +given set of partitions of $\lbrace1,\cdots, n\rbrace$ is an +interesting exercise in combinatorics left for the reader \citep[see + also][]{Hardy2006}. We use a \texttt{python} script based on this +technique to generate the actual C routines used within \swift. Some +examples of these terms are given in Appendix +\ref{sec:pot_derivatives}. diff --git a/theory/Multipoles/mesh_summary.tex b/theory/Multipoles/mesh_summary.tex new file mode 100644 index 0000000000000000000000000000000000000000..c9e8bc4577e1040ff7b6daadea25736ad6508ff9 --- /dev/null +++ b/theory/Multipoles/mesh_summary.tex @@ -0,0 +1,2 @@ +\subsection{Coupling the FMM to a mesh for periodic long-range forces} +\label{ssec:mesh_summary} diff --git a/theory/Multipoles/potential.py b/theory/Multipoles/plot_potential.py similarity index 93% rename from theory/Multipoles/potential.py rename to theory/Multipoles/plot_potential.py index 559f590762a3cbef171c5dd584cbc517879a2cec..620cc5712c546fe1fc7a22dd9b3fb5483f70226e 100644 --- a/theory/Multipoles/potential.py +++ b/theory/Multipoles/plot_potential.py @@ -141,7 +141,7 @@ plot([epsilon, epsilon], [-10, 10], 'k-', alpha=0.5, lw=0.5) plot([epsilon/plummer_equivalent_factor, epsilon/plummer_equivalent_factor], [0, 10], 'k-', alpha=0.5, lw=0.5) ylim(0, 2.3) -ylabel("$|\\phi(r)|$", labelpad=1) +ylabel("$\\phi(r)$", labelpad=1) #yticks([0., 0.5, 1., 1.5, 2., 2.5], ["$%.1f$"%(0.*epsilon), "$%.1f$"%(0.5*epsilon), "$%.1f$"%(1.*epsilon), "$%.1f$"%(1.5*epsilon), "$%.1f$"%(2.*epsilon)]) xlim(0,r_max_plot) @@ -166,16 +166,3 @@ ylim(0, 0.95) ylabel("$|\\overrightarrow{\\nabla}\\phi(r)|$", labelpad=0) savefig("potential.pdf") - - - - -#Construct potential -# phi = np.zeros(np.size(r)) -# for i in range(np.size(r)): -# if r[i] > 2*epsilon: -# phi[i] = 1./ r[i] -# elif r[i] > epsilon: -# phi[i] = -(1./epsilon) * ((32./3.)*u[i]**2 - (48./3.)*u[i]**3 + (38.4/4.)*u[i]**4 - (32./15.)*u[i]**5 + (2./30.)*u[i]**(-1) - (9/5.)) -# else: -# phi[i] = -(1./epsilon) * ((32./6.)*u[i]**2 - (38.4/4.)*u[i]**4 + (32./5.)*u[i]**4 - (7./5.)) diff --git a/theory/Multipoles/potential_derivatives.tex b/theory/Multipoles/potential_derivatives.tex index 56184ce98902d76ad53ce1d49e3d6d67dfc33ac4..9010f06e91e8e0df0d36b539dee09d7b5a67bede 100644 --- a/theory/Multipoles/potential_derivatives.tex +++ b/theory/Multipoles/potential_derivatives.tex @@ -1,4 +1,5 @@ -\subsection{Derivatives of the potential} +\section{Derivatives of the potential} +\label{sec:pot_derivatives} For completeness, we give here the full expression for the first few derivatives of the potential that are used in our FMM scheme. We use @@ -6,7 +7,7 @@ the notation $\mathbf{r}=(r_x, r_y, r_z)$, $r = |\mathbf{r}|$ and $u=r/H$. Starting from the potential (Eq. \ref{eq:fmm:potential}, reproduced here for clarity), \begin{align} -D_{000}(\mathbf{r}) = \phi (\mathbf{r},H) = +\mathsf{D}_{000}(\mathbf{r}) = \phi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} \frac{1}{H} \left(-3u^7 + 15u^6 - 28u^5 + 21u^4 - 7u^2 + 3\right) & \mbox{if} & u < 1,\\ \frac{1}{r} & \mbox{if} & u \geq 1, @@ -14,10 +15,11 @@ D_{000}(\mathbf{r}) = \phi (\mathbf{r},H) = \right.\nonumber \end{align} we can construct the higher order terms by successively applying the -"chain rule". We show examples of the first few relevant ones here. +"chain rule". We show representative examples of the first few +relevant ones here split by order. \begin{align} -D_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \phi (\mathbf{r},H) = +\mathsf{D}_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \phi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_x}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ -\frac{r_x}{r^3} & \mbox{if} & u \geq 1, @@ -25,8 +27,10 @@ D_{100}(\mathbf{r}) = \frac{\partial}{\partial r_x} \phi (\mathbf{r},H) = \right.\nonumber \end{align} +\noindent\rule{6cm}{0.4pt} + \begin{align} -D_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \phi (\mathbf{r},H) = +\mathsf{D}_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \phi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} \frac{r_x^2}{H^5}\left(-105u^3+360u^2-420u+168\right) - \frac{1}{H^3} \left(21u^5 - 90u^4 + 140u^3 - 84u^2 + 14\right) & \mbox{if} & u < 1,\\ @@ -36,7 +40,7 @@ D_{200}(\mathbf{r}) = \frac{\partial^2}{\partial r_x^2} \phi (\mathbf{r},H) = \end{align} \begin{align} -D_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \phi (\mathbf{r},H) = +\mathsf{D}_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \phi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} \frac{r_xr_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ 3\frac{r_xr_y}{r^5} & \mbox{if} & u \geq 1, @@ -44,8 +48,10 @@ D_{110}(\mathbf{r}) = \frac{\partial^2}{\partial r_x\partial r_y} \phi (\mathbf{ \right.\nonumber \end{align} +\noindent\rule{6cm}{0.4pt} + \begin{align} -D_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = +\mathsf{D}_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_x^3}{H^7} \left(315u - 720 + 420u^{-1}\right) + \frac{3r_x}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ @@ -55,7 +61,7 @@ D_{300}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = \end{align} \begin{align} -D_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = +\mathsf{D}_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_x^2r_y}{H^7} \left(315u - 720 + 420u^{-1}\right) + \frac{r_y}{H^5}\left(-105u^3+360u^2-420u+168\right) & \mbox{if} & u < 1,\\ @@ -66,10 +72,32 @@ D_{210}(\mathbf{r}) = \frac{\partial^3}{\partial r_x^3} \phi (\mathbf{r},H) = \begin{align} -D_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \phi (\mathbf{r},H) = +\mathsf{D}_{111}(\mathbf{r}) = \frac{\partial^3}{\partial r_x\partial r_y\partial r_z} \phi (\mathbf{r},H) = \left\lbrace\begin{array}{rcl} -\frac{r_xr_yr_z}{H^7} \left(315u - 720 + 420u^{-1}\right) & \mbox{if} & u < 1,\\ -15\frac{r_xr_yr_z}{r^7} & \mbox{if} & u \geq 1, \end{array} \right.\nonumber \end{align} + +\noindent\rule{6cm}{0.4pt} + +\begin{align} + \mathsf{D}_{400}(\mathbf{r}) &= + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{310}(\mathbf{r}) &= + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{220}(\mathbf{r}) &= + \nonumber +\end{align} + +\begin{align} + \mathsf{D}_{211}(\mathbf{r}) &= + \nonumber +\end{align} diff --git a/theory/Multipoles/potential_softening.tex b/theory/Multipoles/potential_softening.tex index 1186a9cec377fd8daa94e14d024115f95ecfdc99..bdfc27da290d23a8c5fbd1423c442f3948616001 100644 --- a/theory/Multipoles/potential_softening.tex +++ b/theory/Multipoles/potential_softening.tex @@ -1,4 +1,5 @@ \subsection{Gravitational softening} +\label{ssec:potential_softening} To avoid artificial two-body relaxation, the Dirac $\delta$-distribution of particles is convolved with a softening @@ -6,7 +7,10 @@ kernel of a given fixed, but time-variable, scale-length $\epsilon$. Instead of the commonly used spline kernel of \cite{Monaghan1985} (e.g. in \textsc{Gadget}), we use a C2 kernel \citep{Wendland1995} which leads to an expression for the force that -is cheaper to compute and has a very similar overall shape. We set +is cheaper to compute and has a very similar overall shape. The C2 +kernel has the advantage of being branch-free leading to an expression +which is faster to evaluate using vector units available on modern +architectures; it also does not require any division. We set $\tilde\delta(\mathbf{x}) = \rho(|\mathbf{x}|) = W(|\mathbf{x}|, 3\epsilon_{\rm Plummer})$, with $W(r, H)$ given by @@ -41,12 +45,13 @@ details see Sec. 2 of~\cite{Price2007}). \begin{figure} \includegraphics[width=\columnwidth]{potential.pdf} -\caption{The density (top), potential (middle) and forces (bottom) of -generated py a point mass in our softened gravitational scheme (for -completeness, we chose $\epsilon=2$). A -Plummer-equivalent sphere is shown for comparison. The spline kernel -of \citet{Monaghan1985}, used in \textsc{Gadget}, is shown for -comparison but note that it has not been re-scaled to match the -Plummer-sphere potential at $r=0$.} +\caption{The density (top), potential (middle) and forces (bottom) + generated py a point mass in our softened gravitational scheme. + A Plummer-equivalent sphere is shown for comparison. The spline + kernel of \citet{Monaghan1985}, used in \textsc{Gadget}, is shown + for comparison but note that it has not been re-scaled to match the + Plummer-sphere potential at $r=0$. %(for completeness, we chose + %$\epsilon=2$). + } \label{fig:fmm:softening} \end{figure} diff --git a/theory/Multipoles/run.sh b/theory/Multipoles/run.sh index f25d407cd4ffe679a272f352798817f7c0c4e55a..3b2156a40c4309a1fb21980fb14022f963ce4f51 100755 --- a/theory/Multipoles/run.sh +++ b/theory/Multipoles/run.sh @@ -1,5 +1,10 @@ #!/bin/bash -python potential.py +if [! -e potential.pdf ] +then + echo "Generating figures..." + python plot_potential.py +fi +echo "Generating PDF..." pdflatex -jobname=fmm fmm_standalone.tex bibtex fmm.aux pdflatex -jobname=fmm fmm_standalone.tex diff --git a/theory/Multipoles/vector_notation.tex b/theory/Multipoles/vector_notation.tex new file mode 100644 index 0000000000000000000000000000000000000000..4c17a1b92ad7576ac3aaa02b8d02993acfcd795a --- /dev/null +++ b/theory/Multipoles/vector_notation.tex @@ -0,0 +1,34 @@ +\section{Multi-index notation} +\label{sec:multi_index_notation} + +We define a multi-index $\mathbf{n}$ as a triplet of integers +non-negative integers: +\begin{equation} + \mathbf{n} \equiv \left(n_x, n_y, n_z\right), \qquad n_i \in \mathbb{N}, +\end{equation} +with a norm $n$ given by +\begin{equation} + n = |\mathbf{n}| \equiv n_x + n_y + n_z. +\end{equation} +We also define the exponentiation of a vector +$\mathbf{r}=(r_x,r_y,r_z)$ by a multi-index $\mathbf{n}$ as +\begin{equation} + \mathbf{r}^\mathbf{n} \equiv r_x^{n_x} \cdot r_y^{n_y} \cdot r_z^{n_z}, +\end{equation} +which for a scalar $\alpha$ reduces to +\begin{equation} + \alpha^\mathbf{n} = \alpha^{n}. +\end{equation} +Finally, the factiorial of a multi-index is defined to be +\begin{equation} + \mathbf{n}! \equiv n_x! \cdot n_y! \cdot n_z!, +\end{equation} +which leads to a simple expression for the binomial coefficients of +two multi-indices entering Taylor expansions: +\begin{equation} + \binom{\mathbf{n}}{\mathbf{k}} = \binom{n_x}{k_x}\binom{n_y}{k_y}\binom{n_z}{k_z}. +\end{equation} +When appearing as the index in a sum, a multi-index represents all +values that the triplet can take up to a given norm. For instance, +$\sum_{\mathbf{n}}^{p}$ indicates that the sum runs over all possible +multi-indices whose norm is $\leq p$.