diff --git a/paper/paper.tex b/paper/paper.tex index 3abb6329be29729b81468c383a2d13e46aae375c..992d2f7858c4b0f3a43f5d13718241ddfa1b67d0 100644 --- a/paper/paper.tex +++ b/paper/paper.tex @@ -1172,12 +1172,32 @@ The timings are for {\tt qsched\_run}, including the cost of Setting up the scheduler, tasks, and resources took, in all cases, an average of 51.3\,ms. +For comparison, the same computations were run using the popular +astrophysics simulation software Gadget-2~\cite{ref:Springel2005}, +which uses a traditional Barnes-Hut implementation based on octrees +and distributed-memory parallelism based on domain decompositions +and MPI~\cite{ref:Snir1998}. +To achieve the same accuracy, an opening angle of 0.5 was used. +On a single core, the task-based tree traversal is already 1.9$\times$ +faster than Gadget-2, due to the cache efficiency of the task-based +computations, which, by design, maximize the amount of computation +per memory access. +At 59 cores, where Gadget-2 performs best, the task-based tree traversal is +2.51$\times$ faster, and at the full 64 cores it is 4$\times$ faster, +due to the better strong scaling of the task-based approach as opposed +to the MPI-based parallelism in Gadget-2. + \begin{figure} \centerline{\epsfig{file=figures/BH_scaling.pdf,width=0.9\textwidth}} \caption{Strong scaling and parallel efficiency of the Barnes-Hut tree-code computed over 1\,000\,000 particles. Solving the N-Body problem takes 323\,ms, achieving 75\% parallel efficiency, over all 64 cores. + For comparison, timings are shown for the same computation using + the popular astrophysics code Gadget-2. + The scaling for Gadget-2 (left) is shown relative to the performance of + QuickSched, whereas the parallel efficiency (right) is computed relative + to Gadget-2 on a single core. 
} \label{fig:BHResults} \end{figure} diff --git a/paper/quicksched.bib b/paper/quicksched.bib index 22f9e56cca9964a1741170944bdcb2be7b30c051..4fe4def8cfb92b210fa8893baff0abc9a5155915 100644 --- a/paper/quicksched.bib +++ b/paper/quicksched.bib @@ -1,3 +1,22 @@ +@book{ref:Snir1998, + title = {{MPI}---The Complete Reference: The {MPI} Core}, + author = {Snir, Marc and Otto, Steve and Huss-Lederman, Steven and Walker, David and Dongarra, Jack}, + volume = {1}, + year = {1998}, + publisher = {MIT Press} +} + +@article{ref:Springel2005, + title = {The Cosmological Simulation Code {GADGET-2}}, + author = {Springel, Volker}, + journal = {Monthly Notices of the Royal Astronomical Society}, + volume = {364}, + number = {4}, + pages = {1105--1134}, + year = {2005}, + publisher = {Oxford University Press} +} + @inproceedings{ref:Agullo2011, title = {{QR Factorization on a Multicore Node Enhanced with Multiple GPU Accelerators}}, author = {Agullo, Emmanuel and Augonnet, C{\'e}dric and Dongarra, Jack and Faverge, Mathieu and Ltaief, Hatem and Thibault, Samuel and Tomov, Stanimire},