diff --git a/paper/paper.tex b/paper/paper.tex
index 3abb6329be29729b81468c383a2d13e46aae375c..992d2f7858c4b0f3a43f5d13718241ddfa1b67d0 100644
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -1172,12 +1172,32 @@ The timings are for {\tt qsched\_run}, including the cost of
 Setting up the scheduler, tasks, and resources took, in all
 cases, an average of 51.3\,ms.
 
+For comparison, the same computations were run using the popular
+astrophysics simulation software Gadget-2~\cite{ref:Springel2005},
+which uses a traditional Barnes-Hut implementation based on octrees
+and distributed-memory parallelism based on domain decomposition
+and MPI~\cite{ref:Snir1998}.
+To achieve the same accuracy, an opening angle of 0.5 was used.
+On a single core, the task-based tree traversal is already 1.9$\times$
+faster than Gadget-2, due to the cache efficiency of the task-based
+computations, which, by design, maximize the amount of computation
+per memory access.
+At 59 cores, where Gadget-2 performs best, the task-based tree traversal is
+2.51$\times$ faster, and at the full 64 cores it is 4$\times$ faster,
+due to the better strong scaling of the task-based approach as opposed
+to the MPI-based parallelism in Gadget-2.
+
 \begin{figure}
     \centerline{\epsfig{file=figures/BH_scaling.pdf,width=0.9\textwidth}}
     \caption{Strong scaling and parallel efficiency of the Barnes-Hut tree-code
         computed over 1\,000\,000 particles.
         Solving the N-Body problem takes 323\,ms, achieving 75\% parallel
         efficiency, over all 64 cores.
+        For comparison, timings are shown for the same computation using
+        the popular astrophysics code Gadget-2.
+        The scaling for Gadget-2 (left) is shown relative to the performance of
+        QuickSched, whereas the parallel efficiency (right) is computed relative
+        to Gadget-2 on a single core.
         }
     \label{fig:BHResults}
 \end{figure}
diff --git a/paper/quicksched.bib b/paper/quicksched.bib
index 22f9e56cca9964a1741170944bdcb2be7b30c051..4fe4def8cfb92b210fa8893baff0abc9a5155915 100644
--- a/paper/quicksched.bib
+++ b/paper/quicksched.bib
@@ -1,3 +1,22 @@
+@book{ref:Snir1998,
+    title={{MPI}: The Complete Reference, Volume 1: The {MPI} Core},
+    author={Snir, Marc and Otto, Steve and Huss-Lederman, Steven and Walker, David and Dongarra, Jack},
+    volume={1},
+    year={1998},
+    publisher={MIT Press}
+    }
+
+@article{ref:Springel2005,
+  title={The cosmological simulation code {GADGET-2}},
+  author={Springel, Volker},
+  journal={Monthly Notices of the Royal Astronomical Society},
+  volume={364},
+  number={4},
+  pages={1105--1134},
+  year={2005},
+  publisher={Oxford University Press}
+}
+
 @inproceedings{ref:Agullo2011,
     title = {{QR Factorization on a Multicore Node Enhanced with Multiple GPU Accelerators}},
     author = {Agullo, Emmanuel and Augonnet, C{\'e}dric and Dongarra, Jack and Faverge, Mathieu and Ltaief, Hatem and Thibault, Samuel and Tomov, Stanimire},