Skip to content
Snippets Groups Projects
Commit 70d121cd authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

more work on manuscript.

Former-commit-id: be8549c54c1a26dcfa7c31b0c3478cea1183ec24
parent 84021614
No related branches found
No related tags found
No related merge requests found
File added
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
width="744.09448819"
height="1052.3622047"
id="svg2"
version="1.1"
inkscape:version="0.48.3.1 r9886"
sodipodi:docname="CellLocking.svg">
<defs
id="defs4">
<marker
inkscape:stockid="Arrow2Mend"
orient="auto"
refY="0.0"
refX="0.0"
id="Arrow2Mend"
style="overflow:visible;">
<path
id="path3806"
style="fill-rule:evenodd;stroke-width:0.62500000;stroke-linejoin:round;"
d="M 8.7185878,4.0337352 L -2.2072895,0.016013256 L 8.7185884,-4.0017078 C 6.9730900,-1.6296469 6.9831476,1.6157441 8.7185878,4.0337352 z "
transform="scale(0.6) rotate(180) translate(0,0)" />
</marker>
</defs>
<sodipodi:namedview
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="1.0"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="2"
inkscape:cx="319.93452"
inkscape:cy="689.70563"
inkscape:document-units="px"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:window-width="1110"
inkscape:window-height="996"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="0"
inkscape:snap-global="true">
<inkscape:grid
type="xygrid"
id="grid2985" />
</sodipodi:namedview>
<metadata
id="metadata7">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:title />
</cc:Work>
</rdf:RDF>
</metadata>
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<rect
style="fill:#ffcf00;fill-opacity:1;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="rect2987"
width="20"
height="20"
x="310"
y="257.36218" />
<rect
y="302.36218"
x="325"
height="20"
width="20"
id="rect3757"
style="fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
<rect
style="fill:#ffcf00;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;fill-opacity:1"
id="rect3759"
width="20"
height="20"
x="355"
y="302.36218" />
<rect
style="fill:#ff0000;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;fill-opacity:1"
id="rect3761"
width="20"
height="20"
x="265"
y="302.36218" />
<rect
y="302.36218"
x="295"
height="20"
width="20"
id="rect3763"
style="fill:#ffcf00;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;fill-opacity:1" />
<rect
style="fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="rect3765"
width="20"
height="20"
x="400"
y="347.36218" />
<rect
y="347.36218"
x="430.5"
height="20"
width="20"
id="rect3767"
style="fill:#ff0000;fill-opacity:1;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
<rect
y="347.36218"
x="340"
height="20"
width="20"
id="rect3769"
style="fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
<rect
style="fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="rect3771"
width="20"
height="20"
x="370"
y="347.36218" />
<path
sodipodi:nodetypes="cccc"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
d="m 440,347.36218 0,-10 -75,0 0,-15"
id="path4233"
inkscape:connector-curvature="0" />
<path
sodipodi:nodetypes="ccc"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 350,347.36218 0,-10 15,0"
id="path4235"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 365,337.36218 15,0 0,10"
id="path4237"
inkscape:connector-curvature="0" />
<path
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 380,337.36218 30,0 0,10"
id="path4239"
inkscape:connector-curvature="0" />
<path
inkscape:connector-curvature="0"
id="path4249"
d="m 275,302.36218 0,-10 45,0 0,-15"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Mend)"
sodipodi:nodetypes="cccc" />
<path
inkscape:connector-curvature="0"
id="path4251"
d="m 305,302.36218 0,-10"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
<path
inkscape:connector-curvature="0"
id="path4253"
d="m 320,292.36218 15,0 0,10"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
<path
inkscape:connector-curvature="0"
id="path4255"
d="m 335,292.36218 30,0 0,10"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
<rect
y="347.36218"
x="265"
height="20"
width="20"
id="rect4275"
style="fill:#ff0000;fill-opacity:1;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
<rect
style="fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="rect4277"
width="20"
height="20"
x="295"
y="347.36218" />
<rect
style="fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0"
id="rect4279"
width="20"
height="20"
x="205"
y="347.36218" />
<rect
y="347.36218"
x="235"
height="20"
width="20"
id="rect4281"
style="fill:none;stroke:#000000;stroke-width:2;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0" />
<path
sodipodi:nodetypes="cccc"
inkscape:connector-curvature="0"
id="path4285"
d="m 215,347.36218 0,-10 90,0 0,-15"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow2Mend)" />
<path
inkscape:connector-curvature="0"
id="path4287"
d="m 245,347.36218 0,-10"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
<path
sodipodi:nodetypes="ccc"
inkscape:connector-curvature="0"
id="path4289"
d="m 275,337.36218 0,0 0,10"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
<path
inkscape:connector-curvature="0"
id="path4291"
d="m 275,337.36218 30,0 0,10"
style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" />
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:end;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:end;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
x="304.91455"
y="316.0072"
id="text4872"
sodipodi:linespacing="125%"><tspan
sodipodi:role="line"
id="tspan4874"
x="304.91455"
y="316.0072"
style="font-size:10px;text-align:center;text-anchor:middle">1</tspan></text>
<text
sodipodi:linespacing="125%"
id="text4876"
y="316.0072"
x="364.91455"
style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:end;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:end;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
xml:space="preserve"><tspan
style="font-size:10px;text-align:center;text-anchor:middle"
y="316.0072"
x="364.91455"
id="tspan4878"
sodipodi:role="line">1</tspan></text>
<text
xml:space="preserve"
style="font-size:12px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:end;line-height:125%;letter-spacing:0px;word-spacing:0px;text-anchor:end;fill:#000000;fill-opacity:1;stroke:none;font-family:Bitstream Vera Sans;-inkscape-font-specification:Bitstream Vera Sans"
x="319.91455"
y="271.0072"
id="text4880"
sodipodi:linespacing="125%"><tspan
sodipodi:role="line"
id="tspan4882"
x="319.91455"
y="271.0072"
style="font-size:10px;text-align:center;text-anchor:middle">3</tspan></text>
</g>
</svg>
...@@ -66,7 +66,7 @@ ...@@ -66,7 +66,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Title, author and affiliations % Title, author and affiliations
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\title{Fast Algorithms for Smoothed Particle Hydrodynamics on Multi-Core \title{Efficient and Scalable Algorithms for Smoothed Particle Hydrodynamics on Multi-Core
Architectures} Architectures}
\author{Pedro Gonnet\thanks{School of Engineering and Computing Sciences, \author{Pedro Gonnet\thanks{School of Engineering and Computing Sciences,
Durham University, Durham, United Kingdom ({\tt pedro.gonnet@durham.ac.uk}).}} Durham University, Durham, United Kingdom ({\tt pedro.gonnet@durham.ac.uk}).}}
...@@ -95,7 +95,11 @@ ...@@ -95,7 +95,11 @@
% Abstract % Abstract
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{abstract} \begin{abstract}
Bla. A new framework for the parallelization of Smoothed Particle Hydrodynamics (SPH)
simulations on shared-memory parallel architectures is described.
This framework relies on fast and cache-efficient cell-based neighbour-finding
algorithms, as well as task-based parallelism to achieve good scaling and
parallel efficiency on multi-core computers.
\end{abstract} \end{abstract}
...@@ -115,7 +119,7 @@ multi-cores ...@@ -115,7 +119,7 @@ multi-cores
\pagestyle{myheadings} \pagestyle{myheadings}
\thispagestyle{plain} \thispagestyle{plain}
\markboth{P. GONNET}{FAST ALGORITHMS FOR SPH} \markboth{P. GONNET}{EFFICIENT AND SCALABLE ALGORITHMS FOR SPH}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
...@@ -627,21 +631,17 @@ The dependencies and conflicts between tasks are then given as follows: ...@@ -627,21 +631,17 @@ The dependencies and conflicts between tasks are then given as follows:
\subsubsection{Task queues} \subsubsection{Task queues}
\begin{itemize} If the dependencies and conflicts are defined correctly, then
there is no risk of concurrency problems and thus each task
\item If the dependencies and conflicts are defined correctly, then can be implemented without special attention to the latter,
there is no risk of concurrency problems and thus each task e.g.~it can update data without using exclusive access barriers
can be implemented without special attention to the latter, or atomic memory updates.
e.g.~it can update data without using exclusive access barriers This, however, requires some care in how the individual tasks
or atomic memory updates. are allocated to the computing threads, i.e.~each task should
be allocated once to a single thread, and should not have
\item This, however, requires some care in how the individual tasks any unresolved dependencies, or conflict with any concurrently
are allocated to the computing threads, i.e.~each task should executing tasks.
be allocated once to a single thread, and should not have In the following, tasks will be stored in one or more {\em queues}:
any unresolved dependencies, or conflict with any concurrently
executing tasks.
\item In the following, tasks will be stored in one or more {\em queues}:
\begin{center}\begin{minipage}{0.8\textwidth} \begin{center}\begin{minipage}{0.8\textwidth}
\begin{lstlisting} \begin{lstlisting}
...@@ -660,7 +660,7 @@ in the queue. ...@@ -660,7 +660,7 @@ in the queue.
The {\tt pthread\_mutex\_t lock} is used to guarantee exclusive access The {\tt pthread\_mutex\_t lock} is used to guarantee exclusive access
to the queue. to the queue.
\item Task IDs are retrieved from the queue as follows: Task IDs are retrieved from the queue as follows:
\begin{center}\begin{minipage}{0.8\textwidth} \begin{center}\begin{minipage}{0.8\textwidth}
\begin{lstlisting} \begin{lstlisting}
...@@ -688,72 +688,145 @@ int queue_gettask ( struct queue *q , int steal ) { ...@@ -688,72 +688,145 @@ int queue_gettask ( struct queue *q , int steal ) {
\end{lstlisting} \end{lstlisting}
\end{minipage}\end{center} \end{minipage}\end{center}
\noindent i.e.~exclusive access to the queue is obtained by locking \noindent i.e.~exclusive access to the queue is obtained by locking
its mutex in line~2. In lines~3 to~6, the tasks are inspected its mutex in line~2. In lines~3 to~6, the tasks are inspected
in sequence until a task is found that has no unresolved in sequence until a task is found that has no unresolved
dependencies or existing conflicts. dependencies or existing conflicts.
If a task has been found, its ID is swapped with that at If a task has been found, its ID is swapped with that at
position {\tt next}, and {\tt next} is incremented by one position {\tt next}, and {\tt next} is incremented by one
(lines 8~to~11). (lines 8~to~11).
The lock on the queue is then released (line~12) and The lock on the queue is then released (line~12) and
the task ID, or {\tt -1} if no available task was found, is the task ID, or {\tt -1} if no available task was found, is
returned. returned.
\item The advantage of swapping the retrieved task to the next The advantage of swapping the retrieved task to the next
position in the list is that if the queue is reset, e.g.~{\tt next} position in the list is that if the queue is reset, e.g.~{\tt next}
is set to zero, and used again with the same set of tasks, is set to zero, and used again with the same set of tasks,
they will now be traversed in the order in which they were they will now be traversed in the order in which they were
executed in the previous run. executed in the previous run.
This provides a basic form of iterative refinement of the task This provides a basic form of iterative refinement of the task
order. order.
The tasks can also be sorted topologically, according to their
\item The tasks can also be sorted topologically, according to their dependency graph, to help minimize the effort required to find
dependency graph, to help minimize the effort required to find a valid task.
a valid task.
The mutex at the start of {\tt queue\_gettask} is a potential
\item The mutex at the start of {\tt queue\_gettask} is a potential bottleneck if the time required to process a task is small
bottleneck if the time required to process a task is small compared to the time required for all the threads to obtain
compared to the time required for all the threads to obtain a task, e.g.~for large numbers of very small tasks and/or
a task, e.g.~for large numbers of very small tasks and/or a large number of threads.
a large number of threads. One way of avoiding this problem is to use several concurrent
queues, e.g.~one queue per thread, and spread the tasks over
\item One way of avoiding this problem is to use several concurrent all queues.
queues, e.g.~one queue per thread, and spread the tasks over A fixed assignment of tasks to queues can, however,
all queues. cause load balancing problems, e.g.~when a thread's queue is
empty before the others have finished.
\item A fixed assignment of tasks to queues can, however, In order to avoid such problems, {\em work-stealing} can be used:
cause load balancing problems, e.g.~when a thread's queue is If a thread cannot obtain a task from its own queue, it picks
empty before the others have finished. another queue at random and tries to {\em steal} a task from it
i.e.~if it can obtain a task, it removes it from the queue and
\item In order to avoid such problems, {\em work-stealing} can be used: adds it to its own queue, thus iteratively rebalancing
If a thread cannot obtain a task from its own queue, it picks the task queues if they are used repeatedly:
another queue at random and tries to {\em steal} a task from it
i.e.~if it can obtain a task, it removes it from the queue and
adds it to its own queue, thus iteratively rebalancing
the task queues if they are used repeatedly:
\begin{center}\begin{minipage}{0.8\textwidth} \begin{center}\begin{minipage}{0.8\textwidth}
\begin{lstlisting} \begin{lstlisting}
while ( there is still a task in any of the queues ) { while ( there is still a task in any of the queues ) {
if ( ( tid = queue_gettask( myq , 0 ) ) < 0 ) {
randq = pick a non-empty queue at random.
if ( ( tid = queue_gettask( randq , 1 ) ) >= 0 )
queue_addtask( myq , tid );
} }
if ( tid >= 0 )
execute task tid.
}
\end{lstlisting} \end{lstlisting}
\end{minipage}\end{center} \end{minipage}\end{center}
\end{itemize} \noindent where {\tt myq} is the queue associated with the
current thread and {\tt queue\_addtask} adds a task ID
to the given queue.
\subsubsection{Cell locking} \subsubsection{Cell locking}
\begin{itemize} Particles within a cell are also within that cell's hierarchical
parents.
\item Explain problem of hierarchical locking, i.e.~interactions Therefore, when working on the particles of a cell, tasks which
involving higher-level cells exclude lower-level cells. operate on its parent's data should not be allowed to execute.
One way to avoid this problem is to require that a task
not only lock a cell, but also all of its hierarchical
parents in order to operate on its data.
This, however, would prevent tasks involving siblings,
whose particle sets do not overlap, from executing.
We avoid this problem by giving each cell both a {\em lock},
and a {\em hold} counter:
\item Two-phase locking up and down the cell hierarchy, with \begin{center}\begin{minipage}{0.8\textwidth}
``lock'' and ``hold'' states. \begin{lstlisting}
int cell_locktree ( struct cell *c ) {      /* returns 0 if locked, 1 if not */
    struct cell *c1, *c2;
    if ( trylock( c->lock ) != 0 )          /* cell itself is already locked */
        return 1;
    if ( c->hold > 0 ) {                    /* cell is held: give up the lock */
        unlock( c->lock );
        return 1;
        }
    for ( c1 = c->parent ; c1 != NULL ; c1 = c1->parent ) {
        if ( trylock( c1->lock ) != 0 )     /* a parent is locked: stop here */
            break;
        atomic_add( c1->hold , 1 );
        unlock( c1->lock );
        }
    if ( c1 != NULL ) {                     /* climb was aborted at cell c1 */
        for ( c2 = c->parent ; c2 != c1 ; c2 = c2->parent )
            atomic_sub( c2->hold , 1 );     /* undo the holds placed so far */
        unlock( c->lock );
        return 1;
        }
    else
        return 0;
    }
\end{lstlisting}
\end{minipage}\end{center}
\end{itemize} \noindent When trying to lock a cell, we first check that it is neither
locked (line 3) nor held (line 5), i.e.~its hold counter is zero, and lock it.
We then travel up the hierarchy increasing the
hold counter of each cell on the way, up to the topmost cell (lines 9--14).
If any cell along the hierarchy is locked (line 10), the locking is aborted
and all locks and holds are undone (lines 15--20, see \fig{CellLocking}).
The operations {\tt atomic\_add} and {\tt atomic\_sub} are understood,
respectively, to increase or decrease a value atomically.
When the cell is released, its lock is unlocked and the hold
counter of all hierarchical parents is decreased by one:
\begin{center}\begin{minipage}{0.8\textwidth}
\begin{lstlisting}
void cell_unlocktree ( struct cell c ) {
struct cell *c1;
unlock( c->lock )
for ( c1 = c->parent ; c1 != NULL ; c1 = c1->parent ) {
atomic_sub( c1->hold , 1 );
}
\end{lstlisting}
\end{minipage}\end{center}
\begin{figure}[ht]
\centerline{\epsfig{file=figures/CellLocking.pdf,width=0.5\textwidth}}
\caption{Example of hierarchical cell locking. The cells marked in red
are ``locked'' while the cells marked in yellow have a ``hold'' count
larger than zero.
The hold count is shown inside each cell and corresponds to the number
of locked cells hierarchically below it.
All cells except for those locked or with a ``hold'' count larger than
zero can still be locked without causing concurrent data access.
}
\label{fig:CellLocking}
\end{figure}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
...@@ -765,7 +838,14 @@ int queue_gettask ( struct queue *q , int steal ) { ...@@ -765,7 +838,14 @@ int queue_gettask ( struct queue *q , int steal ) {
\begin{itemize} \begin{itemize}
\item Details of the task queues. \item Implemented in C, compiled with {\tt gcc}.
\item Threading implemented with {\tt pthread}.
\item One task queue per thread.
\item As of yet, no use of SIMD capabilities to evaluate several
interactions at a time.
\item Details of the pair interactions. \item Details of the pair interactions.
...@@ -778,7 +858,11 @@ int queue_gettask ( struct queue *q , int steal ) { ...@@ -778,7 +858,11 @@ int queue_gettask ( struct queue *q , int steal ) {
\begin{itemize} \begin{itemize}
\item Show both large and small simulation setups. \item Details of the simulation used, e.g. size, number of particles,
etc...
\item So far only considering density and force computation,
particles not moving.
\end{itemize} \end{itemize}
...@@ -801,7 +885,11 @@ int queue_gettask ( struct queue *q , int steal ) { ...@@ -801,7 +885,11 @@ int queue_gettask ( struct queue *q , int steal ) {
\begin{itemize} \begin{itemize}
\item Bla. \item Good scaling.
\item Computational model can easily be exported to other architectures,
including GPUs (reference task-based parallelism on GPUs with Aidan),
and other multi-core accelerators such as the Intel MIC.
\end{itemize} \end{itemize}
...@@ -811,7 +899,14 @@ int queue_gettask ( struct queue *q , int steal ) { ...@@ -811,7 +899,14 @@ int queue_gettask ( struct queue *q , int steal ) {
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section*{Acknowledgments} \section*{Acknowledgments}
ICC Durham, Nick Holliman's hardware, NVidia's hardware. \begin{itemize}
\item Collaboration with Matthieu Schaller and Tom Theuns from the
Institute for Computational Cosmology (ICC) at Durham University.
\item Lydia Heck from the ICC for providing access to the infrastructure.
\end{itemize}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment