diff --git a/src/runner.c b/src/runner.c
index 3303b34e4c156c33147b7a206f3286e612537550..d6154f5c4095e0dd18b442c5b404f6b7e7c58c13 100644
--- a/src/runner.c
+++ b/src/runner.c
@@ -330,7 +330,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags , int clock )
  
 void runner_doghost ( struct runner *r , struct cell *c ) {
 
-    struct part *p;
+    struct part *p, *parts = c->parts;
     struct cell *finger;
     int i, k, redo, count = c->count;
     int *pid;
@@ -360,10 +360,18 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
         redo = 0;
     
         /* Loop over the parts in this cell. */
+        /* Warm up the first three parts (or fewer when the list is shorter), so that
+           pid[] is never read at or beyond index count. */
+        for ( int j = 0 ; j < 3 && j < count ; j++ ) {
+            __builtin_prefetch( &parts[ pid[j] ] , 0 , 1 );
+            __builtin_prefetch( &parts[ pid[j] ].rho_dh , 0 , 1 );
+            }
         for ( i = 0 ; i < count ; i++ ) {
 
             /* Get a direct pointer on the part. */
-            p = &c->parts[ pid[i] ];
+            if ( i + 3 < count ) __builtin_prefetch( &parts[ pid[i+3] ] , 0 , 1 );          /* stay three parts ahead... */
+            if ( i + 3 < count ) __builtin_prefetch( &parts[ pid[i+3] ].rho_dh , 0 , 1 );   /* ...but only within pid[0..count-1]. */
+            p = &parts[ pid[i] ];
             
             /* Is this part within the timestep? */
             if ( p->dt <= dt_step ) {
@@ -458,16 +466,16 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
                 
                     /* Self-interaction? */
                     if ( finger->density[k]->type == task_type_self )
-                        runner_doself_subset_density( r , finger , c->parts , pid , count );
+                        runner_doself_subset_density( r , finger , parts , pid , count );
                         
                     /* Otherwise, pair interaction? */
                     else if ( finger->density[k]->type == task_type_pair ) {
                     
                         /* Left or right? */
                         if ( finger->density[k]->ci == finger )
-                            runner_dopair_subset_density( r , finger , c->parts , pid , count , finger->density[k]->cj );
+                            runner_dopair_subset_density( r , finger , parts , pid , count , finger->density[k]->cj );
                         else
-                            runner_dopair_subset_density( r , finger , c->parts , pid , count , finger->density[k]->ci );
+                            runner_dopair_subset_density( r , finger , parts , pid , count , finger->density[k]->ci );
                         
                         }
                 
@@ -476,9 +484,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
                     
                         /* Left or right? */
                         if ( finger->density[k]->ci == finger )
-                            runner_dosub_subset_density( r , finger , c->parts , pid , count , finger->density[k]->cj , -1 , 1 );
+                            runner_dosub_subset_density( r , finger , parts , pid , count , finger->density[k]->cj , -1 , 1 );
                         else
-                            runner_dosub_subset_density( r , finger , c->parts , pid , count , finger->density[k]->ci , -1 , 1 );
+                            runner_dosub_subset_density( r , finger , parts , pid , count , finger->density[k]->ci , -1 , 1 );
                         
                         }
                 
@@ -794,6 +802,16 @@ void *runner_main ( void *data ) {
             ci = t->ci;
             cj = t->cj;
             
+            /* Prefetch the parts of ci (and cj, if set) for writing, unless this is a kick1/kick2/ghost task. */
+            if ( runner_prefetch &&
+                 t->type != task_type_kick1 && t->type != task_type_kick2 && t->type != task_type_ghost ) {
+                for ( int k = 0 ; k < ci->count ; k++ )
+                    __builtin_prefetch( &ci->parts[k] , 1 , 3 );   /* rw=1: prepare for write; locality=3: keep in all cache levels. */
+                if ( cj != NULL )
+                    for ( int k = 0 ; k < cj->count ; k++ )
+                        __builtin_prefetch( &cj->parts[k] , 1 , 3 );
+                }
+            
             /* Different types of tasks... */
             switch ( t->type ) {
                 case task_type_self:
diff --git a/src/runner.h b/src/runner.h
index c6d2e656ab1d1a40dc790be29c096341c00ef859..c8cf5a6808c4da77bbc6d9471d384edebe75d061 100644
--- a/src/runner.h
+++ b/src/runner.h
@@ -19,6 +19,9 @@
 
 #include "inline.h"
 
+/* Some constants/flags: a non-zero runner_prefetch makes runner_main prefetch a task's cell parts before running it. */
+#define runner_prefetch                 1
+
 /* SID stuff. */
 extern const char runner_flip[];