diff --git a/src/engine.c b/src/engine.c
index e1e1e6c6c500d6280ad91dafc701f76dcd36c88c..1381cd8dcd9cf1892177b0de81e04b11fbc50008 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -326,21 +326,21 @@ void engine_prepare ( struct engine *e ) {
 
     /* Run through the tasks and mark as skip or not. */
     // tic = getticks();
-    rebuild = 1 || ( e->step == 0 || engine_marktasks( e ) );
+    rebuild = ( e->step == 0 || engine_marktasks( e ) );
     // printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
         
     /* Did this not go through? */
     if ( rebuild ) {
     
         /* Re-build the space. */
-        tic = getticks();
+        // tic = getticks();
         space_rebuild( e->s , 0.0 );
-        printf( "engine_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
+        // printf( "engine_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
     
         /* Re-build the tasks. */
-        tic = getticks();
+        // tic = getticks();
         engine_maketasks( e );
-        printf( "engine_prepare: engine_maketasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
+        // printf( "engine_prepare: engine_maketasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
     
         /* Run through the tasks and mark as skip or not. */
         // tic = getticks();
@@ -369,35 +369,31 @@ void engine_prepare ( struct engine *e ) {
  * @param e The #engine.
  */
  
-void engine_barrier( struct engine *e ) {
+void engine_barrier ( struct engine *e ) {
 
     /* First, get the barrier mutex. */
     if ( pthread_mutex_lock( &e->barrier_mutex ) != 0 )
         error( "Failed to get barrier mutex." );
         
-    /* Wait for the barrier to close. */
-    while ( e->barrier_count < 0 )
-        if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
-            error( "Eror waiting for barrier to close." );
+    /* This thread is no longer running. */
+    e->barrier_running -= 1;
         
-    /* Once I'm in, increase the barrier count. */
-    e->barrier_count += 1;
-    
     /* If all threads are in, send a signal... */
-    if ( e->barrier_count == e->nr_threads )
+    if ( e->barrier_running == 0 )
         if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
             error( "Failed to broadcast barrier full condition." );
         
-    /* Wait for barrier to be released. */
-    while ( e->barrier_count > 0 )
+    /* Wait for the barrier to open. */
+    while ( e->barrier_launch == 0 )
         if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
-            error( "Error waiting for barrier to be released." );
-            
-    /* Decrease the counter before leaving... */
-    e->barrier_count += 1;
+            error( "Eror waiting for barrier to close." );
+        
+    /* This thread has been launched. */
+    e->barrier_running += 1;
+    e->barrier_launch -= 1;
     
     /* If I'm the last one out, signal the condition again. */
-    if ( e->barrier_count == 0 )
+    if ( e->barrier_launch == 0 )
         if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
             error( "Failed to broadcast empty barrier condition." );
             
@@ -556,6 +552,28 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest
     fflush(stdout);
     
     }
+    
+    
+/**
+ * @breif Launch the runners.
+ *
+ * @param e The #engine.
+ * @param nr_runners The number of #runners to let loose.
+ */
+ 
+void engine_launch ( struct engine *e , int nr_runners ) {
+
+    /* Cry havoc and let loose the dogs of war. */
+    e->barrier_launch = nr_runners;
+    if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
+        error( "Failed to broadcast barrier open condition." );
+        
+    /* Sit back and wait for the runners to come home. */
+    while ( e->barrier_launch || e->barrier_running )
+        if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
+            error( "Error while waiting for barrier." );
+            
+    }
 
 
 /**
@@ -597,12 +615,7 @@ void engine_step ( struct engine *e ) {
     /* First kick. */
     TIMER_TIC
     scheduler_start( &e->sched , (1 << task_type_kick1) );
-    e->barrier_count = -e->barrier_count;
-    if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
-        error( "Failed to broadcast barrier open condition." );
-    while ( e->barrier_count < e->nr_threads )
-        if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
-            error( "Error while waiting for barrier." );
+    engine_launch( e , ( e->nr_threads > 16 ) ? 16 : e->nr_threads );
     TIMER_TOC( timer_kick1 );
     
     /* Check if all the kick1 threads have executed. */
@@ -620,22 +633,11 @@ void engine_step ( struct engine *e ) {
     
     // engine_single_density( e->s->dim , 3392063069037 , e->s->parts , e->s->nr_parts , e->s->periodic );
 
-    /* Start the clock. */
+    /* Send off the runners. */
     TIMER_TIC_ND
-    
-    /* Cry havoc and let loose the dogs of war. */
-    e->barrier_count = -e->barrier_count;
-    if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
-        error( "Failed to broadcast barrier open condition." );
-        
-    /* Sit back and wait for the runners to come home. */
-    while ( e->barrier_count < e->nr_threads )
-        if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
-            error( "Error while waiting for barrier." );
-            
-    /* Stop the clock. */
+    engine_launch( e , e->nr_threads );
     TIMER_TOC(timer_runners);
-
+            
     // engine_single_force( e->s->dim , 8328423931905 , e->s->parts , e->s->nr_parts , e->s->periodic );
     
     // for(k=0; k<10; ++k)
@@ -746,7 +748,8 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
         error( "Failed to initialize barrier condition variable." );
     if ( pthread_mutex_lock( &e->barrier_mutex ) != 0 )
         error( "Failed to lock barrier mutex." );
-    e->barrier_count = 0;
+    e->barrier_running = 0;
+    e->barrier_launch = 0;
     
     /* Run through the parts and get the minimum time step. */
     e->dt_orig = dt;
@@ -773,6 +776,7 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
     for ( k = 0 ; k < nr_threads ; k++ ) {
         e->runners[k].id = k;
         e->runners[k].e = e;
+        e->barrier_running += 1;
         if ( pthread_create( &e->runners[k].thread , NULL , &runner_main , &e->runners[k] ) != 0 )
             error( "Failed to create runner thread." );
         #if defined(HAVE_SETAFFINITY)
@@ -787,7 +791,7 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
         }
         
     /* Wait for the runner threads to be in place. */
-    while ( e->barrier_count != e->nr_threads )
+    while ( e->barrier_running || e->barrier_launch )
         if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
             error( "Error while waiting for runner threads to get in place." );
     
diff --git a/src/engine.h b/src/engine.h
index ade67006c7c1b1ebdd6f28dd2b083493acb4bae1..b903ee21d4b418437bd3fbfe14c4ddc42cd0ca46 100644
--- a/src/engine.h
+++ b/src/engine.h
@@ -74,7 +74,7 @@ struct engine {
     /* Data for the threads' barrier. */
     pthread_mutex_t barrier_mutex;
     pthread_cond_t barrier_cond;
-    int barrier_count;
+    volatile int barrier_running, barrier_launch;
     
     };
 
diff --git a/src/scheduler.c b/src/scheduler.c
index aef13a10bacc5147fb5ed17a5d71b3eb846319ee..76b2fd80659a59f68a188d2f3e22d6b048c6e44e 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -703,10 +703,10 @@ void scheduler_done ( struct scheduler *s , struct task *t ) {
         }
         
     /* Task definitely done. */
-    // pthread_mutex_lock( &s->sleep_mutex );
+    pthread_mutex_lock( &s->sleep_mutex );
     atomic_dec( &s->waiting );
-    // pthread_cond_broadcast( &s->sleep_cond );
-    // pthread_mutex_unlock( &s->sleep_mutex );
+    pthread_cond_broadcast( &s->sleep_cond );
+    pthread_mutex_unlock( &s->sleep_mutex );
 
     }
 
@@ -754,10 +754,10 @@ struct task *scheduler_gettask ( struct scheduler *s , int qid ) {
             }
             
         /* If we failed, take a short nap. */
-        /* pthread_mutex_lock( &s->sleep_mutex );
+        pthread_mutex_lock( &s->sleep_mutex );
         if ( s->waiting > 0 )
             pthread_cond_wait( &s->sleep_cond , &s->sleep_mutex );
-        pthread_mutex_unlock( &s->sleep_mutex ); */
+        pthread_mutex_unlock( &s->sleep_mutex );
         
         }
         
diff --git a/src/space.c b/src/space.c
index 40ed7412efb66180c1749e5c332c774554619aa2..bcfc20201434cb2633b56f19c83881f926ae3867 100644
--- a/src/space.c
+++ b/src/space.c
@@ -161,10 +161,10 @@ void space_rebuild ( struct space *s , double cell_max ) {
     struct part *restrict finger, *restrict p, *parts = s->parts;
     int *ind;
     double ih[3], dim[3];
-    ticks tic;
+    // ticks tic;
     
     /* Be verbose about this. */
-    printf( "space_rebuild: (re)building space...\n" ); fflush(stdout);
+    // printf( "space_rebuild: (re)building space...\n" ); fflush(stdout);
     
     /* Run through the parts and get the current h_max. */
     // tic = getticks();
@@ -260,7 +260,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
         }
         
     /* Run through the particles and get their cell index. */
-    tic = getticks();
+    // tic = getticks();
     if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL )
         error( "Failed to allocate temporary particle indices." );
     ih[0] = s->ih[0]; ih[1] = s->ih[1]; ih[2] = s->ih[2];
@@ -277,12 +277,12 @@ void space_rebuild ( struct space *s , double cell_max ) {
         ind[k] = cell_getid( cdim , p->x[0]*ih[0] , p->x[1]*ih[1] , p->x[2]*ih[2] );
         atomic_inc( &s->cells[ ind[k] ].count );
         }
-    printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
+    // printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
 
     /* Sort the parts according to their cells. */
-    tic = getticks();
+    // tic = getticks();
     parts_sort( parts , ind , s->nr_parts , 0 , s->nr_cells-1 );
-    printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
+    // printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
     
     /* Verify sort. */
     /* for ( k = 1 ; k < nr_parts ; k++ ) {
@@ -308,7 +308,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
         
     /* At this point, we have the upper-level cells, old or new. Now make
        sure that the parts in each cell are ok. */
-    tic = getticks();
+    // tic = getticks();
     k = 0;
     #pragma omp parallel num_threads(8) shared(s,k)
     {
@@ -320,7 +320,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
                 break;
             }
         }
-    printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
+    // printf( "space_rebuild: space_split took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
         
     }