diff --git a/src/cell.c b/src/cell.c index ba1a631037cce6cf7e86b9b1ad556ea97a2276a3..c8fb2901cb3a5796ce3bb099955e23bd6520a02d 100644 --- a/src/cell.c +++ b/src/cell.c @@ -38,7 +38,7 @@ /* Error macro. */ -#define error(s) { printf( "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } +#define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } /* The timers. */ diff --git a/src/engine.c b/src/engine.c index 1001b9c87a13b67cf3d5d376e99e7081d681d64a..8343cda8f91b1587e310ffd064f7813295dd7131 100644 --- a/src/engine.c +++ b/src/engine.c @@ -45,7 +45,7 @@ #include "runner_iact.h" /* Error macro. */ -#define error(s) { printf( "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } +#define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } /* Convert cell location to ID. */ #define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) ) @@ -206,7 +206,6 @@ void engine_run ( struct engine *e , int sort_queues ) { /* Run throught the tasks and get all the waits right. */ for ( k = 0 ; k < s->nr_tasks ; k++ ) { - s->tasks[k].done = 0; for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ ) s->tasks[k].unlock_tasks[j]->wait += 1; for ( j = 0 ; j < s->tasks[k].nr_unlock_cells ; j++ ) diff --git a/src/ic.c b/src/ic.c index c9c7d8559fbe50340ad0d78af9c6bbca81e8cbc1..853311484b75b299d81b62839d88a7515d5a699c 100644 --- a/src/ic.c +++ b/src/ic.c @@ -32,13 +32,13 @@ #include <hdf5.h> -#include "task.h" #include "lock.h" +#include "task.h" #include "part.h" #include "space.h" /* Error macro. */ -#define error(s) { printf( "%s:%s():%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } +#define error(s) { fprintf( stderr , "%s:%s():%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } /** diff --git a/src/lock.h b/src/lock.h index b98887999c3e4e67b00c4e21ef63b34037db3ea5..dc364177e1057583724e2268ee6d8c1093b77ee2 100644 --- a/src/lock.h +++ b/src/lock.h @@ -35,6 +35,7 @@ #define lock_lock( l ) ( pthread_spin_lock( l ) != 0 ) #define lock_trylock( l ) ( pthread_spin_lock( l ) != 0 ) #define lock_unlock( l ) ( pthread_spin_unlock( l ) != 0 ) + #define lock_unlock_blind( l ) pthread_spin_unlock( l ) #else #define lock_type volatile int #define lock_init( l ) ( *l = 0 ) @@ -46,4 +47,5 @@ } #define lock_trylock( l ) ( ( *(l) ) ? 1 : __sync_val_compare_and_swap( l , 0 , 1 ) ) #define lock_unlock( l ) ( __sync_val_compare_and_swap( l , 1 , 0 ) != 1 ) + #define lock_unlock_blind( l ) __sync_val_compare_and_swap( l , 1 , 0 ) #endif diff --git a/src/queue.c b/src/queue.c index d8939150fd30b6b5246d1868df14d7d3bb546552..79edee4d282af7423c1c03ed4e70adf534de3519 100644 --- a/src/queue.c +++ b/src/queue.c @@ -39,7 +39,7 @@ #include "queue.h" /* Error macro. */ -#define error(s) { printf( "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } +#define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } /* Define the timer macros. */ #ifdef TIMER_VERBOSE @@ -147,7 +147,7 @@ void queue_init ( struct queue *q , int size , struct task *tasks ) { * @param keep Remove the returned task from this queue. */ -struct task *queue_gettask ( struct queue *q , int blocking , int keep ) { +struct task *queue_gettask_old ( struct queue *q , int blocking , int keep ) { int k, tid = -1, qcount, *qtid = q->tid; lock_type *qlock = &q->lock; @@ -267,7 +267,7 @@ struct task *queue_gettask ( struct queue *q , int blocking , int keep ) { } -struct task *queue_gettask_new ( struct queue *q , int rid , int blocking , int keep ) { +struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep ) { int k, tid = -1, qcount, *qtid = q->tid; lock_type *qlock = &q->lock; diff --git a/src/queue.h b/src/queue.h index e8fc4930eed87a3c4b2f62846ae46c1ea536a8d8..491a8e718fa883cadd1cdbfd91ac39918d846f87 100644 --- a/src/queue.h +++ b/src/queue.h @@ -59,8 +59,8 @@ struct queue { /* Function prototypes. */ -struct task *queue_gettask ( struct queue *q , int blocking , int keep ); -struct task *queue_gettask_new ( struct queue *q , int rid , int blocking , int keep ); +struct task *queue_gettask_old ( struct queue *q , int blocking , int keep ); +struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep ); void queue_init ( struct queue *q , int size , struct task *tasks ); void queue_insert ( struct queue *q , struct task *t ); void queue_sort ( struct queue *q ); diff --git a/src/runner.c b/src/runner.c index 2c2a4221bb6a84849b8e884480d68080d17cc32d..ae765c14d98ca428ed20f6c193e18c7865592d7b 100644 --- a/src/runner.c +++ b/src/runner.c @@ -45,7 +45,7 @@ #include "runner_iact.h" /* Error macro. */ -#define error(s) { printf( "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } +#define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } /* Convert cell location to ID. */ #define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) ) @@ -508,11 +508,11 @@ void *runner_main ( void *data ) { TIMER_TIC t = NULL; if ( e->nr_queues == 1 ) { - t = queue_gettask( &e->queues[0] , 1 , 0 ); + t = queue_gettask_old( &e->queues[0] , 1 , 0 ); } else if ( e->policy & engine_policy_steal ) { if ( ( myq->next == myq->count ) || - ( t = queue_gettask_new( myq , r->id , 0 , 0 ) ) == NULL ) { + ( t = queue_gettask( myq , r->id , 0 , 0 ) ) == NULL ) { TIMER_TIC2 qid = rand_r( &myseed ) % naq; keep = ( e->policy & engine_policy_keep ) && @@ -521,7 +521,7 @@ void *runner_main ( void *data ) { COUNT(runner_counter_steal_empty); else COUNT(runner_counter_steal_stall); - t = queue_gettask_new( queues[qid] , r->id , 0 , keep ); + t = queue_gettask( queues[qid] , r->id , 0 , keep ); if ( t != NULL && keep ) queue_insert( myq , t ); TIMER_TOC2(runner_timer_steal); @@ -529,10 +529,10 @@ void *runner_main ( void *data ) { } else if ( e->policy & engine_policy_rand ) { qid = rand_r( &myseed ) % naq; - t = queue_gettask( queues[qid] , e->policy & engine_policy_block , 0 ); + t = queue_gettask( queues[qid] , r->id , e->policy & engine_policy_block , 0 ); } else { - t = queue_gettask( &e->queues[threadID] , e->policy & engine_policy_block , 0 ); + t = queue_gettask( &e->queues[threadID] , r->id , e->policy & engine_policy_block , 0 ); } TIMER_TOC(runner_timer_getpair); @@ -602,8 +602,6 @@ void *runner_main ( void *data ) { error( "Unknown task type." ); } - t->done = 1; - /* Resolve any dependencies. */ for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) if ( __sync_fetch_and_sub( &t->unlock_tasks[k]->wait , 1 ) == 0 ) diff --git a/src/space.c b/src/space.c index 9e8d02a4b4216abd0ea1538108bfa2ad269ada80..60d2019f19f5c94a8d3e0edbb60b28e5811a0bd6 100644 --- a/src/space.c +++ b/src/space.c @@ -39,7 +39,7 @@ #include "runner.h" /* Error macro. */ -#define error(s) { printf( "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } +#define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } /* Split size. */ int space_splitsize = space_splitsize_default; @@ -288,6 +288,7 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ + #pragma omp parallel for shared(s) reduction(+:changes) for ( k = 0 ; k < s->nr_cells ; k++ ) changes += space_rebuild_recurse( s , &s->cells[k] ); @@ -344,14 +345,35 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) { error( "Sorting failed (>pivot)." ); } - /* Recurse on the left? */ - if ( j > 0 && pivot > min ) - parts_sort( parts , ind , j+1 , min , pivot ); + /* Try to recurse in parallel. */ + if ( N < 100 ) { + + /* Recurse on the left? */ + if ( j > 0 && pivot > min ) + parts_sort( parts , ind , j+1 , min , pivot ); + + /* Recurse on the right? */ + if ( i < N && pivot+1 < max ) + parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + + } - /* Recurse on the right? */ - if ( i < N && pivot+1 < max ) - parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + else + #pragma omp parallel sections + { + + /* Recurse on the left? */ + #pragma omp section + if ( j > 0 && pivot > min ) + parts_sort( parts , ind , j+1 , min , pivot ); + + /* Recurse on the right? */ + #pragma omp section + if ( i < N && pivot+1 < max ) + parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + } + } @@ -413,101 +435,6 @@ void space_map_clearnrtasks ( struct cell *c , void *data ) { } -/** - * @brief Get a task free of dependencies and conflicts. - * - * @param s The #space. - */ - -struct task *space_gettask ( struct space *s ) { - - int k, tid = -1; - struct task *res = NULL; - struct cell *c; - - /* Main loop, while there are tasks... */ - while ( s->next_task < s->nr_tasks ) { - - /* Grab the task lock. */ - if ( lock_lock( &s->task_lock ) != 0 ) - error( "Locking the task_lock failed.\n" ); - - /* Loop over the remaining task IDs. */ - for ( k = s->next_task ; k < s->nr_tasks ; k++ ) { - - /* Put a finger on the task. */ - res = &s->tasks[ s->tasks_ind[k] ]; - - /* Is this task blocked? */ - if ( res->wait ) - continue; - - /* Different criteria for different types. */ - switch ( res->type ) { - case task_type_self: - if ( res->ci->lock || res->ci->wait > 0 ) - continue; - break; - case task_type_pair: - if ( res->ci->lock || res->cj->lock || res->ci->wait || res->cj->wait ) - continue; - break; - case task_type_sort: - if ( res->ci->lock ) - continue; - break; - } - - /* If we made it this far, we're safe. */ - break; - - } /* loop over the task IDs. */ - - /* Did we get a task? */ - if ( k < s->nr_tasks ) { - - // /* Swap to front. */ - // tid = s->tasks_ind[k]; - // s->tasks_ind[k] = s->tasks_ind[ s->next_task ]; - // s->tasks_ind[ s->next_task ] = tid; - - /* Bubble-down the task. */ - tid = s->tasks_ind[k]; - while ( k > s->next_task ) { - s->tasks_ind[ k ] = s->tasks_ind[ k-1 ]; - k -= 1; - } - s->tasks_ind[ s->next_task ] = tid; - - /* Lock the cells, if needed. */ - if ( s->tasks[tid].type != task_type_sort ) { - for ( c = res->ci ; c != NULL ; c = c->parent ) - __sync_fetch_and_add( &c->lock , 1 ); - for ( c = res->cj ; c != NULL ; c = c->parent ) - __sync_fetch_and_add( &c->lock , 1 ); - } - - /* up the counter. */ - s->next_task += 1; - - } - - /* Release the task lock. */ - if ( lock_unlock( &s->task_lock ) != 0 ) - error( "Locking the task_lock failed.\n" ); - - /* Leave? */ - if ( tid >= 0 ) - return &s->tasks[tid]; - - } /* while there are tasks. */ - - /* No beef. */ - return NULL; - - } - - /** * @brief Map a function to all particles in a aspace. * @@ -584,7 +511,13 @@ void space_map_cells ( struct space *s , int full , void (*fun)( struct cell *c struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , struct task *unlock_tasks[] , int nr_unlock_tasks , struct cell *unlock_cells[] , int nr_unlock_cells ) { - struct task *t = &s->tasks[ s->nr_tasks ]; + struct task *t; + + /* Lock the space. */ + lock_lock( &s->lock ); + + /* Get the next free task. */ + t = &s->tasks[ s->nr_tasks ]; /* Copy the data. */ t->type = type; @@ -603,6 +536,9 @@ struct task *space_addtask ( struct space *s , int type , int subtype , int flag /* Increase the task counter. */ s->nr_tasks += 1; + /* Unock the space. */ + lock_unlock_blind( &s->lock ); + /* Return a pointer to the new task. */ return t; @@ -1344,9 +1280,6 @@ void space_maketasks ( struct space *s , int do_sort ) { printf( " %s=%i" , taskID_names[k] , counts[k] ); printf( " ]\n" ); - /* Re-set the next task pointer. */ - s->next_task = 0; - } @@ -1444,6 +1377,9 @@ void space_split ( struct space *s , struct cell *c ) { void space_recycle ( struct space *s , struct cell *c ) { + /* Lock the space. */ + lock_lock( &s->lock ); + /* Clear the cell. */ if ( lock_destroy( &c->lock ) != 0 ) error( "Failed to destroy spinlock." ); @@ -1457,6 +1393,9 @@ void space_recycle ( struct space *s , struct cell *c ) { s->cells_new = c; s->tot_cells -= 1; + /* Unlock the space. */ + lock_unlock_blind( &s->lock ); + } @@ -1471,6 +1410,9 @@ struct cell *space_getcell ( struct space *s ) { struct cell *c; int k; + /* Lock the space. */ + lock_lock( &s->lock ); + /* Is the buffer empty? */ if ( s->cells_new == NULL ) { if ( posix_memalign( (void *)&s->cells_new , 64 , space_cellallocchunk * sizeof(struct cell) ) != 0 ) @@ -1491,6 +1433,9 @@ struct cell *space_getcell ( struct space *s ) { if ( lock_init( &c->lock ) != 0 ) error( "Failed to initialize cell spinlock." ); + /* Unlock the space. */ + lock_unlock_blind( &s->lock ); + return c; } @@ -1519,8 +1464,10 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , s->periodic = periodic; s->nr_parts = N; s->parts = parts; - if ( lock_init( &s->task_lock ) != 0 ) - error( "Failed to create task spin-lock." ); + + /* Init the space lock. */ + if ( lock_init( &s->lock ) != 0 ) + error( "Failed to create space spin-lock." ); /* Build the cells and the tasks. */ space_rebuild( s , 1 , h_max ); diff --git a/src/space.h b/src/space.h index 532bed551115c7bd5eb446043b55cc9e88bd5383..6230cb298f419da4d5412b30acadda735d8ca985 100644 --- a/src/space.h +++ b/src/space.h @@ -87,9 +87,11 @@ struct space { /* The list of tasks. */ struct task *tasks; - int nr_tasks, next_task, tasks_size; + int nr_tasks, tasks_size; int *tasks_ind; - lock_type task_lock; + + /* General-purpose lock for this space. */ + lock_type lock; }; diff --git a/src/task.c b/src/task.c index 7a0b8a5b94da32bce914dea53139631b6b3d8236..81b9babfcac4df407aa31756a2bc3526d93d8aca 100644 --- a/src/task.c +++ b/src/task.c @@ -41,7 +41,7 @@ const char *taskID_names[task_type_count] = { "none" , "sort" , "self" , "pair" , "sub" , "ghost" }; /* Error macro. */ -#define error(s) { printf( "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } +#define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); } /** @@ -55,10 +55,13 @@ void task_rmunlock( struct task *ta , struct task *tb ) { int k; + lock_lock( &ta->lock ); + for ( k = 0 ; k < ta->nr_unlock_tasks ; k++ ) if ( ta->unlock_tasks[k] == tb ) { ta->nr_unlock_tasks -= 1; ta->unlock_tasks[k] = ta->unlock_tasks[ ta->nr_unlock_tasks ]; + lock_unlock_blind( &ta->lock ); return; } error( "Task not found." ); @@ -80,12 +83,16 @@ void task_rmunlock_blind( struct task *ta , struct task *tb ) { int k; + lock_lock( &ta->lock ); + for ( k = 0 ; k < ta->nr_unlock_tasks ; k++ ) if ( ta->unlock_tasks[k] == tb ) { ta->nr_unlock_tasks -= 1; ta->unlock_tasks[k] = ta->unlock_tasks[ ta->nr_unlock_tasks ]; - return; + break; } + + lock_unlock_blind( &ta->lock ); } @@ -105,10 +112,14 @@ void task_addunlock( struct task *ta , struct task *tb ) { if ( ta == NULL || tb == NULL ) return; + lock_lock( &ta->lock ); + /* Check if ta already unlocks tb. */ for ( k = 0 ; k < ta->nr_unlock_tasks ; k++ ) - if ( ta->unlock_tasks[k] == tb ) + if ( ta->unlock_tasks[k] == tb ) { + lock_unlock_blind( &ta->lock ); return; + } if ( ta->nr_unlock_tasks == task_maxunlock ) error( "Too many unlock_tasks in task." ); @@ -116,6 +127,8 @@ void task_addunlock( struct task *ta , struct task *tb ) { ta->unlock_tasks[ ta->nr_unlock_tasks] = tb; ta->nr_unlock_tasks += 1; + lock_unlock_blind( &ta->lock ); + } diff --git a/src/task.h b/src/task.h index 2a4d20fd55b69f947d5850211085b1c23e4f5071..cd1c692f8147e9c5a354c5da02addedf3eb85148 100644 --- a/src/task.h +++ b/src/task.h @@ -49,7 +49,9 @@ extern const char *taskID_names[]; /* Data of a task. */ struct task { - int type, subtype, flags, wait, rank, done; + int type, subtype, flags, wait, rank; + + lock_type lock; int nr_unlock_tasks; struct task *unlock_tasks[ task_maxunlock ];