diff --git a/src/Makefile.am b/src/Makefile.am index 9c6a97a9f38152bb0108846b2ec950711e132bfd..a1683de16484b121fe2d1497aa585bf19b58a795 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -22,7 +22,7 @@ AUTOMAKE_OPTIONS=gnu # Add the debug flag to the whole thing AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \ -funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \ - -DTIMER -DCOUNTER -DCPU_TPS=2.40e9 + -DTIMER -DCOUNTER -DCPU_TPS=2.30e9 # AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \ # -DTIMER -DCOUNTER -DCPU_TPS=2.67e9 diff --git a/src/cell.c b/src/cell.c index 9ee55dfa3eae40acc77f11eec8c9a544c4235527..454f54d9cd3a59554a8d44cf9cf8a800b10a0ba8 100644 --- a/src/cell.c +++ b/src/cell.c @@ -29,6 +29,11 @@ #include <limits.h> #include <math.h> +/* Switch off timers. */ +#ifdef TIMER + #undef TIMER +#endif + /* Local headers. */ #include "cycle.h" #include "lock.h" diff --git a/src/engine.c b/src/engine.c index b3630aa148681220afd8ee6f89297d66d9213dfa..7c98d36d543c8e220c7d8aa283aec94968378676 100644 --- a/src/engine.c +++ b/src/engine.c @@ -213,6 +213,9 @@ void engine_maketasks ( struct engine *e ) { /* Rank the tasks. */ scheduler_ranktasks( sched ); + /* Weight the tasks. */ + scheduler_reweight( sched ); + /* Count the number of each task type. */ int counts[ task_type_count+1 ]; for ( k = 0 ; k <= task_type_count ; k++ ) @@ -868,6 +871,7 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread e->runners[k].cpuid = k; e->runners[k].qid = k * nr_queues / nr_threads; #endif + // printf( "engine_init: runner %i on cpuid=%i with qid=%i.\n" , e->runners[k].id , e->runners[k].cpuid , e->runners[k].qid ); } /* Wait for the runner threads to be in place. */ diff --git a/src/scheduler.c b/src/scheduler.c index 18515ff1da2ce5982f8cd681ad0b064a1840d328..4d880b01c41845c70033897e015cdf4ae953b983 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -555,25 +555,23 @@ void scheduler_reset ( struct scheduler *s , int size ) { /** - * @brief Start the scheduler, i.e. fill the queues with ready tasks. + * @brief Compute the task weights * * @param s The #scheduler. */ -void scheduler_start ( struct scheduler *s , unsigned int mask ) { +void scheduler_reweight ( struct scheduler *s ) { int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind; struct task *t, *tasks = s->tasks; + // ticks tic; /* Run throught the tasks backwards and set their waits and weights. */ + // tic = getticks(); // #pragma omp parallel for schedule(static) private(t,j) for ( k = nr_tasks-1 ; k >= 0 ; k-- ) { t = &tasks[ tid[k] ]; - if ( !( (1 << t->type) & mask ) || t->skip ) - continue; - for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) - atomic_inc( &t->unlock_tasks[j]->wait ); t->weight = 0; for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) if ( t->unlock_tasks[j]->weight > t->weight ) @@ -583,19 +581,19 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) { else switch ( t->type ) { case task_type_sort: - t->weight += t->ci->count * ( sizeof(int)*8 - __builtin_clz( t->ci->count ) ); + t->weight += __builtin_popcount( t->flags ) * t->ci->count * ( sizeof(int)*8 - __builtin_clz( t->ci->count ) ); break; case task_type_self: - t->weight += t->ci->count * t->ci->count; + t->weight += 2 * t->ci->count * t->ci->count; break; case task_type_pair: - t->weight += t->ci->count * t->cj->count; + t->weight += 2 * t->ci->count * t->cj->count; break; case task_type_sub: if ( t->cj != NULL ) - t->weight += t->ci->count * t->cj->count; + t->weight += 2 * t->ci->count * t->cj->count; else - t->weight += t->ci->count * t->ci->count; + t->weight += 2 * t->ci->count * t->ci->count; break; case task_type_ghost: if ( t->ci == t->ci->super ) @@ -607,8 +605,39 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) { break; } } + // printf( "scheduler_reweight: weighting tasks took %.3f ms.\n" , (double)( getticks() - tic ) / CPU_TPS * 1000 ); + + } + + +/** + * @brief Start the scheduler, i.e. fill the queues with ready tasks. + * + * @param s The #scheduler. + * @param mask The task types to enqueue. + */ + +void scheduler_start ( struct scheduler *s , unsigned int mask ) { + + int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind; + struct task *t, *tasks = s->tasks; + // ticks tic; + + /* Run throught the tasks backwards and set their waits. */ + // tic = getticks(); + // #pragma omp parallel for schedule(static) private(t,j) + for ( k = nr_tasks-1 ; k >= 0 ; k-- ) { + t = &tasks[ tid[k] ]; + if ( !( (1 << t->type) & mask ) || t->skip ) + continue; + for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) + atomic_inc( &t->unlock_tasks[j]->wait ); + } + // printf( "scheduler_reweight: waiting tasks took %.3f ms.\n" , (double)( getticks() - tic ) / CPU_TPS * 1000 ); /* Loop over the tasks and enqueue whoever is ready. */ + // tic = getticks(); + // #pragma omp parallel for schedule(static) private(t) for ( k = 0 ; k < nr_tasks ; k++ ) { t = &tasks[ tid[k] ]; if ( t->rank > 0 ) @@ -616,6 +645,7 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) { if ( ( (1 << t->type) & mask ) && !t->skip && t->wait == 0 ) scheduler_enqueue( s , t ); } + // printf( "scheduler_start: enqueueing tasks took %.3f ms.\n" , (double)( getticks() - tic ) / CPU_TPS * 1000 ); } @@ -742,6 +772,10 @@ struct task *scheduler_gettask ( struct scheduler *s , int qid ) { struct task *res = NULL; int k, nr_queues = s->nr_queues; + /* Check qid. */ + if ( qid >= nr_queues || qid < 0 ) + error( "Bad queue ID." ); + /* Loop as long as there are tasks... */ while ( s->waiting > 0 && res == NULL ) { diff --git a/src/scheduler.h b/src/scheduler.h index e2dd191c88ad76c9160aa1ca6ae1a05d715033f4..ee77aeafc00a96f492352eb2bbaeb381970d3828 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -73,8 +73,10 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ); void scheduler_start ( struct scheduler *s , unsigned int mask ); void scheduler_reset ( struct scheduler *s , int nr_tasks ); void scheduler_ranktasks ( struct scheduler *s ); +void scheduler_reweight ( struct scheduler *s ); struct task *scheduler_addtask ( struct scheduler *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ); void scheduler_splittasks ( struct scheduler *s ); void scheduler_map_mkghosts ( struct cell *c , void *data ); void scheduler_map_mkkick1 ( struct cell *c , void *data ); void scheduler_done ( struct scheduler *s , struct task *t ); +