diff --git a/src/cell.h b/src/cell.h index 211f16ab3078b082395ebee0babc594ec2b7882d..6005929f35cb61294247d593f05680e4ce3b3d7b 100644 --- a/src/cell.h +++ b/src/cell.h @@ -88,7 +88,7 @@ struct cell { int nr_density; /* The ghost task to link density to interactions. */ - struct task *ghost, *kick2; + struct task *ghost, *kick1, *kick2; /* Number of tasks that are associated with this cell. */ int nr_tasks; @@ -117,10 +117,6 @@ struct cell { /* Linking pointer for "memory management". */ struct cell *next; - /* Timing stuff. */ - ticks tic, toc; - int tid; - } __attribute__((aligned (64))); diff --git a/src/engine.c b/src/engine.c index 9fde2d8bb681a0bb0e4bf7a64a5b56945eaa571c..8637ee07f44b8f88aa00d9dab9f83e1ed4318cc6 100644 --- a/src/engine.c +++ b/src/engine.c @@ -70,7 +70,7 @@ void engine_maketasks ( struct engine *e ) { struct cell *ci, *cj; /* Re-set the scheduler. */ - scheduler_reset( sched , s->tot_cells * space_maxtaskspercell ); + scheduler_reset( sched , s->tot_cells * engine_maxtaskspercell ); /* Run through the highest level of cells and add pairs. */ for ( i = 0 ; i < cdim[0] ; i++ ) @@ -351,7 +351,12 @@ void engine_prepare ( struct engine *e ) { } /* Start the scheduler. */ - scheduler_start( &e->sched ); + scheduler_start( &e->sched , (1 << task_type_sort) | + (1 << task_type_self) | + (1 << task_type_pair) | + (1 << task_type_sub) | + (1 << task_type_ghost) | + (1 << task_type_kick2) ); TIMER_TOC( timer_prepare ); @@ -403,151 +408,6 @@ void engine_barrier( struct engine *e ) { } -/** - * @brief Mapping function to set dt_min and dt_max, do the first - * kick. - */ - -void engine_map_kick_first ( struct cell *c , void *data ) { - - int j, k; - struct engine *e = (struct engine *)data; - float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt; - float dt_min, dt_max, h_max, dx, dx_max; - float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho; - double x[3], x_old[3]; - struct part *restrict p; - struct xpart *restrict xp; - - /* No children? */ - if ( !c->split ) { - - /* Store the timer. */ - c->tic = getticks(); - c->tid = omp_get_thread_num(); - - /* Init the min/max counters. */ - dt_min = FLT_MAX; - dt_max = 0.0f; - h_max = 0.0f; - dx_max = 0.0f; - - /* Loop over parts. */ - for ( k = 0 ; k < c->count ; k++ ) { - - /* Get a handle on the kth particle. */ - p = &c->parts[k]; - xp = p->xtras; - - /* Load the data locally. */ - a[0] = p->a[0]; a[1] = p->a[1]; a[2] = p->a[2]; - v[0] = p->v[0]; v[1] = p->v[1]; v[2] = p->v[2]; - x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2]; - x_old[0] = xp->x_old[0]; x_old[1] = xp->x_old[1]; x_old[2] = xp->x_old[2]; - h = p->h; - u = p->u; - h_dt = p->force.h_dt; - u_dt = p->force.u_dt; - pdt = p->dt; - - /* Store the min/max dt. */ - dt_min = fminf( dt_min , pdt ); - dt_max = fmaxf( dt_max , pdt ); - - /* Step and store the velocity and internal energy. */ - xp->v_old[0] = v_old[0] = v[0] + hdt * a[0]; - xp->v_old[1] = v_old[1] = v[1] + hdt * a[1]; - xp->v_old[2] = v_old[2] = v[2] + hdt * a[2]; - xp->u_old = p->u + hdt * p->force.u_dt; - - /* Move the particles with the velocitie at the half-step. */ - p->x[0] = x[0] += dt * v_old[0]; - p->x[1] = x[1] += dt * v_old[1]; - p->x[2] = x[2] += dt * v_old[2]; - dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) + - (x[1] - x_old[1])*(x[1] - x_old[1]) + - (x[2] - x_old[2])*(x[2] - x_old[2]) ); - dx_max = fmaxf( dx_max , dx ); - - /* Update positions and energies at the half-step. */ - p->v[0] = v[0] += dt * a[0]; - p->v[1] = v[1] += dt * a[1]; - p->v[2] = v[2] += dt * a[2]; - w = u_dt / u * dt; - if ( fabsf( w ) < 0.01f ) - p->u = u *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ); - else - p->u = u *= expf( w ); - w = h_dt / h * dt; - if ( fabsf( w ) < 0.01f ) - p->h = h *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ); - else - p->h = h *= expf( w ); - h_max = fmaxf( h_max , h ); - - - /* Integrate other values if this particle will not be updated. */ - /* Init fields for density calculation. */ - if ( pdt > dt_step ) { - float w = -3.0f * h_dt / h * dt; - if ( fabsf( w ) < 0.1f ) - rho = p->rho *= 1.0f + w*( 1.0f + w*( 0.5f + w*(1.0f/6.0f + 1.0f/24.0f*w ) ) ); - else - rho = p->rho *= expf( w ); - p->force.POrho2 = u * ( const_hydro_gamma - 1.0f ) / ( rho * xp->omega ); - } - else { - p->density.wcount = 0.0f; - p->density.wcount_dh = 0.0f; - p->rho = 0.0f; - p->rho_dh = 0.0f; - p->density.div_v = 0.0f; - for ( j = 0 ; j < 3 ; ++j) - p->density.curl_v[j] = 0.0f; - } - - } - - /* Store the timer. */ - c->toc = getticks(); - - } - - /* Otherwise, agregate data from children. */ - else { - - /* Init with the first non-null child. */ - dt_min = FLT_MAX; - dt_max = 0.0f; - h_max = 0.0f; - dx_max = 0.0f; - - /* Loop over the remaining progeny. */ - for ( k = 0 ; k < 8 ; k++ ) - if ( c->progeny[k] != NULL ) { - #pragma omp task - engine_map_kick_first( c->progeny[k] , e ); - } - #pragma omp taskwait - for ( k = 0 ; k < 8 ; k++ ) - if ( c->progeny[k] != NULL ) { - dt_min = fminf( dt_min , c->progeny[k]->dt_min ); - dt_max = fmaxf( dt_max , c->progeny[k]->dt_max ); - h_max = fmaxf( h_max , c->progeny[k]->h_max ); - dx_max = fmaxf( dx_max , c->progeny[k]->dx_max ); - } - - } - - /* Store the values. */ - c->dt_min = dt_min; - c->dt_max = dt_max; - c->h_max = h_max; - c->dx_max = dx_max; - - } - - /** * @brief Mapping function to collect the data from the second kick. */ @@ -736,36 +596,20 @@ void engine_step ( struct engine *e ) { /* First kick. */ TIMER_TIC - // space_map_cells_post( e->s , 1 , &engine_map_kick_first , e ); - /* k = 0; - #pragma omp parallel shared(k,e) - { - int myk; - while ( 1 ) { - #pragma omp critical - myk = k++; - if ( myk < e->s->nr_cells ) { - e->s->cells[myk].tic = getticks(); - e->s->cells[myk].tid = omp_get_thread_num(); - engine_map_kick_first( &e->s->cells[myk] , e ); - e->s->cells[myk].toc = getticks(); - } - else - break; - } - } */ - #pragma omp parallel private(k) - { - #pragma omp single - { - for ( k = 0 ; k < e->s->nr_cells ; k++ ) { - #pragma omp task - engine_map_kick_first( &e->s->cells[k] , e ); - } - } - #pragma omp taskwait - } + scheduler_start( &e->sched , (1 << task_type_kick1) ); + e->barrier_count = -e->barrier_count; + if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 ) + error( "Failed to broadcast barrier open condition." ); + while ( e->barrier_count < e->nr_threads ) + if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 ) + error( "Error while waiting for barrier." ); TIMER_TOC( timer_kick1 ); + + /* Check if all the kick1 threads have executed. */ + for ( k = 0 ; k < e->sched.nr_tasks ; k++ ) + if ( e->sched.tasks[k].type == task_type_kick1 && + e->sched.tasks[k].tic == 0 ) + error( "Not all kick1 tasks completed." ); // for(k=0; k<10; ++k) // printParticle(parts, k); @@ -919,6 +763,10 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread /* Init the scheduler. */ scheduler_init( &e->sched , e->s , nr_queues , scheduler_flag_steal ); + /* Append a kick1 task to each cell. */ + scheduler_reset( &e->sched , s->tot_cells ); + space_map_cells_pre( e->s , 1 , &scheduler_map_mkkick1 , &e->sched ); + /* Allocate and init the threads. */ if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL ) error( "Failed to allocate threads array." ); diff --git a/src/engine.h b/src/engine.h index 8efae350763a2e5133aa2199ec52207ca2e6b981..ade67006c7c1b1ebdd6f28dd2b083493acb4bae1 100644 --- a/src/engine.h +++ b/src/engine.h @@ -29,6 +29,7 @@ #define engine_policy_multistep 32 #define engine_queue_scale 1.2 +#define engine_maxtaskspercell 32 /* Data structure for the engine. */ diff --git a/src/runner.c b/src/runner.c index 90c34007d66183a77793157fa6bfbef52c6baa8d..6272236984af45c1930616f09c13ce08d72b7873 100644 --- a/src/runner.c +++ b/src/runner.c @@ -607,6 +607,138 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { } +/** + * @brief Mapping function to set dt_min and dt_max, do the first + * kick. + */ + +void runner_dokick1 ( struct runner *r , struct cell *c ) { + + int j, k; + struct engine *e = r->e; + float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt; + float dt_min, dt_max, h_max, dx, dx_max; + float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho; + double x[3], x_old[3]; + struct part *restrict p; + struct xpart *restrict xp; + + /* No children? */ + if ( !c->split ) { + + /* Init the min/max counters. */ + dt_min = FLT_MAX; + dt_max = 0.0f; + h_max = 0.0f; + dx_max = 0.0f; + + /* Loop over parts. */ + for ( k = 0 ; k < c->count ; k++ ) { + + /* Get a handle on the kth particle. */ + p = &c->parts[k]; + xp = p->xtras; + + /* Load the data locally. */ + a[0] = p->a[0]; a[1] = p->a[1]; a[2] = p->a[2]; + v[0] = p->v[0]; v[1] = p->v[1]; v[2] = p->v[2]; + x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2]; + x_old[0] = xp->x_old[0]; x_old[1] = xp->x_old[1]; x_old[2] = xp->x_old[2]; + h = p->h; + u = p->u; + h_dt = p->force.h_dt; + u_dt = p->force.u_dt; + pdt = p->dt; + + /* Store the min/max dt. */ + dt_min = fminf( dt_min , pdt ); + dt_max = fmaxf( dt_max , pdt ); + + /* Step and store the velocity and internal energy. */ + xp->v_old[0] = v_old[0] = v[0] + hdt * a[0]; + xp->v_old[1] = v_old[1] = v[1] + hdt * a[1]; + xp->v_old[2] = v_old[2] = v[2] + hdt * a[2]; + xp->u_old = p->u + hdt * p->force.u_dt; + + /* Move the particles with the velocitie at the half-step. */ + p->x[0] = x[0] += dt * v_old[0]; + p->x[1] = x[1] += dt * v_old[1]; + p->x[2] = x[2] += dt * v_old[2]; + dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) + + (x[1] - x_old[1])*(x[1] - x_old[1]) + + (x[2] - x_old[2])*(x[2] - x_old[2]) ); + dx_max = fmaxf( dx_max , dx ); + + /* Update positions and energies at the half-step. */ + p->v[0] = v[0] += dt * a[0]; + p->v[1] = v[1] += dt * a[1]; + p->v[2] = v[2] += dt * a[2]; + w = u_dt / u * dt; + if ( fabsf( w ) < 0.01f ) + p->u = u *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ); + else + p->u = u *= expf( w ); + w = h_dt / h * dt; + if ( fabsf( w ) < 0.01f ) + p->h = h *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ); + else + p->h = h *= expf( w ); + h_max = fmaxf( h_max , h ); + + + /* Integrate other values if this particle will not be updated. */ + /* Init fields for density calculation. */ + if ( pdt > dt_step ) { + float w = -3.0f * h_dt / h * dt; + if ( fabsf( w ) < 0.1f ) + rho = p->rho *= 1.0f + w*( 1.0f + w*( 0.5f + w*(1.0f/6.0f + 1.0f/24.0f*w ) ) ); + else + rho = p->rho *= expf( w ); + p->force.POrho2 = u * ( const_hydro_gamma - 1.0f ) / ( rho * xp->omega ); + } + else { + p->density.wcount = 0.0f; + p->density.wcount_dh = 0.0f; + p->rho = 0.0f; + p->rho_dh = 0.0f; + p->density.div_v = 0.0f; + for ( j = 0 ; j < 3 ; ++j) + p->density.curl_v[j] = 0.0f; + } + + } + + } + + /* Otherwise, agregate data from children. */ + else { + + /* Init with the first non-null child. */ + dt_min = FLT_MAX; + dt_max = 0.0f; + h_max = 0.0f; + dx_max = 0.0f; + + /* Loop over the progeny. */ + for ( k = 0 ; k < 8 ; k++ ) + if ( c->progeny[k] != NULL ) { + dt_min = fminf( dt_min , c->progeny[k]->dt_min ); + dt_max = fmaxf( dt_max , c->progeny[k]->dt_max ); + h_max = fmaxf( h_max , c->progeny[k]->h_max ); + dx_max = fmaxf( dx_max , c->progeny[k]->dx_max ); + } + + } + + /* Store the values. */ + c->dt_min = dt_min; + c->dt_max = dt_max; + c->h_max = h_max; + c->dx_max = dx_max; + + } + + /** * @brief The #runner main thread routine. * @@ -679,6 +811,9 @@ void *runner_main ( void *data ) { if ( ci->super == ci ) runner_doghost( r , ci ); break; + case task_type_kick1: + runner_dokick1( r , ci ); + break; case task_type_kick2: runner_dokick2( r , ci ); break; diff --git a/src/scheduler.c b/src/scheduler.c index 9864191fa419f8ed166585441e718d08dcb9df1d..76c495c7ee6e16830515cb9adc900e76dab9fb8f 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -71,7 +71,12 @@ void scheduler_map_mkghosts ( struct cell *c , void *data ) { if ( c->super != c || c->nr_tasks > 0 ) c->ghost = scheduler_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 ); - /* Append a kick task if we are the active super cell. */ + /* Append a kick1 task and make sure the parent depends on it. */ + c->kick1 = scheduler_addtask( s , task_type_kick1 , task_subtype_none , 0 , 0 , c , NULL , 0 ); + if ( c->parent != NULL ) + task_addunlock( c->kick1 , c->parent->kick1 ); + + /* Append a kick2 task if we are the active super cell. */ if ( c->super == c && c->nr_tasks > 0 ) c->kick2 = scheduler_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 ); @@ -83,6 +88,29 @@ void scheduler_map_mkghosts ( struct cell *c , void *data ) { } +/** + * @brief Mapping function to append a ghost task to each cell. + * + * A kick1-task is appended to each cell. + */ + +void scheduler_map_mkkick1 ( struct cell *c , void *data ) { + + struct scheduler *s = (struct scheduler *)data; + struct cell *finger; + + /* Append a kick1 task and make sure the parent depends on it. */ + c->kick1 = scheduler_addtask( s , task_type_kick1 , task_subtype_none , 0 , 0 , c , NULL , 0 ); + if ( c->parent != NULL ) + task_addunlock( c->kick1 , c->parent->kick1 ); + + /* Set a bogus super cell. */ + for ( finger = c ; finger->parent != NULL ; finger = finger->parent ); + c->super = finger; + + } + + /** * @brief Split tasks that may be too large. * @@ -525,7 +553,7 @@ void scheduler_reset ( struct scheduler *s , int size ) { * @param s The #scheduler. */ -void scheduler_start ( struct scheduler *s ) { +void scheduler_start ( struct scheduler *s , unsigned int mask ) { int k, j, *tid = s->tasks_ind; struct task *t, *tasks = s->tasks; @@ -535,7 +563,7 @@ void scheduler_start ( struct scheduler *s ) { // #pragma omp parallel for schedule(static) private(t,j) for ( k = s->nr_tasks-1 ; k >= 0 ; k-- ) { t = &tasks[ tid[k] ]; - if ( !t->skip ) { + if ( ( (1 << t->type) & mask ) && !t->skip ) { for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) atomic_inc( &t->unlock_tasks[j]->wait ); t->maxdepth = 0; @@ -570,6 +598,7 @@ void scheduler_start ( struct scheduler *s ) { if ( t->ci == t->ci->super ) t->weight += t->ci->count; break; + case task_type_kick1: case task_type_kick2: t->weight += t->ci->count; break; @@ -580,7 +609,7 @@ void scheduler_start ( struct scheduler *s ) { /* Loop over the tasks and enqueue whoever is ready. */ for ( k = 0 ; k < s->nr_tasks ; k++ ) { t = &s->tasks[k]; - if ( !t->skip && t->wait == 0 ) + if ( ( (1 << t->type) & mask ) && !t->skip && t->wait == 0 ) scheduler_enqueue( s , t ); } @@ -607,6 +636,7 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) { case task_type_self: case task_type_sort: case task_type_ghost: + case task_type_kick1: case task_type_kick2: qid = t->ci->super->owner; break; @@ -617,6 +647,8 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) { ( qid < 0 || s->queues[qid].count > s->queues[t->cj->super->owner].count ) ) qid = t->cj->super->owner; break; + default: + qid = -1; } /* If no previous owner, find the shortest queue. */ diff --git a/src/scheduler.h b/src/scheduler.h index b4c7b871e0081994addbc547fcf4544019ef8dfb..b20c7ecaf793bc0dceb6d29ad4f74d0423ba7067 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -71,10 +71,11 @@ struct scheduler { void scheduler_init ( struct scheduler *s , struct space *space , int nr_queues , unsigned int flags ); struct task *scheduler_gettask ( struct scheduler *s , int qid ); void scheduler_enqueue ( struct scheduler *s , struct task *t ); -void scheduler_start ( struct scheduler *s ); +void scheduler_start ( struct scheduler *s , unsigned int mask ); void scheduler_reset ( struct scheduler *s , int nr_tasks ); void scheduler_ranktasks ( struct scheduler *s ); struct task *scheduler_addtask ( struct scheduler *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight ); void scheduler_splittasks ( struct scheduler *s ); void scheduler_map_mkghosts ( struct cell *c , void *data ); +void scheduler_map_mkkick1 ( struct cell *c , void *data ); void scheduler_done ( struct scheduler *s , struct task *t ); diff --git a/src/space.h b/src/space.h index 6e31cd4397da07c01100e3e1766e33161b76bc4b..6995c7f34d207a597b573885b3b6055bc081a763 100644 --- a/src/space.h +++ b/src/space.h @@ -27,7 +27,6 @@ #define space_splitsize_default 400 #define space_subsize_default 5000 #define space_stretch 1.05f -#define space_maxtaskspercell 31 #define space_maxreldx 0.2f diff --git a/src/task.c b/src/task.c index f75f96312e6b9fc2508bec63df4059b76131632d..1f27bc08401e609208ff85cef4c0afba2ce32ae8 100644 --- a/src/task.c +++ b/src/task.c @@ -39,7 +39,7 @@ #include "error.h" /* Task type names. */ -const char *taskID_names[task_type_count] = { "none" , "sort" , "self" , "pair" , "sub" , "ghost" , "kick2" }; +const char *taskID_names[task_type_count] = { "none" , "sort" , "self" , "pair" , "sub" , "ghost" , "kick1" , "kick2" }; /** diff --git a/src/task.h b/src/task.h index fb7192744c8f2f010f51483bf15af8bbbea5a5e5..1eb7fad118bcd3d5f1216f37554f226605f10459 100644 --- a/src/task.h +++ b/src/task.h @@ -31,6 +31,7 @@ enum task_types { task_type_pair, task_type_sub, task_type_ghost, + task_type_kick1, task_type_kick2, task_type_count };