Commit 4e9b5b63 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

screw openmp. made tasks for kick1, adapted scheduler to launch only certain...

screw openmp. made tasks for kick1, adapted scheduler to launch only certain task types on scheduler_reset.


Former-commit-id: 90d4d764588d3ce123ac490a4cd526b23f4dd9d9
parent d0895846
......@@ -88,7 +88,7 @@ struct cell {
int nr_density;
/* The ghost task to link density to interactions. */
struct task *ghost, *kick2;
struct task *ghost, *kick1, *kick2;
/* Number of tasks that are associated with this cell. */
int nr_tasks;
......@@ -117,10 +117,6 @@ struct cell {
/* Linking pointer for "memory management". */
struct cell *next;
/* Timing stuff. */
ticks tic, toc;
int tid;
} __attribute__((aligned (64)));
......
......@@ -70,7 +70,7 @@ void engine_maketasks ( struct engine *e ) {
struct cell *ci, *cj;
/* Re-set the scheduler. */
scheduler_reset( sched , s->tot_cells * space_maxtaskspercell );
scheduler_reset( sched , s->tot_cells * engine_maxtaskspercell );
/* Run through the highest level of cells and add pairs. */
for ( i = 0 ; i < cdim[0] ; i++ )
......@@ -351,7 +351,12 @@ void engine_prepare ( struct engine *e ) {
}
/* Start the scheduler. */
scheduler_start( &e->sched );
scheduler_start( &e->sched , (1 << task_type_sort) |
(1 << task_type_self) |
(1 << task_type_pair) |
(1 << task_type_sub) |
(1 << task_type_ghost) |
(1 << task_type_kick2) );
TIMER_TOC( timer_prepare );
......@@ -403,151 +408,6 @@ void engine_barrier( struct engine *e ) {
}
/**
* @brief Mapping function to set dt_min and dt_max, do the first
* kick.
*/
void engine_map_kick_first ( struct cell *c , void *data ) {
int j, k;
struct engine *e = (struct engine *)data;
float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
float dt_min, dt_max, h_max, dx, dx_max;
float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho;
double x[3], x_old[3];
struct part *restrict p;
struct xpart *restrict xp;
/* No children? */
if ( !c->split ) {
/* Store the timer. */
c->tic = getticks();
c->tid = omp_get_thread_num();
/* Init the min/max counters. */
dt_min = FLT_MAX;
dt_max = 0.0f;
h_max = 0.0f;
dx_max = 0.0f;
/* Loop over parts. */
for ( k = 0 ; k < c->count ; k++ ) {
/* Get a handle on the kth particle. */
p = &c->parts[k];
xp = p->xtras;
/* Load the data locally. */
a[0] = p->a[0]; a[1] = p->a[1]; a[2] = p->a[2];
v[0] = p->v[0]; v[1] = p->v[1]; v[2] = p->v[2];
x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2];
x_old[0] = xp->x_old[0]; x_old[1] = xp->x_old[1]; x_old[2] = xp->x_old[2];
h = p->h;
u = p->u;
h_dt = p->force.h_dt;
u_dt = p->force.u_dt;
pdt = p->dt;
/* Store the min/max dt. */
dt_min = fminf( dt_min , pdt );
dt_max = fmaxf( dt_max , pdt );
/* Step and store the velocity and internal energy. */
xp->v_old[0] = v_old[0] = v[0] + hdt * a[0];
xp->v_old[1] = v_old[1] = v[1] + hdt * a[1];
xp->v_old[2] = v_old[2] = v[2] + hdt * a[2];
xp->u_old = p->u + hdt * p->force.u_dt;
/* Move the particles with the velocitie at the half-step. */
p->x[0] = x[0] += dt * v_old[0];
p->x[1] = x[1] += dt * v_old[1];
p->x[2] = x[2] += dt * v_old[2];
dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
(x[1] - x_old[1])*(x[1] - x_old[1]) +
(x[2] - x_old[2])*(x[2] - x_old[2]) );
dx_max = fmaxf( dx_max , dx );
/* Update positions and energies at the half-step. */
p->v[0] = v[0] += dt * a[0];
p->v[1] = v[1] += dt * a[1];
p->v[2] = v[2] += dt * a[2];
w = u_dt / u * dt;
if ( fabsf( w ) < 0.01f )
p->u = u *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) );
else
p->u = u *= expf( w );
w = h_dt / h * dt;
if ( fabsf( w ) < 0.01f )
p->h = h *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) );
else
p->h = h *= expf( w );
h_max = fmaxf( h_max , h );
/* Integrate other values if this particle will not be updated. */
/* Init fields for density calculation. */
if ( pdt > dt_step ) {
float w = -3.0f * h_dt / h * dt;
if ( fabsf( w ) < 0.1f )
rho = p->rho *= 1.0f + w*( 1.0f + w*( 0.5f + w*(1.0f/6.0f + 1.0f/24.0f*w ) ) );
else
rho = p->rho *= expf( w );
p->force.POrho2 = u * ( const_hydro_gamma - 1.0f ) / ( rho * xp->omega );
}
else {
p->density.wcount = 0.0f;
p->density.wcount_dh = 0.0f;
p->rho = 0.0f;
p->rho_dh = 0.0f;
p->density.div_v = 0.0f;
for ( j = 0 ; j < 3 ; ++j)
p->density.curl_v[j] = 0.0f;
}
}
/* Store the timer. */
c->toc = getticks();
}
/* Otherwise, agregate data from children. */
else {
/* Init with the first non-null child. */
dt_min = FLT_MAX;
dt_max = 0.0f;
h_max = 0.0f;
dx_max = 0.0f;
/* Loop over the remaining progeny. */
for ( k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL ) {
#pragma omp task
engine_map_kick_first( c->progeny[k] , e );
}
#pragma omp taskwait
for ( k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL ) {
dt_min = fminf( dt_min , c->progeny[k]->dt_min );
dt_max = fmaxf( dt_max , c->progeny[k]->dt_max );
h_max = fmaxf( h_max , c->progeny[k]->h_max );
dx_max = fmaxf( dx_max , c->progeny[k]->dx_max );
}
}
/* Store the values. */
c->dt_min = dt_min;
c->dt_max = dt_max;
c->h_max = h_max;
c->dx_max = dx_max;
}
/**
* @brief Mapping function to collect the data from the second kick.
*/
......@@ -736,36 +596,20 @@ void engine_step ( struct engine *e ) {
/* First kick. */
TIMER_TIC
// space_map_cells_post( e->s , 1 , &engine_map_kick_first , e );
/* k = 0;
#pragma omp parallel shared(k,e)
{
int myk;
while ( 1 ) {
#pragma omp critical
myk = k++;
if ( myk < e->s->nr_cells ) {
e->s->cells[myk].tic = getticks();
e->s->cells[myk].tid = omp_get_thread_num();
engine_map_kick_first( &e->s->cells[myk] , e );
e->s->cells[myk].toc = getticks();
}
else
break;
}
} */
#pragma omp parallel private(k)
{
#pragma omp single
{
for ( k = 0 ; k < e->s->nr_cells ; k++ ) {
#pragma omp task
engine_map_kick_first( &e->s->cells[k] , e );
}
}
#pragma omp taskwait
}
scheduler_start( &e->sched , (1 << task_type_kick1) );
e->barrier_count = -e->barrier_count;
if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
error( "Failed to broadcast barrier open condition." );
while ( e->barrier_count < e->nr_threads )
if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
error( "Error while waiting for barrier." );
TIMER_TOC( timer_kick1 );
/* Check if all the kick1 threads have executed. */
for ( k = 0 ; k < e->sched.nr_tasks ; k++ )
if ( e->sched.tasks[k].type == task_type_kick1 &&
e->sched.tasks[k].tic == 0 )
error( "Not all kick1 tasks completed." );
// for(k=0; k<10; ++k)
// printParticle(parts, k);
......@@ -919,6 +763,10 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
/* Init the scheduler. */
scheduler_init( &e->sched , e->s , nr_queues , scheduler_flag_steal );
/* Append a kick1 task to each cell. */
scheduler_reset( &e->sched , s->tot_cells );
space_map_cells_pre( e->s , 1 , &scheduler_map_mkkick1 , &e->sched );
/* Allocate and init the threads. */
if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL )
error( "Failed to allocate threads array." );
......
......@@ -29,6 +29,7 @@
#define engine_policy_multistep 32
#define engine_queue_scale 1.2
#define engine_maxtaskspercell 32
/* Data structure for the engine. */
......
......@@ -607,6 +607,138 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) {
}
/**
* @brief Mapping function to set dt_min and dt_max, do the first
* kick.
*/
void runner_dokick1 ( struct runner *r , struct cell *c ) {
int j, k;
struct engine *e = r->e;
float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
float dt_min, dt_max, h_max, dx, dx_max;
float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho;
double x[3], x_old[3];
struct part *restrict p;
struct xpart *restrict xp;
/* No children? */
if ( !c->split ) {
/* Init the min/max counters. */
dt_min = FLT_MAX;
dt_max = 0.0f;
h_max = 0.0f;
dx_max = 0.0f;
/* Loop over parts. */
for ( k = 0 ; k < c->count ; k++ ) {
/* Get a handle on the kth particle. */
p = &c->parts[k];
xp = p->xtras;
/* Load the data locally. */
a[0] = p->a[0]; a[1] = p->a[1]; a[2] = p->a[2];
v[0] = p->v[0]; v[1] = p->v[1]; v[2] = p->v[2];
x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2];
x_old[0] = xp->x_old[0]; x_old[1] = xp->x_old[1]; x_old[2] = xp->x_old[2];
h = p->h;
u = p->u;
h_dt = p->force.h_dt;
u_dt = p->force.u_dt;
pdt = p->dt;
/* Store the min/max dt. */
dt_min = fminf( dt_min , pdt );
dt_max = fmaxf( dt_max , pdt );
/* Step and store the velocity and internal energy. */
xp->v_old[0] = v_old[0] = v[0] + hdt * a[0];
xp->v_old[1] = v_old[1] = v[1] + hdt * a[1];
xp->v_old[2] = v_old[2] = v[2] + hdt * a[2];
xp->u_old = p->u + hdt * p->force.u_dt;
/* Move the particles with the velocitie at the half-step. */
p->x[0] = x[0] += dt * v_old[0];
p->x[1] = x[1] += dt * v_old[1];
p->x[2] = x[2] += dt * v_old[2];
dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
(x[1] - x_old[1])*(x[1] - x_old[1]) +
(x[2] - x_old[2])*(x[2] - x_old[2]) );
dx_max = fmaxf( dx_max , dx );
/* Update positions and energies at the half-step. */
p->v[0] = v[0] += dt * a[0];
p->v[1] = v[1] += dt * a[1];
p->v[2] = v[2] += dt * a[2];
w = u_dt / u * dt;
if ( fabsf( w ) < 0.01f )
p->u = u *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) );
else
p->u = u *= expf( w );
w = h_dt / h * dt;
if ( fabsf( w ) < 0.01f )
p->h = h *= 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) );
else
p->h = h *= expf( w );
h_max = fmaxf( h_max , h );
/* Integrate other values if this particle will not be updated. */
/* Init fields for density calculation. */
if ( pdt > dt_step ) {
float w = -3.0f * h_dt / h * dt;
if ( fabsf( w ) < 0.1f )
rho = p->rho *= 1.0f + w*( 1.0f + w*( 0.5f + w*(1.0f/6.0f + 1.0f/24.0f*w ) ) );
else
rho = p->rho *= expf( w );
p->force.POrho2 = u * ( const_hydro_gamma - 1.0f ) / ( rho * xp->omega );
}
else {
p->density.wcount = 0.0f;
p->density.wcount_dh = 0.0f;
p->rho = 0.0f;
p->rho_dh = 0.0f;
p->density.div_v = 0.0f;
for ( j = 0 ; j < 3 ; ++j)
p->density.curl_v[j] = 0.0f;
}
}
}
/* Otherwise, agregate data from children. */
else {
/* Init with the first non-null child. */
dt_min = FLT_MAX;
dt_max = 0.0f;
h_max = 0.0f;
dx_max = 0.0f;
/* Loop over the progeny. */
for ( k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL ) {
dt_min = fminf( dt_min , c->progeny[k]->dt_min );
dt_max = fmaxf( dt_max , c->progeny[k]->dt_max );
h_max = fmaxf( h_max , c->progeny[k]->h_max );
dx_max = fmaxf( dx_max , c->progeny[k]->dx_max );
}
}
/* Store the values. */
c->dt_min = dt_min;
c->dt_max = dt_max;
c->h_max = h_max;
c->dx_max = dx_max;
}
/**
* @brief The #runner main thread routine.
*
......@@ -679,6 +811,9 @@ void *runner_main ( void *data ) {
if ( ci->super == ci )
runner_doghost( r , ci );
break;
case task_type_kick1:
runner_dokick1( r , ci );
break;
case task_type_kick2:
runner_dokick2( r , ci );
break;
......
......@@ -71,7 +71,12 @@ void scheduler_map_mkghosts ( struct cell *c , void *data ) {
if ( c->super != c || c->nr_tasks > 0 )
c->ghost = scheduler_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , 0 );
/* Append a kick task if we are the active super cell. */
/* Append a kick1 task and make sure the parent depends on it. */
c->kick1 = scheduler_addtask( s , task_type_kick1 , task_subtype_none , 0 , 0 , c , NULL , 0 );
if ( c->parent != NULL )
task_addunlock( c->kick1 , c->parent->kick1 );
/* Append a kick2 task if we are the active super cell. */
if ( c->super == c && c->nr_tasks > 0 )
c->kick2 = scheduler_addtask( s , task_type_kick2 , task_subtype_none , 0 , 0 , c , NULL , 0 );
......@@ -83,6 +88,29 @@ void scheduler_map_mkghosts ( struct cell *c , void *data ) {
}
/**
* @brief Mapping function to append a ghost task to each cell.
*
* A kick1-task is appended to each cell.
*/
void scheduler_map_mkkick1 ( struct cell *c , void *data ) {
struct scheduler *s = (struct scheduler *)data;
struct cell *finger;
/* Append a kick1 task and make sure the parent depends on it. */
c->kick1 = scheduler_addtask( s , task_type_kick1 , task_subtype_none , 0 , 0 , c , NULL , 0 );
if ( c->parent != NULL )
task_addunlock( c->kick1 , c->parent->kick1 );
/* Set a bogus super cell. */
for ( finger = c ; finger->parent != NULL ; finger = finger->parent );
c->super = finger;
}
/**
* @brief Split tasks that may be too large.
*
......@@ -525,7 +553,7 @@ void scheduler_reset ( struct scheduler *s , int size ) {
* @param s The #scheduler.
*/
void scheduler_start ( struct scheduler *s ) {
void scheduler_start ( struct scheduler *s , unsigned int mask ) {
int k, j, *tid = s->tasks_ind;
struct task *t, *tasks = s->tasks;
......@@ -535,7 +563,7 @@ void scheduler_start ( struct scheduler *s ) {
// #pragma omp parallel for schedule(static) private(t,j)
for ( k = s->nr_tasks-1 ; k >= 0 ; k-- ) {
t = &tasks[ tid[k] ];
if ( !t->skip ) {
if ( ( (1 << t->type) & mask ) && !t->skip ) {
for ( j = 0 ; j < t->nr_unlock_tasks ; j++ )
atomic_inc( &t->unlock_tasks[j]->wait );
t->maxdepth = 0;
......@@ -570,6 +598,7 @@ void scheduler_start ( struct scheduler *s ) {
if ( t->ci == t->ci->super )
t->weight += t->ci->count;
break;
case task_type_kick1:
case task_type_kick2:
t->weight += t->ci->count;
break;
......@@ -580,7 +609,7 @@ void scheduler_start ( struct scheduler *s ) {
/* Loop over the tasks and enqueue whoever is ready. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( !t->skip && t->wait == 0 )
if ( ( (1 << t->type) & mask ) && !t->skip && t->wait == 0 )
scheduler_enqueue( s , t );
}
......@@ -607,6 +636,7 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
case task_type_self:
case task_type_sort:
case task_type_ghost:
case task_type_kick1:
case task_type_kick2:
qid = t->ci->super->owner;
break;
......@@ -617,6 +647,8 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
( qid < 0 || s->queues[qid].count > s->queues[t->cj->super->owner].count ) )
qid = t->cj->super->owner;
break;
default:
qid = -1;
}
/* If no previous owner, find the shortest queue. */
......
......@@ -71,10 +71,11 @@ struct scheduler {
void scheduler_init ( struct scheduler *s , struct space *space , int nr_queues , unsigned int flags );
struct task *scheduler_gettask ( struct scheduler *s , int qid );
void scheduler_enqueue ( struct scheduler *s , struct task *t );
void scheduler_start ( struct scheduler *s );
void scheduler_start ( struct scheduler *s , unsigned int mask );
void scheduler_reset ( struct scheduler *s , int nr_tasks );
void scheduler_ranktasks ( struct scheduler *s );
struct task *scheduler_addtask ( struct scheduler *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , int tight );
void scheduler_splittasks ( struct scheduler *s );
void scheduler_map_mkghosts ( struct cell *c , void *data );
void scheduler_map_mkkick1 ( struct cell *c , void *data );
void scheduler_done ( struct scheduler *s , struct task *t );
......@@ -27,7 +27,6 @@
#define space_splitsize_default 400
#define space_subsize_default 5000
#define space_stretch 1.05f
#define space_maxtaskspercell 31
#define space_maxreldx 0.2f
......
......@@ -39,7 +39,7 @@
#include "error.h"
/* Task type names. */
const char *taskID_names[task_type_count] = { "none" , "sort" , "self" , "pair" , "sub" , "ghost" , "kick2" };
const char *taskID_names[task_type_count] = { "none" , "sort" , "self" , "pair" , "sub" , "ghost" , "kick1" , "kick2" };
/**
......
......@@ -31,6 +31,7 @@ enum task_types {
task_type_pair,
task_type_sub,
task_type_ghost,
task_type_kick1,
task_type_kick2,
task_type_count
};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment