Skip to content
Snippets Groups Projects
Commit c826e1a1 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

more efficient distribution of work in engine_prepare.

Former-commit-id: 1d78f54c1bc07c1f0d81e0451509058f106b6c08
parent 58d81f85
No related branches found
No related tags found
No related merge requests found
...@@ -72,7 +72,7 @@ void engine_prepare ( struct engine *e ) { ...@@ -72,7 +72,7 @@ void engine_prepare ( struct engine *e ) {
/* Rebuild the space. */ /* Rebuild the space. */
// tic = getticks(); // tic = getticks();
space_prepare( e->s ); space_prepare( e->s );
// printf( "engine_prepare: space_prepare with %i changes took %.3f ms.\n" , changes , (double)(getticks() - tic) / CPU_TPS * 1000 ); // printf( "engine_prepare: space_prepare took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
// tic = getticks(); // tic = getticks();
/* Init the queues (round-robin). */ /* Init the queues (round-robin). */
...@@ -92,7 +92,7 @@ void engine_prepare ( struct engine *e ) { ...@@ -92,7 +92,7 @@ void engine_prepare ( struct engine *e ) {
/* Re-set the particle data. */ /* Re-set the particle data. */
// tic = getticks(); // tic = getticks();
#pragma omp parallel for schedule(static) private(j) #pragma omp parallel for schedule(static) private(j)
for ( k = 0 ; k < s->nr_parts ; k++ ) for ( k = 0 ; k < s->nr_parts ; k++ )
if ( s->parts[k].dt <= dt_step ) { if ( s->parts[k].dt <= dt_step ) {
s->parts[k].wcount = 0.0f; s->parts[k].wcount = 0.0f;
...@@ -170,6 +170,127 @@ void engine_barrier( struct engine *e ) { ...@@ -170,6 +170,127 @@ void engine_barrier( struct engine *e ) {
} }
/**
* @brief Mapping function to set dt_min and dt_max, do the first
* kick.
*/
void engine_map_kick_first ( struct cell *c , void *data ) {
int j, k;
struct engine *e = (struct engine *)data;
float dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
float dt_min, dt_max, h_max, dx_max;
struct part *restrict p;
struct xpart *restrict xp;
struct cpart *restrict cp;
/* No children? */
if ( !c->split ) {
/* Init the min/max counters. */
dt_min = FLT_MAX;
dt_max = 0.0f;
h_max = 0.0f;
dx_max = 0.0f;
/* Loop over parts. */
for ( k = 0 ; k < c->count ; k++ ) {
/* Get a handle on the kth particle. */
p = &c->parts[k];
xp = p->xtras;
cp = &c->cparts[k];
/* Store the min/max dt. */
dt_min = fminf( dt_min , p->dt );
dt_max = fmaxf( dt_max , p->dt );
/* Step and store the velocity and internal energy. */
xp->v_old[0] = p->v[0] + hdt * p->a[0];
xp->v_old[1] = p->v[1] + hdt * p->a[1];
xp->v_old[2] = p->v[2] + hdt * p->a[2];
xp->u_old = p->u + hdt * p->force.u_dt;
/* Move the particles with the velocitie at the half-step. */
p->x[0] += dt * xp->v_old[0];
p->x[1] += dt * xp->v_old[1];
p->x[2] += dt * xp->v_old[2];
dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
(p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
(p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h );
/* Update positions and energies at the half-step. */
p->v[0] += dt * p->a[0];
p->v[1] += dt * p->a[1];
p->v[2] += dt * p->a[2];
p->u *= expf( p->force.u_dt / p->u * dt );
p->h *= expf( p->force.h_dt / p->h * dt );
h_max = fmaxf( h_max , p->h );
/* Integrate other values if this particle will not be updated. */
if ( p->dt > dt_step ) {
p->rho *= expf( -3.0f * p->force.h_dt / p->h * dt );
p->force.POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
}
/* Fill the cpart. */
cp->x[0] = p->x[0];
cp->x[1] = p->x[1];
cp->x[2] = p->x[2];
cp->h = p->h;
cp->dt = p->dt;
/* Init fields for density calculation. */
if ( p->dt <= dt_step ) {
p->wcount = 0.0f;
p->density.wcount_dh = 0.0f;
p->rho = 0.0f;
p->rho_dh = 0.0f;
p->density.div_v = 0.0f;
for ( j = 0 ; j < 3 ; ++j)
p->density.curl_v[j] = 0.0f;
}
}
}
/* Otherwise, agregate data from children. */
else {
/* Init with the first non-null child. */
for ( k = 0 ; c->progeny[k] == 0 ; k++ );
dt_min = c->progeny[k]->dt_min;
dt_max = c->progeny[k]->dt_max;
h_max = c->progeny[k]->h_max;
dx_max = c->progeny[k]->dx_max;
/* Loop over the remaining progeny. */
for ( k += 1 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL ) {
dt_min = fminf( dt_min , c->progeny[k]->dt_min );
dt_max = fmaxf( dt_max , c->progeny[k]->dt_max );
h_max = fmaxf( h_max , c->progeny[k]->h_max );
dx_max = fmaxf( dx_max , c->progeny[k]->dx_max );
}
}
/* Store the values. */
c->dt_min = dt_min;
c->dt_max = dt_max;
c->h_max = h_max;
c->dx_max = dx_max;
/* Clean out the task pointers. */
c->sorts[0] = NULL;
c->nr_tasks = 0;
c->nr_density = 0;
}
/** /**
* @brief Let the #engine loose to compute the forces. * @brief Let the #engine loose to compute the forces.
* *
...@@ -201,38 +322,7 @@ void engine_step ( struct engine *e , int sort_queues ) { ...@@ -201,38 +322,7 @@ void engine_step ( struct engine *e , int sort_queues ) {
/* First kick. */ /* First kick. */
TIMER_TIC TIMER_TIC
#pragma omp parallel for schedule(static) private(p,xp) space_map_cells_post( e->s , 1 , &engine_map_kick_first , e );
for ( k = 0 ; k < nr_parts ; k++ ) {
/* Get a handle on the part. */
p = &parts[k];
xp = p->xtras;
/* Step and store the velocity and internal energy. */
xp->v_old[0] = p->v[0] + hdt * p->a[0];
xp->v_old[1] = p->v[1] + hdt * p->a[1];
xp->v_old[2] = p->v[2] + hdt * p->a[2];
xp->u_old = p->u + hdt * p->force.u_dt;
/* Move the particles with the velocitie at the half-step. */
p->x[0] += dt * xp->v_old[0];
p->x[1] += dt * xp->v_old[1];
p->x[2] += dt * xp->v_old[2];
/* Update positions and energies at the half-step. */
p->v[0] += dt * p->a[0];
p->v[1] += dt * p->a[1];
p->v[2] += dt * p->a[2];
p->u *= expf( p->force.u_dt / p->u * dt );
p->h *= expf( p->force.h_dt / p->h * dt );
/* Integrate other values if this particle will not be updated. */
if ( p->dt > dt_step ) {
p->rho *= expf( -3.0f * p->force.h_dt / p->h * dt );
p->force.POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
}
}
TIMER_TOC( timer_kick1 ); TIMER_TOC( timer_kick1 );
// for(k=0; k<10; ++k) // for(k=0; k<10; ++k)
......
...@@ -85,8 +85,9 @@ void space_map_prepare ( struct cell *c , void *data ) { ...@@ -85,8 +85,9 @@ void space_map_prepare ( struct cell *c , void *data ) {
int k; int k;
float dt_min, dt_max, h_max, dx_max; float dt_min, dt_max, h_max, dx_max;
struct part *p; struct part *restrict p;
struct xpart *xp; struct xpart *restrict xp;
struct cpart *restrict cp;
/* No children? */ /* No children? */
if ( !c->split ) { if ( !c->split ) {
...@@ -94,23 +95,36 @@ void space_map_prepare ( struct cell *c , void *data ) { ...@@ -94,23 +95,36 @@ void space_map_prepare ( struct cell *c , void *data ) {
/* Init with first part. */ /* Init with first part. */
p = &c->parts[0]; p = &c->parts[0];
xp = p->xtras; xp = p->xtras;
cp = &c->cparts[0];
dt_min = p->dt; dt_min = p->dt;
dt_max = p->dt; dt_max = p->dt;
h_max = p->h; h_max = p->h;
dx_max = sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) + dx_max = sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
(p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) + (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
(p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h; (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h;
cp->x[0] = p->x[0];
cp->x[1] = p->x[1];
cp->x[2] = p->x[2];
cp->h = p->h;
cp->dt = p->dt;
/* Loop over parts. */ /* Loop over parts. */
for ( k = 1 ; k < c->count ; k++ ) { for ( k = 1 ; k < c->count ; k++ ) {
p = &c->parts[k]; p = &c->parts[k];
xp = p->xtras; xp = p->xtras;
cp = &c->cparts[0];
dt_min = fminf( dt_min , p->dt ); dt_min = fminf( dt_min , p->dt );
dt_max = fmaxf( dt_max , p->dt ); dt_max = fmaxf( dt_max , p->dt );
h_max = fmaxf( h_max , p->h ); h_max = fmaxf( h_max , p->h );
dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) + dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) +
(p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) + (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) +
(p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h ); (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h );
cp->x[0] = p->x[0];
cp->x[1] = p->x[1];
cp->x[2] = p->x[2];
cp->h = p->h;
cp->dt = p->dt;
} }
} }
...@@ -166,9 +180,12 @@ void space_prepare ( struct space *s ) { ...@@ -166,9 +180,12 @@ void space_prepare ( struct space *s ) {
struct task *t; struct task *t;
float dt_step = s->dt_step, dx_max = 0.0f; float dt_step = s->dt_step, dx_max = 0.0f;
int counts[ task_type_count + 1 ]; int counts[ task_type_count + 1 ];
ticks tic;
/* Traverse the cells and set their dt_min and dx_max. */ /* Traverse the cells and set their dt_min and dx_max. */
space_map_cells_post( s , 1 , &space_map_prepare , NULL ); // tic = getticks();
// space_map_cells_post( s , 1 , &space_map_prepare , NULL );
// printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Get the maximum displacement in the whole system. */ /* Get the maximum displacement in the whole system. */
for ( k = 0 ; k < s->nr_cells ; k++ ) for ( k = 0 ; k < s->nr_cells ; k++ )
...@@ -176,6 +193,7 @@ void space_prepare ( struct space *s ) { ...@@ -176,6 +193,7 @@ void space_prepare ( struct space *s ) {
printf( "space_prepare: dx_max is %e.\n" , dx_max ); printf( "space_prepare: dx_max is %e.\n" , dx_max );
/* Run through the tasks and mark as skip or not. */ /* Run through the tasks and mark as skip or not. */
tic = getticks();
for ( k = 0 ; k < s->nr_tasks ; k++ ) { for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k]; t = &s->tasks[k];
if ( t->type == task_type_sort || if ( t->type == task_type_sort ||
...@@ -190,34 +208,30 @@ void space_prepare ( struct space *s ) { ...@@ -190,34 +208,30 @@ void space_prepare ( struct space *s ) {
break; break;
} }
} }
printf( "space_prepare: checking tasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Did this not go through? */ /* Did this not go through? */
if ( k < s->nr_tasks ) { if ( k < s->nr_tasks ) {
/* Re-build the space. */ /* Re-build the space. */
tic = getticks();
space_rebuild( s , 0.0 ); space_rebuild( s , 0.0 );
printf( "space_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Traverse the cells and set their dt_min and dx_max. */ /* Traverse the cells and set their dt_min and dx_max. */
tic = getticks();
space_map_cells_post( s , 1 , &space_map_prepare , NULL ); space_map_cells_post( s , 1 , &space_map_prepare , NULL );
printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
} }
/* Store the condensed particle data. */
#pragma omp parallel for schedule(static)
for ( k = 0 ; k < s->nr_parts ; k++ ) {
s->cparts[k].x[0] = s->parts[k].x[0];
s->cparts[k].x[1] = s->parts[k].x[1];
s->cparts[k].x[2] = s->parts[k].x[2];
s->cparts[k].h = s->parts[k].h;
s->cparts[k].dt = s->parts[k].dt;
}
/* Now that we have the cell structre, re-build the tasks. */ /* Now that we have the cell structre, re-build the tasks. */
// tic = getticks(); tic = getticks();
space_maketasks( s , 1 ); space_maketasks( s , 1 );
// printf( "space_prepare: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); printf( "space_prepare: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Count the number of each task type. */ /* Count the number of each task type. */
tic = getticks();
for ( k = 0 ; k <= task_type_count ; k++ ) for ( k = 0 ; k <= task_type_count ; k++ )
counts[k] = 0; counts[k] = 0;
for ( k = 0 ; k < s->nr_tasks ; k++ ) for ( k = 0 ; k < s->nr_tasks ; k++ )
...@@ -229,6 +243,7 @@ void space_prepare ( struct space *s ) { ...@@ -229,6 +243,7 @@ void space_prepare ( struct space *s ) {
for ( k = 1 ; k < task_type_count ; k++ ) for ( k = 1 ; k < task_type_count ; k++ )
printf( " %s=%i" , taskID_names[k] , counts[k] ); printf( " %s=%i" , taskID_names[k] , counts[k] );
printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout);
printf( "space_prepare: task counting took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
} }
...@@ -762,6 +777,7 @@ void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct ce ...@@ -762,6 +777,7 @@ void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct ce
} }
/* Call the recursive function on all higher-level cells. */ /* Call the recursive function on all higher-level cells. */
#pragma omp parallel for schedule(dynamic,1)
for ( i = 0 ; i < s->nr_cells ; i++ ) for ( i = 0 ; i < s->nr_cells ; i++ )
rec_map( &s->cells[i] ); rec_map( &s->cells[i] );
...@@ -798,6 +814,7 @@ void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cel ...@@ -798,6 +814,7 @@ void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cel
} }
/* Call the recursive function on all higher-level cells. */ /* Call the recursive function on all higher-level cells. */
#pragma omp parallel for schedule(dynamic,1)
for ( i = 0 ; i < s->nr_cells ; i++ ) for ( i = 0 ; i < s->nr_cells ; i++ )
rec_map( &s->cells[i] ); rec_map( &s->cells[i] );
...@@ -825,6 +842,7 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell ...@@ -825,6 +842,7 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell
} }
/* Call the recursive function on all higher-level cells. */ /* Call the recursive function on all higher-level cells. */
#pragma omp parallel for schedule(dynamic,1)
for ( i = 0 ; i < s->nr_cells ; i++ ) for ( i = 0 ; i < s->nr_cells ; i++ )
rec_map( &s->cells[i] ); rec_map( &s->cells[i] );
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment