Commit 9ed62d9d authored by Pedro Gonnet
Browse files

cleaned up engine_prepare a bit.


Former-commit-id: cd3a33df122fe358fb3ff244bc63106451abd5f5
parent 19e474c8
...@@ -60,55 +60,60 @@ ...@@ -60,55 +60,60 @@
void engine_prepare ( struct engine *e , int force ) { void engine_prepare ( struct engine *e , int force ) {
int j, k, qid, changes; int j, k, qid, changes, count;
struct space *s = e->s; struct space *s = e->s;
// ticks tic;
/* Rebuild the space. */ /* Rebuild the space. */
// tic = getticks();
changes = space_rebuild( e->s , force , 0 ); changes = space_rebuild( e->s , force , 0 );
// printf( "engine_prepare: space_rebuild with %i changes.\n" , changes ); // printf( "engine_prepare: space_rebuild with %i changes took %.3f ms.\n" , changes , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Has anything changed? */ /* Has anything changed? */
// tic = getticks();
if ( changes ) { if ( changes ) {
/* Rank the tasks in topological order. */ /* Rank the tasks in topological order. */
engine_ranktasks( e ); engine_ranktasks( e );
/* Clear the queues. */
for ( k = 0 ; k < e->nr_queues ; k++ )
e->queues[k].count = 0;
/* Re-allocate the queue buffers? */
for ( k = 0 ; k < e->nr_queues ; k++ )
queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
/* Fill the queues (round-robin). */ /* Fill the queues (round-robin). */
for ( k = 0 ; k < s->nr_tasks ; k++ ) { #pragma omp parallel for schedule(static) private(count,k)
if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none ) for ( qid = 0 ; qid < e->nr_queues ; qid++ ) {
continue; queue_init( &e->queues[qid] , s->nr_tasks , s->tasks );
qid = k % e->nr_queues; for ( count = 0 , k = qid ; k < s->nr_tasks ; k += e->nr_queues ) {
e->queues[qid].tid[ e->queues[qid].count ] = s->tasks_ind[k]; if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none )
e->queues[qid].count += 1; continue;
e->queues[qid].tid[ count ] = s->tasks_ind[k];
count += 1;
}
e->queues[qid].count = count;
e->queues[qid].next = 0;
} }
} }
// printf( "engine_prepare: re-filling queues took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Re-set the particle data. */ /* Re-set the particle data. */
#pragma omp parallel for // tic = getticks();
#pragma omp parallel for schedule(static)
for ( k = 0 ; k < s->nr_parts ; k++ ) { for ( k = 0 ; k < s->nr_parts ; k++ ) {
s->parts[k].wcount = 0.0f; s->parts[k].wcount = 0.0f;
s->parts[k].wcount_dh = 0.0f; s->parts[k].wcount_dh = 0.0f;
s->parts[k].rho = 0.0f; s->parts[k].rho = 0.0f;
s->parts[k].rho_dh = 0.0f; s->parts[k].rho_dh = 0.0f;
} }
// printf( "engine_prepare: re-setting particle data took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Run through the tasks and get all the waits right. */ /* Run through the tasks and get all the waits right. */
#pragma omp parallel for private(j) // tic = getticks();
#pragma omp parallel for schedule(static) private(j)
for ( k = 0 ; k < s->nr_tasks ; k++ ) { for ( k = 0 ; k < s->nr_tasks ; k++ ) {
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ ) for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
__sync_add_and_fetch( &s->tasks[k].unlock_tasks[j]->wait , 1 ); __sync_add_and_fetch( &s->tasks[k].unlock_tasks[j]->wait , 1 );
for ( j = 0 ; j < s->tasks[k].nr_unlock_cells ; j++ ) for ( j = 0 ; j < s->tasks[k].nr_unlock_cells ; j++ )
__sync_add_and_fetch( &s->tasks[k].unlock_cells[j]->wait , 1 ); __sync_add_and_fetch( &s->tasks[k].unlock_cells[j]->wait , 1 );
} }
// printf( "engine_prepare: preparing task dependencies took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Re-set the queues.*/ /* Re-set the queues.*/
for ( k = 0 ; k < e->nr_queues ; k++ ) for ( k = 0 ; k < e->nr_queues ; k++ )
......
...@@ -269,8 +269,10 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { ...@@ -269,8 +269,10 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
struct part *finger; struct part *finger;
struct cpart *cfinger; struct cpart *cfinger;
int *ind, changes = 0; int *ind, changes = 0;
// ticks tic;
/* Run through the parts and get the current h_max. */ /* Run through the parts and get the current h_max. */
// tic = getticks();
for ( k = 0 ; k < s->nr_parts ; k++ ) { for ( k = 0 ; k < s->nr_parts ; k++ ) {
if ( s->parts[k].h > h_max ) if ( s->parts[k].h > h_max )
h_max = s->parts[k].h; h_max = s->parts[k].h;
...@@ -279,12 +281,14 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { ...@@ -279,12 +281,14 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
} }
s->h_min = h_min; s->h_min = h_min;
s->h_max = h_max; s->h_max = h_max;
// printf( "space_rebuild: getting h_min and h_max took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Get the new putative cell dimensions. */ /* Get the new putative cell dimensions. */
for ( k = 0 ; k < 3 ; k++ ) for ( k = 0 ; k < 3 ; k++ )
cdim[k] = floor( s->dim[k] / fmax( h_max*space_stretch , cell_max ) ); cdim[k] = floor( s->dim[k] / fmax( h_max*space_stretch , cell_max ) );
/* Do we need to re-build the upper-level cells? */ /* Do we need to re-build the upper-level cells? */
// tic = getticks();
if ( force || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2] ) { if ( force || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2] ) {
/* Free the old cells, if they were allocated. */ /* Free the old cells, if they were allocated. */
...@@ -328,9 +332,11 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { ...@@ -328,9 +332,11 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
changes = 1; changes = 1;
} /* re-build upper-level cells? */ } /* re-build upper-level cells? */
// printf( "space_rebuild: rebuilding upper-level cells took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Run through the particles and get their cell index. */ /* Run through the particles and get their cell index. */
// tic = getticks();
if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL ) if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL )
error( "Failed to allocate temporary particle indices." ); error( "Failed to allocate temporary particle indices." );
for ( k = 0 ; k < s->nr_cells ; k++ ) for ( k = 0 ; k < s->nr_cells ; k++ )
...@@ -339,14 +345,19 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { ...@@ -339,14 +345,19 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
ind[k] = cell_getid( s->cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] ); ind[k] = cell_getid( s->cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] );
s->cells[ ind[k] ].count += 1; s->cells[ ind[k] ].count += 1;
} }
// printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Sort the parts according to their cells. */ /* Sort the parts according to their cells. */
// tic = getticks();
parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells ); parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );
// printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* We no longer need the indices as of here. */ /* We no longer need the indices as of here. */
free( ind ); free( ind );
/* Update the condensed particle data. */ /* Update the condensed particle data. */
// tic = getticks();
#pragma omp parallel for schedule(static)
for ( k = 0 ; k < s->nr_parts ; k++ ) { for ( k = 0 ; k < s->nr_parts ; k++ ) {
s->cparts[k].x[0] = s->parts[k].x[0]; s->cparts[k].x[0] = s->parts[k].x[0];
s->cparts[k].x[1] = s->parts[k].x[1]; s->cparts[k].x[1] = s->parts[k].x[1];
...@@ -354,8 +365,10 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { ...@@ -354,8 +365,10 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
s->cparts[k].h = s->parts[k].h; s->cparts[k].h = s->parts[k].h;
s->cparts[k].dt = s->parts[k].dt; s->cparts[k].dt = s->parts[k].dt;
} }
// printf( "space_rebuild: creating condensed parts took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Hook the cells up to the parts. */ /* Hook the cells up to the parts. */
// tic = getticks();
finger = s->parts; finger = s->parts;
cfinger = s->cparts; cfinger = s->cparts;
for ( k = 0 ; k < s->nr_cells ; k++ ) { for ( k = 0 ; k < s->nr_cells ; k++ ) {
...@@ -365,17 +378,22 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { ...@@ -365,17 +378,22 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
finger = &finger[ c->count ]; finger = &finger[ c->count ];
cfinger = &cfinger[ c->count ]; cfinger = &cfinger[ c->count ];
} }
// printf( "space_rebuild: hooking up cells took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* At this point, we have the upper-level cells, old or new. Now make /* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */ sure that the parts in each cell are ok. */
#pragma omp parallel for shared(s) reduction(+:changes) // tic = getticks();
#pragma omp parallel for schedule(dynamic) shared(s) reduction(+:changes)
for ( k = 0 ; k < s->nr_cells ; k++ ) for ( k = 0 ; k < s->nr_cells ; k++ )
changes += space_rebuild_recurse( s , &s->cells[k] ); changes += space_rebuild_recurse( s , &s->cells[k] );
// printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Now that we have the cell structure, re-build the tasks. */ /* Now that we have the cell structure, re-build the tasks. */
// tic = getticks();
if ( changes ) if ( changes )
space_maketasks( s , 1 ); space_maketasks( s , 1 );
// printf( "space_rebuild: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Return the number of changes. */ /* Return the number of changes. */
return changes; return changes;
...@@ -460,16 +478,16 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) { ...@@ -460,16 +478,16 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {
} }
else else
#pragma omp parallel sections // #pragma omp parallel sections
{ {
/* Recurse on the left? */ /* Recurse on the left? */
#pragma omp section // #pragma omp section
if ( j > 0 && pivot > min ) if ( j > 0 && pivot > min )
parts_sort( parts , ind , j+1 , min , pivot ); parts_sort( parts , ind , j+1 , min , pivot );
/* Recurse on the right? */ /* Recurse on the right? */
#pragma omp section // #pragma omp section
if ( i < N && pivot+1 < max ) if ( i < N && pivot+1 < max )
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment