Commit da35b5df authored by Matthieu Schaller
Browse files

Removed the last traces of OpenMP in the code.


Former-commit-id: dbf79c6373aaf253e7e1898aee5afad68ee30c01
parent b48473af
...@@ -38,7 +38,6 @@ ...@@ -38,7 +38,6 @@
/* Local headers. */ /* Local headers. */
#include "const.h" #include "const.h"
#include "cycle.h" #include "cycle.h"
#include "atomic.h"
#include "lock.h" #include "lock.h"
#include "task.h" #include "task.h"
#include "kernel.h" #include "kernel.h"
...@@ -335,7 +334,7 @@ void space_rebuild ( struct space *s , double cell_max ) { ...@@ -335,7 +334,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
else if ( p->x[j] >= dim[j] ) else if ( p->x[j] >= dim[j] )
p->x[j] -= dim[j]; p->x[j] -= dim[j];
ind[k] = cell_getid( cdim , p->x[0]*ih[0] , p->x[1]*ih[1] , p->x[2]*ih[2] ); ind[k] = cell_getid( cdim , p->x[0]*ih[0] , p->x[1]*ih[1] , p->x[2]*ih[2] );
atomic_inc( &cells[ ind[k] ].count ); cells[ ind[k] ].count++;
} }
// message( "getting particle indices took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 ); // message( "getting particle indices took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 );
...@@ -421,7 +420,7 @@ void space_rebuild ( struct space *s , double cell_max ) { ...@@ -421,7 +420,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
else if ( gp->x[j] >= dim[j] ) else if ( gp->x[j] >= dim[j] )
gp->x[j] -= dim[j]; gp->x[j] -= dim[j];
ind[k] = cell_getid( cdim , gp->x[0]*ih[0] , gp->x[1]*ih[1] , gp->x[2]*ih[2] ); ind[k] = cell_getid( cdim , gp->x[0]*ih[0] , gp->x[1]*ih[1] , gp->x[2]*ih[2] );
atomic_inc( &cells[ ind[k] ].gcount ); cells[ ind[k] ].gcount++;
} }
// message( "getting particle indices took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 ); // message( "getting particle indices took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 );
...@@ -461,15 +460,8 @@ void space_rebuild ( struct space *s , double cell_max ) { ...@@ -461,15 +460,8 @@ void space_rebuild ( struct space *s , double cell_max ) {
/* At this point, we have the upper-level cells, old or new. Now make /* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */ sure that the parts in each cell are ok. */
// tic = getticks(); // tic = getticks();
k = 0; for ( k = 0; k < s->nr_cells; k++ )
if ( omp_get_thread_num() < 8 ) space_split( s , &cells[k] );
while ( 1 ) {
int myk = atomic_inc( &k );
if ( myk < s->nr_cells )
space_split( s , &cells[myk] );
else
break;
}
// message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 ); // message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 );
...@@ -520,122 +512,108 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N , ...@@ -520,122 +512,108 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N ,
qstack[i].ready = 0; qstack[i].ready = 0;
first = 0; last = 1; waiting = 1; first = 0; last = 1; waiting = 1;
/* Parallel bit. */ /* Main loop. */
#pragma omp parallel default(shared) shared(N,first,last,waiting,qstack,parts,xparts,ind,qstack_size,stderr,engine_rank) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_xp,temp_p) while ( waiting > 0 ) {
{
/* Main loop. */
if ( omp_get_thread_num() < 8 )
while ( waiting > 0 ) {
/* Grab an interval off the queue. */ /* Grab an interval off the queue. */
qid = atomic_inc( &first ) % qstack_size; qid = ( first++ ) % qstack_size;
/* Wait for the interval to be ready. */
while ( waiting > 0 && atomic_cas( &qstack[qid].ready , 1 , 1 ) != 1 );
/* Broke loop for all the wrong reasons? */
if ( waiting == 0 )
break;
/* Get the stack entry. */ /* Get the stack entry. */
i = qstack[qid].i; i = qstack[qid].i;
j = qstack[qid].j; j = qstack[qid].j;
min = qstack[qid].min; min = qstack[qid].min;
max = qstack[qid].max; max = qstack[qid].max;
qstack[qid].ready = 0; qstack[qid].ready = 0;
// message( "thread %i got interval [%i,%i] with values in [%i,%i]." , omp_get_thread_num() , i , j , min , max );
/* Loop over sub-intervals. */
while ( 1 ) {
/* Bring beer. */ /* Loop over sub-intervals. */
pivot = (min + max) / 2; while ( 1 ) {
/* One pass of QuickSort's partitioning. */ /* Bring beer. */
ii = i; jj = j; pivot = (min + max) / 2;
while ( ii < jj ) {
while ( ii <= j && ind[ii] <= pivot ) /* One pass of QuickSort's partitioning. */
ii++; ii = i; jj = j;
while ( jj >= i && ind[jj] > pivot ) while ( ii < jj ) {
jj--; while ( ii <= j && ind[ii] <= pivot )
if ( ii < jj ) { ii++;
temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i; while ( jj >= i && ind[jj] > pivot )
temp_p = parts[ii]; parts[ii] = parts[jj]; parts[jj] = temp_p; jj--;
temp_xp = xparts[ii]; xparts[ii] = xparts[jj]; xparts[jj] = temp_xp; if ( ii < jj ) {
} temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i;
} temp_p = parts[ii]; parts[ii] = parts[jj]; parts[jj] = temp_p;
temp_xp = xparts[ii]; xparts[ii] = xparts[jj]; xparts[jj] = temp_xp;
/* Verify sort. */ }
/* for ( int k = i ; k <= jj ; k++ ) }
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); /* Verify sort. */
error( "Partition failed (<=pivot)." ); /* for ( int k = i ; k <= jj ; k++ )
} if ( ind[k] > pivot ) {
for ( int k = jj+1 ; k <= j ; k++ ) message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
if ( ind[k] <= pivot ) { error( "Partition failed (<=pivot)." );
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); }
error( "Partition failed (>pivot)." ); for ( int k = jj+1 ; k <= j ; k++ )
} */ if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
/* Split-off largest interval. */ error( "Partition failed (>pivot)." );
if ( jj - i > j - jj+1 ) { } */
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
i = jj+1;
min = pivot+1;
}
else
break;
}
else {
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the left? */ /* Split-off largest interval. */
if ( jj > i && pivot > min ) { if ( jj - i > j - jj+1 ) {
j = jj;
max = pivot; /* Recurse on the left? */
} if ( jj > i && pivot > min ) {
else qid = ( last++ ) % qstack_size;
break; qstack[qid].i = i;
qstack[qid].j = jj;
} qstack[qid].min = min;
qstack[qid].max = pivot;
} /* loop over sub-intervals. */ qstack[qid].ready = 1;
if ( waiting++ >= qstack_size )
atomic_dec( &waiting ); error( "Qstack overflow." );
}
} /* main loop. */
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
i = jj+1;
min = pivot+1;
}
else
break;
}
else {
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
qid = ( last++ ) % qstack_size;
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ( ( waiting++ ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
j = jj;
max = pivot;
}
else
break;
}
} /* loop over sub-intervals. */
waiting--;
} /* main loop. */
} /* parallel bit. */
/* Verify sort. */ /* Verify sort. */
/* for ( i = 1 ; i < N ; i++ ) /* for ( i = 1 ; i < N ; i++ )
...@@ -680,122 +658,109 @@ void gparts_sort ( struct gpart *gparts , int *ind , int N , int min , int max ) ...@@ -680,122 +658,109 @@ void gparts_sort ( struct gpart *gparts , int *ind , int N , int min , int max )
qstack[i].ready = 0; qstack[i].ready = 0;
first = 0; last = 1; waiting = 1; first = 0; last = 1; waiting = 1;
/* Parallel bit. */ /* Main loop. */
#pragma omp parallel default(shared) shared(N,first,last,waiting,qstack,gparts,ind,qstack_size,stderr,engine_rank) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_p) while ( waiting > 0 ) {
{
/* Grab an interval off the queue. */
/* Main loop. */ qid = ( first++ ) % qstack_size;
if ( omp_get_thread_num() < 8 )
while ( waiting > 0 ) {
/* Grab an interval off the queue. */
qid = atomic_inc( &first ) % qstack_size;
/* Wait for the interval to be ready. */
while ( waiting > 0 && atomic_cas( &qstack[qid].ready , 1 , 1 ) != 1 ); /* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
/* Broke loop for all the wrong reasons? */ /* Loop over sub-intervals. */
if ( waiting == 0 ) while ( 1 ) {
break;
/* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
// message( "thread %i got interval [%i,%i] with values in [%i,%i]." , omp_get_thread_num() , i , j , min , max );
/* Loop over sub-intervals. */
while ( 1 ) {
/* Bring beer. */ /* Bring beer. */
pivot = (min + max) / 2; pivot = (min + max) / 2;
/* One pass of QuickSort's partitioning. */ /* One pass of QuickSort's partitioning. */
ii = i; jj = j; ii = i; jj = j;
while ( ii < jj ) { while ( ii < jj ) {
while ( ii <= j && ind[ii] <= pivot ) while ( ii <= j && ind[ii] <= pivot )
ii++; ii++;
while ( jj >= i && ind[jj] > pivot ) while ( jj >= i && ind[jj] > pivot )
jj--; jj--;
if ( ii < jj ) { if ( ii < jj ) {
temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i; temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i;
temp_p = gparts[ii]; gparts[ii] = gparts[jj]; gparts[jj] = temp_p; temp_p = gparts[ii]; gparts[ii] = gparts[jj]; gparts[jj] = temp_p;
} }
} }
/* Verify sort. */ /* Verify sort. */
/* for ( int k = i ; k <= jj ; k++ ) /* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) { if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (<=pivot)." ); error( "Partition failed (<=pivot)." );
} }
for ( int k = jj+1 ; k <= j ; k++ ) for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) { if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N ); message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (>pivot)." ); error( "Partition failed (>pivot)." );
} */ } */
/* Split-off largest interval. */ /* Split-off largest interval. */
if ( jj - i > j - jj+1 ) { if ( jj - i > j - jj+1 ) {
/* Recurse on the left? */ /* Recurse on the left? */
if ( jj > i && pivot > min ) { if ( jj > i && pivot > min ) {
qid = atomic_inc( &last ) % qstack_size; qid = ( last++ ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 ); qstack[qid].i = i;
qstack[qid].i = i; qstack[qid].j = jj;
qstack[qid].j = jj; qstack[qid].min = min;
qstack[qid].min = min; qstack[qid].max = pivot;
qstack[qid].max = pivot; qstack[qid].ready = 1;
qstack[qid].ready = 1; if ( ( waiting++ ) >= qstack_size )
if ( atomic_inc( &waiting ) >= qstack_size ) error( "Qstack overflow." );
error( "Qstack overflow." ); }
}
/* Recurse on the right? */
/* Recurse on the right? */ if ( jj+1 < j && pivot+1 < max ) {
if ( jj+1 < j && pivot+1 < max ) { i = jj+1;
i = jj+1; min = pivot+1;
min = pivot+1; }
} else
else break;
break;
}
}
else { else {
/* Recurse on the right? */ /* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) { if ( jj+1 < j && pivot+1 < max ) {
qid = atomic_inc( &last ) % qstack_size; qid = ( last++ ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 ); qstack[qid].i = jj+1;
qstack[qid].i = jj+1; qstack[qid].j = j;
qstack[qid].j = j; qstack[qid].min = pivot+1;
qstack[qid].min = pivot+1; qstack[qid].max = max;
qstack[qid].max = max; qstack[qid].ready = 1;
qstack[qid].ready = 1; if ( ( waiting++ ) >= qstack_size )
if ( atomic_inc( &waiting ) >= qstack_size ) error( "Qstack overflow." );
error( "Qstack overflow." ); }
}
/* Recurse on the left? */
/* Recurse on the left? */ if ( jj > i && pivot > min ) {
if ( jj > i && pivot > min ) { j = jj;
j = jj; max = pivot;
max = pivot; }
} else
else break;
break;
}
}
} /* loop over sub-intervals. */
} /* loop over sub-intervals. */
atomic_dec( &waiting ); waiting--;
} /* main loop. */ } /* main loop. */
} /* parallel bit. */
/* Verify sort. */ /* Verify sort. */
/* for ( i = 1 ; i < N ; i++ ) /* for ( i = 1 ; i < N ; i++ )
if ( ind[i-1] > ind[i] ) if ( ind[i-1] > ind[i] )
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment