Commit 4cdc697d authored by Pedro Gonnet

Merge branch 'no_openmp' into 'master'

Cleaned out the OpenMP statements

Removed all OpenMP pragmas, OpenMP runtime calls, and header inclusions, and replaced the associated atomic operations with normal operations.

The old code may have had race conditions in some routines of space.c, where non-atomic operations were performed in critical parts of the code; it is unclear whether this ever caused problems in practice.
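As a minimal illustration of the pattern applied throughout (using simplified, hypothetical names rather than the actual SWIFT structures): under the old OpenMP-parallel loops, per-cell counters had to be bumped with atomic_inc(), whereas the now-serial loops can use a plain increment.

    /* Illustrative sketch only -- simplified names, not the actual SWIFT types. */
    #include <stddef.h>

    struct cell { int count; };

    /* Old pattern: the loop ran under "#pragma omp parallel for", so concurrent
       updates to the same cell had to go through atomic_inc():

           atomic_inc( &cells[ ind[k] ].count );

       New pattern: the loop is serial, so a plain increment suffices. */
    void count_parts ( struct cell *cells , const int *ind , size_t nr_parts ) {
        for ( size_t k = 0 ; k < nr_parts ; k++ )
            cells[ ind[k] ].count++;
    }

The same reasoning applies to the qstack bookkeeping in parts_sort() and gparts_sort(): with a single thread, the atomic_inc()/atomic_cas() busy-waits reduce to ordinary reads and writes.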

Closes issue #20.

See merge request !28

Former-commit-id: c8c82f2cf89aa6d3dd6a7ac76addf932ed1ccc0f
parents 37b16d54 d5e5118f
@@ -188,17 +188,6 @@ AX_PTHREAD([LIBS="$PTHREAD_LIBS $LIBS" CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
or use CPPFLAGS and LDFLAGS if the library is installed in a
non-standard location.]))
# Check for OpenMP.
AC_OPENMP
AC_SUBST(OPENMP_CFLAGS)
enable_openmp="no"
if test -z "$OPENMP_CFLAGS"; then
echo $OPENMP_CFLAGS
AC_MSG_ERROR(Compiler does not support OpenMP, 1)
else
CFLAGS="$CFLAGS $OPENMP_CFLAGS"
enable_openmp="yes"
fi
# Check for metis. Note AX_LIB_METIS exists, but cannot be configured
# to be default off (i.e. given no option it tries to locate METIS), so we
@@ -305,7 +294,6 @@ AC_MSG_RESULT([
MPI enabled: $enable_mpi
HDF5 enabled: $with_hdf5
parallel: $have_parallel_hdf5
OpenMP enabled: $enable_openmp
Metis enabled: $with_metis
])
@@ -32,7 +32,6 @@
#include <float.h>
#include <limits.h>
#include <fenv.h>
#include <omp.h>
/* Conditional headers. */
#ifdef HAVE_LIBZ
@@ -271,7 +270,6 @@ void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int peri
double rho_max = 0.0, rho_min = 100;
/* Loop over all particle pairs. */
#pragma omp parallel for schedule(dynamic), default(none), private(k,i,dx,r2), shared(periodic,parts,dim,N,stdout)
for ( j = 0 ; j < N ; j++ ) {
if ( j % 1000 == 0 ) {
printf( "pairs_n2: j=%i.\n" , j );
@@ -291,13 +289,11 @@ void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int peri
if ( r2 < parts[j].h*parts[j].h || r2 < parts[k].h*parts[k].h ) {
runner_iact_density( r2 , NULL , parts[j].h , parts[k].h , &parts[j] , &parts[k] );
/* if ( parts[j].h / parts[k].h > maxratio )
#pragma omp critical
{
maxratio = parts[j].h / parts[k].h;
mj = j; mk = k;
}
else if ( parts[k].h / parts[j].h > maxratio )
#pragma omp critical
{
maxratio = parts[k].h / parts[j].h;
mj = j; mk = k;
@@ -658,7 +654,6 @@ int main ( int argc , char *argv[] ) {
case 't':
if ( sscanf( optarg , "%d" , &nr_threads ) != 1 )
error( "Error parsing number of threads." );
omp_set_num_threads( nr_threads );
break;
case 'w':
if ( sscanf( optarg , "%d" , &space_subsize ) != 1 )
@@ -31,7 +31,6 @@
#include <float.h>
#include <limits.h>
#include <fenv.h>
#include <omp.h>
/* Conditional headers. */
#ifdef HAVE_LIBZ
@@ -29,7 +29,6 @@
#include <math.h>
#include <float.h>
#include <limits.h>
#include <omp.h>
#include <sched.h>
/* MPI headers. */
@@ -1041,7 +1040,6 @@ void engine_maketasks ( struct engine *e ) {
/* Count the number of tasks associated with each cell and
store the density tasks in each cell, and make each sort
depend on the sorts of its progeny. */
// #pragma omp parallel for private(t,j)
for ( k = 0 ; k < sched->nr_tasks ; k++ ) {
/* Get the current task. */
@@ -1112,7 +1110,6 @@ void engine_maketasks ( struct engine *e ) {
Each force task depends on the cell ghosts and unlocks the kick2 task
of its super-cell. */
kk = sched->nr_tasks;
// #pragma omp parallel for private(t,t2)
for ( k = 0 ; k < kk ; k++ ) {
/* Get a pointer to the task. */
@@ -29,7 +29,6 @@
#include <math.h>
#include <float.h>
#include <limits.h>
#include <omp.h>
#include <sched.h>
/* MPI headers. */
@@ -28,7 +28,6 @@
#include <math.h>
#include <float.h>
#include <limits.h>
#include <omp.h>
/* MPI headers. */
#ifdef WITH_MPI
@@ -27,7 +27,6 @@
#include <math.h>
#include <pthread.h>
#include <limits.h>
#include <omp.h>
/* MPI headers. */
#ifdef WITH_MPI
@@ -725,7 +724,6 @@ void scheduler_reweight ( struct scheduler *s ) {
/* Run through the tasks backwards and set their waits and
weights. */
// tic = getticks();
// #pragma omp parallel for schedule(static) private(t,j)
for ( k = nr_tasks-1 ; k >= 0 ; k-- ) {
t = &tasks[ tid[k] ];
t->weight = 0;
@@ -810,7 +808,6 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) {
/* Run through the tasks and set their waits. */
// tic = getticks();
// #pragma omp parallel for schedule(static) private(t,j)
for ( k = nr_tasks - 1 ; k >= 0 ; k-- ) {
t = &tasks[ tid[k] ];
t->wait = 0;
@@ -28,7 +28,7 @@
#include <float.h>
#include <limits.h>
#include <math.h>
#include <omp.h>
/* MPI headers. */
#ifdef WITH_MPI
@@ -38,7 +38,6 @@
/* Local headers. */
#include "const.h"
#include "cycle.h"
#include "atomic.h"
#include "lock.h"
#include "task.h"
#include "kernel.h"
@@ -327,7 +326,6 @@ void space_rebuild ( struct space *s , double cell_max ) {
ih[0] = s->ih[0]; ih[1] = s->ih[1]; ih[2] = s->ih[2];
dim[0] = s->dim[0]; dim[1] = s->dim[1]; dim[2] = s->dim[2];
cdim[0] = s->cdim[0]; cdim[1] = s->cdim[1]; cdim[2] = s->cdim[2];
#pragma omp parallel for private(p,j)
for ( k = 0 ; k < nr_parts ; k++ ) {
p = &parts[k];
for ( j = 0 ; j < 3 ; j++ )
@@ -336,7 +334,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
else if ( p->x[j] >= dim[j] )
p->x[j] -= dim[j];
ind[k] = cell_getid( cdim , p->x[0]*ih[0] , p->x[1]*ih[1] , p->x[2]*ih[2] );
atomic_inc( &cells[ ind[k] ].count );
cells[ ind[k] ].count++;
}
// message( "getting particle indices took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 );
@@ -414,7 +412,6 @@ void space_rebuild ( struct space *s , double cell_max ) {
// tic = getticks();
if ( ( ind = (int *)malloc( sizeof(int) * s->size_gparts ) ) == NULL )
error( "Failed to allocate temporary particle indices." );
#pragma omp parallel for private(gp,j)
for ( k = 0 ; k < nr_gparts ; k++ ) {
gp = &gparts[k];
for ( j = 0 ; j < 3 ; j++ )
@@ -423,7 +420,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
else if ( gp->x[j] >= dim[j] )
gp->x[j] -= dim[j];
ind[k] = cell_getid( cdim , gp->x[0]*ih[0] , gp->x[1]*ih[1] , gp->x[2]*ih[2] );
atomic_inc( &cells[ ind[k] ].gcount );
cells[ ind[k] ].gcount++;
}
// message( "getting particle indices took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 );
@@ -463,18 +460,9 @@ void space_rebuild ( struct space *s , double cell_max ) {
/* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */
// tic = getticks();
k = 0;
#pragma omp parallel shared(s,k)
{
if ( omp_get_thread_num() < 8 )
while ( 1 ) {
int myk = atomic_inc( &k );
if ( myk < s->nr_cells )
space_split( s , &cells[myk] );
else
break;
}
}
for ( k = 0; k < s->nr_cells; k++ )
space_split( s , &cells[k] );
// message( "space_split took %.3f ms." , (double)(getticks() - tic) / CPU_TPS * 1000 );
}
@@ -524,122 +512,108 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N ,
qstack[i].ready = 0;
first = 0; last = 1; waiting = 1;
/* Parallel bit. */
#pragma omp parallel default(shared) shared(N,first,last,waiting,qstack,parts,xparts,ind,qstack_size,stderr,engine_rank) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_xp,temp_p)
{
/* Main loop. */
if ( omp_get_thread_num() < 8 )
while ( waiting > 0 ) {
/* Main loop. */
while ( waiting > 0 ) {
/* Grab an interval off the queue. */
qid = atomic_inc( &first ) % qstack_size;
/* Wait for the interval to be ready. */
while ( waiting > 0 && atomic_cas( &qstack[qid].ready , 1 , 1 ) != 1 );
/* Grab an interval off the queue. */
qid = ( first++ ) % qstack_size;
/* Broke loop for all the wrong reasons? */
if ( waiting == 0 )
break;
/* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
// message( "thread %i got interval [%i,%i] with values in [%i,%i]." , omp_get_thread_num() , i , j , min , max );
/* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
/* Loop over sub-intervals. */
while ( 1 ) {
/* Bring beer. */
pivot = (min + max) / 2;
/* One pass of QuickSort's partitioning. */
ii = i; jj = j;
while ( ii < jj ) {
while ( ii <= j && ind[ii] <= pivot )
ii++;
while ( jj >= i && ind[jj] > pivot )
jj--;
if ( ii < jj ) {
temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i;
temp_p = parts[ii]; parts[ii] = parts[jj]; parts[jj] = temp_p;
temp_xp = xparts[ii]; xparts[ii] = xparts[jj]; xparts[jj] = temp_xp;
}
}
/* Verify sort. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (<=pivot)." );
}
for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (>pivot)." );
} */
/* Split-off largest interval. */
if ( jj - i > j - jj+1 ) {
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
i = jj+1;
min = pivot+1;
}
else
break;
}
else {
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Loop over sub-intervals. */
while ( 1 ) {
/* Bring beer. */
pivot = (min + max) / 2;
/* One pass of QuickSort's partitioning. */
ii = i; jj = j;
while ( ii < jj ) {
while ( ii <= j && ind[ii] <= pivot )
ii++;
while ( jj >= i && ind[jj] > pivot )
jj--;
if ( ii < jj ) {
temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i;
temp_p = parts[ii]; parts[ii] = parts[jj]; parts[jj] = temp_p;
temp_xp = xparts[ii]; xparts[ii] = xparts[jj]; xparts[jj] = temp_xp;
}
}
/* Verify sort. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (<=pivot)." );
}
for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (>pivot)." );
} */
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
j = jj;
max = pivot;
}
else
break;
}
} /* loop over sub-intervals. */
atomic_dec( &waiting );
} /* main loop. */
/* Split-off largest interval. */
if ( jj - i > j - jj+1 ) {
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
qid = ( last++ ) % qstack_size;
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
if ( waiting++ >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
i = jj+1;
min = pivot+1;
}
else
break;
}
else {
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
qid = ( last++ ) % qstack_size;
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ( ( waiting++ ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
j = jj;
max = pivot;
}
else
break;
}
} /* loop over sub-intervals. */
waiting--;
} /* main loop. */
} /* parallel bit. */
/* Verify sort. */
/* for ( i = 1 ; i < N ; i++ )
@@ -684,122 +658,109 @@ void gparts_sort ( struct gpart *gparts , int *ind , int N , int min , int max )
qstack[i].ready = 0;
first = 0; last = 1; waiting = 1;
/* Parallel bit. */
#pragma omp parallel default(shared) shared(N,first,last,waiting,qstack,gparts,ind,qstack_size,stderr,engine_rank) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_p)
{
/* Main loop. */
if ( omp_get_thread_num() < 8 )
while ( waiting > 0 ) {
/* Grab an interval off the queue. */
qid = atomic_inc( &first ) % qstack_size;
/* Main loop. */
while ( waiting > 0 ) {
/* Grab an interval off the queue. */
qid = ( first++ ) % qstack_size;
/* Wait for the interval to be ready. */
while ( waiting > 0 && atomic_cas( &qstack[qid].ready , 1 , 1 ) != 1 );
/* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
/* Broke loop for all the wrong reasons? */
if ( waiting == 0 )
break;
/* Get the stack entry. */
i = qstack[qid].i;
j = qstack[qid].j;
min = qstack[qid].min;
max = qstack[qid].max;
qstack[qid].ready = 0;
// message( "thread %i got interval [%i,%i] with values in [%i,%i]." , omp_get_thread_num() , i , j , min , max );
/* Loop over sub-intervals. */
while ( 1 ) {
/* Loop over sub-intervals. */
while ( 1 ) {
/* Bring beer. */
pivot = (min + max) / 2;
/* One pass of QuickSort's partitioning. */
ii = i; jj = j;
while ( ii < jj ) {
while ( ii <= j && ind[ii] <= pivot )
ii++;
while ( jj >= i && ind[jj] > pivot )
jj--;
if ( ii < jj ) {
temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i;
temp_p = gparts[ii]; gparts[ii] = gparts[jj]; gparts[jj] = temp_p;
}
}
/* Verify sort. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (<=pivot)." );
}
for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (>pivot)." );
} */
/* Split-off largest interval. */
if ( jj - i > j - jj+1 ) {
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
i = jj+1;
min = pivot+1;
}
else
break;
}
/* Bring beer. */
pivot = (min + max) / 2;
/* One pass of QuickSort's partitioning. */
ii = i; jj = j;
while ( ii < jj ) {
while ( ii <= j && ind[ii] <= pivot )
ii++;
while ( jj >= i && ind[jj] > pivot )
jj--;
if ( ii < jj ) {
temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i;
temp_p = gparts[ii]; gparts[ii] = gparts[jj]; gparts[jj] = temp_p;
}
}
/* Verify sort. */
/* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (<=pivot)." );
}
for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) {
message( "sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i." , k , ind[k] , pivot , i , j , N );
error( "Partition failed (>pivot)." );
} */
/* Split-off largest interval. */
if ( jj - i > j - jj+1 ) {
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
qid = ( last++ ) % qstack_size;
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
if ( ( waiting++ ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
i = jj+1;
min = pivot+1;
}
else
break;
}
else {
else {
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
j = jj;
max = pivot;
}
else
break;
}
} /* loop over sub-intervals. */
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
qid = ( last++ ) % qstack_size;
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
qstack[qid].ready = 1;
if ( ( waiting++ ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
j = jj;
max = pivot;
}
else
break;