Commit 3d9ebb0c authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

re-implemented parallel quicksort without OpenMP tasks.


Former-commit-id: f9a214cf7dda269e4d308b7824ce5330473d70df
parent e58366d7
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <float.h> #include <float.h>
#include <limits.h> #include <limits.h>
#include <math.h> #include <math.h>
#include <omp.h>
/* Local headers. */ /* Local headers. */
#include "cycle.h" #include "cycle.h"
...@@ -333,109 +334,125 @@ void space_rebuild ( struct space *s , double cell_max ) { ...@@ -333,109 +334,125 @@ void space_rebuild ( struct space *s , double cell_max ) {
* @param N The number of parts * @param N The number of parts
* @param min Lowest index. * @param min Lowest index.
* @param max highest index. * @param max highest index.
*
* This function calls itself recursively.
*/ */
void parts_sort_rec ( struct part *parts , int *ind , int N , int min , int max ) { void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {
int pivot = (min + max) / 2; struct {
int i = 0, j = N-1; int i, j, min, max;
int temp_i; } qstack[space_qstack];
int first, last, waiting;
int pivot;
int i, ii, j, jj, temp_i, qid;
struct part temp_p; struct part temp_p;
/* If N is small enough, just do insert sort. */ /* Init the interval stack. */
if ( N < 16 ) { qstack[0].i = 0;
qstack[0].j = N-1;
qstack[0].min = min;
qstack[0].max = max;
first = 0; last = 0; waiting = 1;
/* Parallel bit. */
#pragma omp parallel default(shared) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_p)
{
for ( i = 1 ; i < N ; i++ ) /* Main loop. */
if ( ind[i] < ind[i-1] ) { while ( waiting > 0 ) {
temp_i = ind[i];
temp_p = parts[i]; /* Try to get an index off the stack. */
for ( j = i ; j > 0 && ind[j-1] > temp_i ; j-- ) { qid = -1;
ind[j] = ind[j-1]; while ( waiting > 0 && qid < 0 ) {
parts[j] = parts[j-1]; #pragma omp critical (stack)
{
if ( last - first > space_qstack )
error( "Sorting stack overflow." );
if ( first <= last ) {
qid = first;
first += 1;
}
} }
ind[j] = temp_i;
parts[j] = temp_p;
} }
/* Did we get an index? */
if ( qid < 0 )
continue;
/* Verify sort. */ /* Get the stack entry. */
/* for ( i = 1 ; i < N ; i++ ) qid %= space_qstack;
if ( ind[i-1] > ind[i] ) i = qstack[qid].i;
error( "Insert-sort failed!" ); */ j = qstack[qid].j;
min = qstack[qid].min;
} max = qstack[qid].max;
pivot = (min + max) / 2;
/* Otherwise, recurse with Quicksort. */ // printf( "parts_sort_par: thread %i got interval [%i,%i] with values in [%i,%i].\n" , omp_get_thread_num() , i , j , min , max );
else {
/* One pass of QuickSort's partitioning. */
/* One pass of quicksort. */ ii = i; jj = j;
while ( i < j ) { while ( ii < jj ) {
while ( i < N && ind[i] <= pivot ) while ( ii <= j && ind[ii] <= pivot )
i++; ii++;
while ( j >= 0 && ind[j] > pivot ) while ( jj >= i && ind[jj] > pivot )
j--; jj--;
if ( i < j ) { if ( ii < jj ) {
temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i; temp_i = ind[ii]; ind[ii] = ind[jj]; ind[jj] = temp_i;
temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p; temp_p = parts[ii]; parts[ii] = parts[jj]; parts[jj] = temp_p;
}
} }
}
/* Verify sort. */ /* Verify sort. */
/* for ( int k = 0 ; k <= j ; k++ ) /* for ( int k = i ; k <= jj ; k++ )
if ( ind[k] > pivot ) { if ( ind[k] > pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (<=pivot)." ); error( "Partition failed (<=pivot)." );
} }
for ( int k = j+1 ; k < N ; k++ ) for ( int k = jj+1 ; k <= j ; k++ )
if ( ind[k] <= pivot ) { if ( ind[k] <= pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (>pivot)." ); error( "Partition failed (>pivot)." );
} */ } */
/* Bother going parallel? */
if ( N < 100 ) {
/* Recurse on the left? */
if ( j > 0 && pivot > min )
parts_sort( parts , ind , j+1 , min , pivot );
/* Recurse on the right? */
if ( i < N && pivot+1 < max )
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
}
else {
/* Recurse on the left? */ /* Recurse on the left? */
if ( j > 0 && pivot > min ) { if ( jj > i && pivot > min ) {
#pragma omp task untied #pragma omp critical (stack)
parts_sort( parts , ind , j+1 , min , pivot ); {
last += 1;
qid = last % space_qstack;
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
waiting += 1;
}
} }
/* Recurse on the right? */ /* Recurse on the right? */
if ( i < N && pivot+1 < max ) { if ( jj+1 < j && pivot+1 < max ) {
#pragma omp task untied #pragma omp critical (stack)
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); {
last += 1;
qid = last % space_qstack;
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
waiting += 1;
}
} }
}
}
} #pragma omp critical (stack)
waiting -= 1;
void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) { } /* main loop. */
/* Call the first sort as an OpenMP task. */ } /* parallel bit. */
#pragma omp parallel
{
#pragma omp single nowait
parts_sort_rec( parts , ind , N , min , max );
}
/* Verify sort. */
/* for ( i = 1 ; i < N ; i++ )
if ( ind[i-1] > ind[i] )
error( "Sorting failed!" ); */
} }
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#define space_subsize_default 5000 #define space_subsize_default 5000
#define space_stretch 1.05f #define space_stretch 1.05f
#define space_maxreldx 0.2f #define space_maxreldx 0.2f
#define space_qstack 1000
/* Convert cell location to ID. */ /* Convert cell location to ID. */
...@@ -97,6 +98,7 @@ struct space { ...@@ -97,6 +98,7 @@ struct space {
/* function prototypes. */ /* function prototypes. */
void parts_sort ( struct part *parts , int *ind , int N , int min , int max ); void parts_sort ( struct part *parts , int *ind , int N , int min , int max );
void parts_sort_par ( struct part *parts , int *ind , int N , int min , int max );
struct cell *space_getcell ( struct space *s ); struct cell *space_getcell ( struct space *s );
int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift ); int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift );
void space_init ( struct space *s , double dim[3] , struct part *parts , int N , int periodic , double h_max ); void space_init ( struct space *s , double dim[3] , struct part *parts , int N , int periodic , double h_max );
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment