diff --git a/src/cell.c b/src/cell.c index bfcecc94d95f9247a43e50a3f31031d979668f3b..356e4769e120bbf395964ad7a3d0ba8cea6a923e 100644 --- a/src/cell.c +++ b/src/cell.c @@ -258,8 +258,18 @@ void cell_split ( struct cell *c ) { for ( k = 0 ; k < 8 ; k++ ) { c->progeny[k]->count = right[k] - left[k] + 1; c->progeny[k]->parts = &c->parts[ left[k] ]; + c->progeny[k]->cparts = &c->cparts[ left[k] ]; } + /* Update the condensed particle data. */ + for ( k = 0 ; k < c->count ; k++ ) { + c->cparts[k].x[0] = c->parts[k].x[0]; + c->cparts[k].x[1] = c->parts[k].x[1]; + c->cparts[k].x[2] = c->parts[k].x[2]; + c->cparts[k].h = c->parts[k].h; + c->cparts[k].dt = c->parts[k].dt; + } + /* Verify a few sub-cells. */ /* for ( k = 0 ; k < c->progeny[0]->count ; k++ ) if ( c->progeny[0]->parts[k].x[0] > pivot[0] || diff --git a/src/cell.h b/src/cell.h index 8d54374f9b7a9de711e8dd0f2f04c9ae92d93ee2..23565c57694672fd5dd61e331c5d046c1b4727d7 100644 --- a/src/cell.h +++ b/src/cell.h @@ -49,6 +49,7 @@ struct cell { /* Pointers to the particle data. */ struct part *parts; + struct cpart *cparts; /* Pointers for the sorted indices. */ struct entry *sort; diff --git a/src/part.h b/src/part.h index 095e85c2e198a1fcaf3938384053cbee80321b46..f17cddaa47f6b3f150cb8171a4d6715825e3f3f6 100644 --- a/src/part.h +++ b/src/part.h @@ -24,9 +24,27 @@ #define part_dtmax 10 +/* Condensed data of a single particle. */ +struct cpart { + + /* Particle position. */ + double x[3]; + + /* Particle cutoff radius. */ + float h; + + /* Particle time-step. */ + float dt; + + } __attribute__((aligned (32))); + + /* Data of a single particle. */ struct part { + /* Particle position. */ + double x[3]; + /* Particle cutoff radius. */ float h; @@ -39,9 +57,6 @@ struct part { /* Particle ID. */ unsigned long long id; - /* Particle position. */ - double x[3]; - /* Particle velocity. */ float v[3]; @@ -70,7 +85,7 @@ struct part { float rho_dh; /* Particle number density. */ - int icount; + // int icount; float wcount; float wcount_dh; diff --git a/src/runner.c b/src/runner.c index 6805f927fecdafa136b85a932dd0b93a6c3ca8fc..223f1646741bebd3d949b3ca4366cda84c06d588 100644 --- a/src/runner.c +++ b/src/runner.c @@ -172,7 +172,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { struct entry *finger; struct entry *fingers[8]; - struct part *parts = c->parts; + struct cpart *cparts = c->cparts; int j, k, count = c->count; int i, ind, off[8], inds[8], temp_i; // float shift[3]; @@ -262,9 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { /* Fill the sort array. */ for ( k = 0 ; k < count ; k++ ) { - px[0] = parts[k].x[0]; - px[1] = parts[k].x[1]; - px[2] = parts[k].x[2]; + px[0] = cparts[k].x[0]; + px[1] = cparts[k].x[1]; + px[2] = cparts[k].x[2]; for ( j = 0 ; j < 13 ; j++ ) if ( flags & (1 << j) ) { c->sort[ j*(count + 1) + k].i = k; @@ -272,7 +272,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { } if ( flags & (1 << 14) ) { c->sort[ 14*(count + 1) + k ].i = k; - c->sort[ 14*(count + 1) + k ].d = parts[k].dt; + c->sort[ 14*(count + 1) + k ].d = cparts[k].dt; } } diff --git a/src/runner_doiact.h b/src/runner_doiact.h index a9a5710c288f7a888ddf1ecaa22836fe426482ee..f9e0338176a2b5f9fb596c85e44e035bbe791482 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -82,8 +82,9 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r struct engine *e = r->e; int pid, pjd, k, count_i = ci->count, count_j = cj->count; double shift[3] = { 0.0 , 0.0 , 0.0 }; - struct part *restrict pi, *restrict pj; struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts; + struct cpart *restrict cpj, *restrict cparts_j = cj->cparts; + struct cpart *restrict cpi, *restrict cparts_i = ci->cparts; double pix[3]; float dx[3], hi, hi2, r2; TIMER_TIC @@ -105,29 +106,29 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r for ( pid = 0 ; pid < count_i ; pid++ ) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[ pid ]; + cpi = &cparts_i[ pid ]; for ( k = 0 ; k < 3 ; k++ ) - pix[k] = pi->x[k] - shift[k]; - hi = pi->h; + pix[k] = cpi->x[k] - shift[k]; + hi = cpi->h; hi2 = hi * hi; /* Loop over the parts in cj. */ for ( pjd = 0 ; pjd < count_j ; pjd++ ) { /* Get a pointer to the jth particle. */ - pj = &parts_j[ pjd ]; + cpj = &cparts_j[ pjd ]; /* Compute the pairwise distance. */ r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { - dx[k] = pix[k] - pj->x[k]; + dx[k] = pix[k] - cpj->x[k]; r2 += dx[k]*dx[k]; } /* Hit or miss? */ - if ( r2 < hi2 || r2 < pj->h*pj->h ) { + if ( r2 < hi2 || r2 < cpj->h*cpj->h ) { - IACT( r2 , dx , hi , pj->h , pi , pj ); + IACT( r2 , dx , hi , cpj->h , &parts_i[ pid ] , &parts_j[pjd] ); } @@ -162,7 +163,8 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * struct engine *e = r->e; int pid, pjd, sid, k, count_j = cj->count, flipped; double shift[3] = { 0.0 , 0.0 , 0.0 }; - struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts; + struct part *restrict pi, *restrict parts_j = cj->parts; + struct cpart *restrict cpj, *restrict cparts_j = cj->cparts; double pix[3]; float dx[3], hi, hi2, r2, di; struct entry *sort_j; @@ -210,19 +212,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) { /* Get a pointer to the jth particle. */ - pj = &parts_j[ sort_j[ pjd ].i ]; + cpj = &cparts_j[ sort_j[ pjd ].i ]; /* Compute the pairwise distance. */ r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { - dx[k] = pix[k] - pj->x[k]; + dx[k] = pix[k] - cpj->x[k]; r2 += dx[k]*dx[k]; } /* Hit or miss? */ if ( r2 < hi2 ) { - IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj ); + IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] ); } @@ -250,19 +252,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) { /* Get a pointer to the jth particle. */ - pj = &parts_j[ sort_j[ pjd ].i ]; + cpj = &cparts_j[ sort_j[ pjd ].i ]; /* Compute the pairwise distance. */ r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { - dx[k] = pix[k] - pj->x[k]; + dx[k] = pix[k] - cpj->x[k]; r2 += dx[k]*dx[k]; } /* Hit or miss? */ if ( r2 < hi2 ) { - IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj ); + IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] ); } @@ -297,7 +299,9 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *restrict parts , int *restrict ind , int count ) { int pid, pjd, k, count_i = ci->count; - struct part *restrict pi, *restrict pj, *restrict parts_i = ci->parts; + struct part *restrict parts_i = ci->parts; + struct part *restrict pi; + struct cpart *restrict cpj, *restrict cparts = ci->cparts; double pix[3]; float dx[3], hi, hi2, r2; TIMER_TIC @@ -321,23 +325,19 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * for ( pjd = 0 ; pjd < count_i ; pjd++ ) { /* Get a pointer to the jth particle. */ - pj = &parts_i[ pjd ]; + cpj = &cparts[ pjd ]; - /* Skip the particle itself. */ - if ( pj == pi ) - continue; - /* Compute the pairwise distance. */ r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { - dx[k] = pix[k] - pj->x[k]; + dx[k] = pix[k] - cpj->x[k]; r2 += dx[k]*dx[k]; } /* Hit or miss? */ - if ( r2 < hi2 ) { + if ( r2 > 0.0f && r2 < hi2 ) { - IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj ); + IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_i[ pjd ] ); } @@ -370,7 +370,9 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric double rshift, shift[3] = { 0.0 , 0.0 , 0.0 }; struct cell *temp; struct entry *restrict sort_i, *restrict sort_j; - struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j; + struct part *restrict parts_i, *restrict parts_j; + struct cpart *restrict cpi, *restrict cparts_i; + struct cpart *restrict cpj, *restrict cparts_j; double pix[3], pjx[3], di, dj; float dx[3], hi, hi2, hj, hj2, r2; double hi_max, hj_max; @@ -427,6 +429,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric hi_max = ci->h_max - rshift; hj_max = cj->h_max; count_i = ci->count; count_j = cj->count; parts_i = ci->parts; parts_j = cj->parts; + cparts_i = ci->cparts; cparts_j = cj->cparts; di_max = sort_i[count_i-1].d - rshift; dj_min = sort_j[0].d; @@ -437,33 +440,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) { /* Get a hold of the ith part in ci. */ - pi = &parts_i[ sort_i[ pid ].i ]; - hi = pi->h; + cpi = &cparts_i[ sort_i[ pid ].i ]; + hi = cpi->h; di = sort_i[pid].d + hi - rshift; if ( di < dj_min ) continue; - hi2 = pi->h * pi->h; + hi2 = hi * hi; for ( k = 0 ; k < 3 ; k++ ) - pix[k] = pi->x[k] - shift[k]; + pix[k] = cpi->x[k] - shift[k]; /* Loop over the parts in cj. */ for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d < di ; pjd++ ) { /* Get a pointer to the jth particle. */ - pj = &parts_j[ sort_j[pjd].i ]; + cpj = &cparts_j[ sort_j[pjd].i ]; /* Compute the pairwise distance. */ r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { - dx[k] = pix[k] - pj->x[k]; + dx[k] = pix[k] - cpj->x[k]; r2 += dx[k]*dx[k]; } /* Hit or miss? */ if ( r2 < hi2 ) { - IACT( r2 , dx , hi , pj->h , pi , pj ); + IACT( r2 , dx , hi , cpj->h , &parts_i[ sort_i[ pid ].i ] , &parts_j[ sort_j[pjd].i ] ); } @@ -478,33 +481,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) { /* Get a hold of the jth part in cj. */ - pj = &parts_j[ sort_j[ pjd ].i ]; - hj = pj->h; + cpj = &cparts_j[ sort_j[ pjd ].i ]; + hj = cpj->h; dj = sort_j[pjd].d - hj - rshift; if ( dj > di_max ) continue; for ( k = 0 ; k < 3 ; k++ ) - pjx[k] = pj->x[k] + shift[k]; - hj2 = pj->h * pj->h; + pjx[k] = cpj->x[k] + shift[k]; + hj2 = hj * hj; /* Loop over the parts in ci. */ for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d > dj ; pid-- ) { /* Get a pointer to the jth particle. */ - pi = &parts_i[ sort_i[pid].i ]; + cpi = &cparts_i[ sort_i[pid].i ]; /* Compute the pairwise distance. */ r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { - dx[k] = pi->x[k] - pjx[k]; + dx[k] = cpi->x[k] - pjx[k]; r2 += dx[k]*dx[k]; } /* Hit or miss? */ - if ( r2 < hj2 && r2 > pi->h*pi->h ) { + if ( r2 < hj2 && r2 > cpi->h*cpi->h ) { - IACT( r2 , dx , pi->h , hj , pi , pj ); + IACT( r2 , dx , cpi->h , hj , &parts_i[ sort_i[pid].i ] , &parts_j[ sort_j[ pjd ].i ] ); } @@ -533,7 +536,8 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) { int k, pid, pjd, count = c->count; double pix[3]; float dx[3], hi, hi2, r2; - struct part *restrict pi, *restrict pj, *restrict parts = c->parts; + struct part *restrict parts = c->parts; + struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts; TIMER_TIC if ( c->split ) @@ -543,31 +547,31 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) { for ( pid = 0 ; pid < count ; pid++ ) { /* Get a pointer to the ith particle. */ - pi = &parts[pid]; + cpi = &cparts[pid]; /* Get the particle position and radius. */ for ( k = 0 ; k < 3 ; k++ ) - pix[k] = pi->x[k]; - hi = pi->h; + pix[k] = cpi->x[k]; + hi = cpi->h; hi2 = hi * hi; /* Loop over the other particles .*/ for ( pjd = pid+1 ; pjd < count ; pjd++ ) { /* Get a pointer to the jth particle. */ - pj = &parts[pjd]; + cpj = &cparts[pjd]; /* Compute the pairwise distance. */ r2 = 0.0f; for ( k = 0 ; k < 3 ; k++ ) { - dx[k] = pix[k] - pj->x[k]; + dx[k] = pix[k] - cpj->x[k]; r2 += dx[k]*dx[k]; } /* Hit or miss? */ - if ( r2 < hi2 || r2 < pj->h*pj->h ) { + if ( r2 < hi2 || r2 < cpj->h*cpj->h ) { - IACT( r2 , dx , hi , pj->h , pi , pj ); + IACT( r2 , dx , hi , cpj->h , &parts[pid] , &parts[pjd] ); } diff --git a/src/runner_iact.h b/src/runner_iact.h index 67db41bbd11d22c8225bdd2efe53928b079dccc0..dae13c2fa6e272a905e8dadfb2c0615218a332f4 100644 --- a/src/runner_iact.h +++ b/src/runner_iact.h @@ -83,7 +83,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx ); pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - pi->icount += 1; + // pi->icount += 1; } @@ -98,7 +98,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx ); pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pj->wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - pj->icount += 1; + // pj->icount += 1; } @@ -135,7 +135,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density ( pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx ); pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); - pi->icount += 1; + // pi->icount += 1; } diff --git a/src/space.c b/src/space.c index 60d2019f19f5c94a8d3e0edbb60b28e5811a0bd6..69239e8a51bb0a798bbec7bc54239fa3fe554f21 100644 --- a/src/space.c +++ b/src/space.c @@ -118,7 +118,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) { /* Count the particles below that. */ for ( count = 0 , k = 0 ; k < c->count ; k++ ) { - h = c->parts[k].h; + h = c->cparts[k].h; if ( h <= h_limit ) count += 1; if ( h > h_max ) @@ -212,14 +212,16 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { int i, j, k, cdim[3]; struct cell *c; struct part *finger; + struct cpart *cfinger; int *ind, changes = 0; /* Run through the parts and get the current h_max. */ - for ( k = 0 ; k < s->nr_parts ; k++ ) + for ( k = 0 ; k < s->nr_parts ; k++ ) { if ( s->parts[k].h > h_max ) h_max = s->parts[k].h; else if ( s->parts[k].h < h_min ) h_min = s->parts[k].h; + } s->h_min = h_min; s->h_max = h_max; @@ -264,28 +266,43 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { c->depth = 0; } - /* Run through the particles and get their cell index. */ - ind = (int *)alloca( sizeof(int) * s->nr_parts ); - for ( k = 0 ; k < s->nr_parts ; k++ ) { - ind[k] = cell_getid( cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] ); - s->cells[ ind[k] ].count += 1; - } - - /* Sort the parts according to their cells. */ - parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells ); - - /* Hook the cells up to the parts. */ - for ( finger = s->parts , k = 0 ; k < s->nr_cells ; k++ ) { - c = &s->cells[ k ]; - c->parts = finger; - finger = &finger[ c->count ]; - } - /* There were massive changes. */ changes = 1; } /* re-build upper-level cells? */ + + /* Run through the particles and get their cell index. */ + ind = (int *)alloca( sizeof(int) * s->nr_parts ); + for ( k = 0 ; k < s->nr_cells ; k++ ) + s->cells[ k ].count = 0; + for ( k = 0 ; k < s->nr_parts ; k++ ) { + ind[k] = cell_getid( s->cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] ); + s->cells[ ind[k] ].count += 1; + } + + /* Sort the parts according to their cells. */ + parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells ); + + /* Update the condensed particle data. */ + for ( k = 0 ; k < s->nr_parts ; k++ ) { + s->cparts[k].x[0] = s->parts[k].x[0]; + s->cparts[k].x[1] = s->parts[k].x[1]; + s->cparts[k].x[2] = s->parts[k].x[2]; + s->cparts[k].h = s->parts[k].h; + s->cparts[k].dt = s->parts[k].dt; + } + + /* Hook the cells up to the parts. */ + for ( finger = s->parts , cfinger = s->cparts , k = 0 ; k < s->nr_cells ; k++ ) { + c = &s->cells[ k ]; + c->parts = finger; + c->cparts = cfinger; + finger = &finger[ c->count ]; + cfinger = &cfinger[ c->count ]; + } + + /* At this point, we have the upper-level cells, old or new. Now make sure that the parts in each cell are ok. */ #pragma omp parallel for shared(s) reduction(+:changes) @@ -303,7 +320,7 @@ int space_rebuild ( struct space *s , int force , double cell_max ) { /** - * @brief Sort the particles according to the given indices. + * @brief Sort the particles and condensed particles according to the given indices. * * @param parts The list of #part * @param ind The indices with respect to which the parts are sorted. @@ -321,57 +338,79 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) { int temp_i; struct part temp_p; - /* One pass of quicksort. */ - while ( i < j ) { - while ( i < N && ind[i] <= pivot ) - i++; - while ( j >= 0 && ind[j] > pivot ) - j--; - if ( i < j ) { - temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i; - temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p; - } + /* If N is small enough, just do insert sort. */ + if ( N < 16 ) { + + for ( i = 1 ; i < N ; i++ ) + if ( ind[i] < ind[i-1] ) { + temp_i = ind[i]; + temp_p = parts[j]; + for ( j = i ; j > 0 && ind[j-1] > temp_i ; j-- ) { + ind[j] = ind[j-1]; + parts[j] = parts[j-1]; + } + ind[j] = temp_i; + parts[j] = temp_p; + } + } - /* Verify sort. */ - for ( int k = 0 ; k <= j ; k++ ) - if ( ind[k] > pivot ) { - printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); - error( "Sorting failed (<=pivot)." ); + /* Otherwise, recurse with Quicksort. */ + else { + + /* One pass of quicksort. */ + while ( i < j ) { + while ( i < N && ind[i] <= pivot ) + i++; + while ( j >= 0 && ind[j] > pivot ) + j--; + if ( i < j ) { + temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i; + temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p; + } } - for ( int k = j+1 ; k < N ; k++ ) - if ( ind[k] <= pivot ) { - printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); - error( "Sorting failed (>pivot)." ); + + /* Verify sort. */ + for ( int k = 0 ; k <= j ; k++ ) + if ( ind[k] > pivot ) { + printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); + error( "Sorting failed (<=pivot)." ); + } + for ( int k = j+1 ; k < N ; k++ ) + if ( ind[k] <= pivot ) { + printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N ); + error( "Sorting failed (>pivot)." ); + } + + /* Try to recurse in parallel. */ + if ( N < 100 ) { + + /* Recurse on the left? */ + if ( j > 0 && pivot > min ) + parts_sort( parts , ind , j+1 , min , pivot ); + + /* Recurse on the right? */ + if ( i < N && pivot+1 < max ) + parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + } - - /* Try to recurse in parallel. */ - if ( N < 100 ) { - - /* Recurse on the left? */ - if ( j > 0 && pivot > min ) - parts_sort( parts , ind , j+1 , min , pivot ); - /* Recurse on the right? */ - if ( i < N && pivot+1 < max ) - parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + else + #pragma omp parallel sections + { - } - - else - #pragma omp parallel sections - { - - /* Recurse on the left? */ - #pragma omp section - if ( j > 0 && pivot > min ) - parts_sort( parts , ind , j+1 , min , pivot ); + /* Recurse on the left? */ + #pragma omp section + if ( j > 0 && pivot > min ) + parts_sort( parts , ind , j+1 , min , pivot ); - /* Recurse on the right? */ - #pragma omp section - if ( i < N && pivot+1 < max ) - parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + /* Recurse on the right? */ + #pragma omp section + if ( i < N && pivot+1 < max ) + parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max ); + } + } } @@ -1306,7 +1345,7 @@ void space_split ( struct space *s , struct cell *c ) { /* Count the particles below that. */ for ( count = 0 , k = 0 ; k < c->count ; k++ ) { - h = c->parts[k].h; + h = c->cparts[k].h; if ( h <= h_limit ) count += 1; if ( h > h_max ) @@ -1387,6 +1426,9 @@ void space_recycle ( struct space *s , struct cell *c ) { /* Clear this cell's sort arrays. */ if ( c->sort != NULL ) free( c->sort ); + + /* Clear the cell data. */ + bzero( c , sizeof(struct cell) ); /* Hook this cell into the buffer. */ c->next = s->cells_new; @@ -1465,6 +1507,10 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , s->nr_parts = N; s->parts = parts; + /* Allocate the cparts array. */ + if ( posix_memalign( (void *)&s->cparts , 32 , N * sizeof(struct cpart) ) != 0 ) + error( "Failed to allocate cparts." ); + /* Init the space lock. */ if ( lock_init( &s->lock ) != 0 ) error( "Failed to create space spin-lock." ); diff --git a/src/space.h b/src/space.h index 6230cb298f419da4d5412b30acadda735d8ca985..e648b680608f58813b032b05183123cdf3e55040 100644 --- a/src/space.h +++ b/src/space.h @@ -75,6 +75,7 @@ struct space { /* The particle data (cells have pointers to this). */ struct part *parts; + struct cpart *cparts; /* The sortlist data (cells hold pointers to these). */ struct entry *sortlist;