Skip to content
Snippets Groups Projects
Commit f8c01941 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

added cparts, which contain only x, h, and dt of each part for faster, more cache-efficient access.

Former-commit-id: dbe48ffac91fdca4909a9de4484b9c5385e8f2cf
parent acf43028
No related branches found
No related tags found
No related merge requests found
......@@ -258,8 +258,18 @@ void cell_split ( struct cell *c ) {
for ( k = 0 ; k < 8 ; k++ ) {
c->progeny[k]->count = right[k] - left[k] + 1;
c->progeny[k]->parts = &c->parts[ left[k] ];
c->progeny[k]->cparts = &c->cparts[ left[k] ];
}
/* Update the condensed particle data. */
for ( k = 0 ; k < c->count ; k++ ) {
c->cparts[k].x[0] = c->parts[k].x[0];
c->cparts[k].x[1] = c->parts[k].x[1];
c->cparts[k].x[2] = c->parts[k].x[2];
c->cparts[k].h = c->parts[k].h;
c->cparts[k].dt = c->parts[k].dt;
}
/* Verify a few sub-cells. */
/* for ( k = 0 ; k < c->progeny[0]->count ; k++ )
if ( c->progeny[0]->parts[k].x[0] > pivot[0] ||
......
......@@ -49,6 +49,7 @@ struct cell {
/* Pointers to the particle data. */
struct part *parts;
struct cpart *cparts;
/* Pointers for the sorted indices. */
struct entry *sort;
......
......@@ -24,9 +24,27 @@
#define part_dtmax 10
/* Condensed data of a single particle. */
struct cpart {
/* Particle position. */
double x[3];
/* Particle cutoff radius. */
float h;
/* Particle time-step. */
float dt;
} __attribute__((aligned (32)));
/* Data of a single particle. */
struct part {
/* Particle position. */
double x[3];
/* Particle cutoff radius. */
float h;
......@@ -39,9 +57,6 @@ struct part {
/* Particle ID. */
unsigned long long id;
/* Particle position. */
double x[3];
/* Particle velocity. */
float v[3];
......@@ -70,7 +85,7 @@ struct part {
float rho_dh;
/* Particle number density. */
int icount;
// int icount;
float wcount;
float wcount_dh;
......
......@@ -172,7 +172,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
struct entry *finger;
struct entry *fingers[8];
struct part *parts = c->parts;
struct cpart *cparts = c->cparts;
int j, k, count = c->count;
int i, ind, off[8], inds[8], temp_i;
// float shift[3];
......@@ -262,9 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
/* Fill the sort array. */
for ( k = 0 ; k < count ; k++ ) {
px[0] = parts[k].x[0];
px[1] = parts[k].x[1];
px[2] = parts[k].x[2];
px[0] = cparts[k].x[0];
px[1] = cparts[k].x[1];
px[2] = cparts[k].x[2];
for ( j = 0 ; j < 13 ; j++ )
if ( flags & (1 << j) ) {
c->sort[ j*(count + 1) + k].i = k;
......@@ -272,7 +272,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
}
if ( flags & (1 << 14) ) {
c->sort[ 14*(count + 1) + k ].i = k;
c->sort[ 14*(count + 1) + k ].d = parts[k].dt;
c->sort[ 14*(count + 1) + k ].d = cparts[k].dt;
}
}
......
......@@ -82,8 +82,9 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
struct engine *e = r->e;
int pid, pjd, k, count_i = ci->count, count_j = cj->count;
double shift[3] = { 0.0 , 0.0 , 0.0 };
struct part *restrict pi, *restrict pj;
struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts;
struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
struct cpart *restrict cpi, *restrict cparts_i = ci->cparts;
double pix[3];
float dx[3], hi, hi2, r2;
TIMER_TIC
......@@ -105,29 +106,29 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
for ( pid = 0 ; pid < count_i ; pid++ ) {
/* Get a hold of the ith part in ci. */
pi = &parts_i[ pid ];
cpi = &cparts_i[ pid ];
for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k] - shift[k];
hi = pi->h;
pix[k] = cpi->x[k] - shift[k];
hi = cpi->h;
hi2 = hi * hi;
/* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j ; pjd++ ) {
/* Get a pointer to the jth particle. */
pj = &parts_j[ pjd ];
cpj = &cparts_j[ pjd ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 || r2 < pj->h*pj->h ) {
if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
IACT( r2 , dx , hi , pj->h , pi , pj );
IACT( r2 , dx , hi , cpj->h , &parts_i[ pid ] , &parts_j[pjd] );
}
......@@ -162,7 +163,8 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
struct engine *e = r->e;
int pid, pjd, sid, k, count_j = cj->count, flipped;
double shift[3] = { 0.0 , 0.0 , 0.0 };
struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts;
struct part *restrict pi, *restrict parts_j = cj->parts;
struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
double pix[3];
float dx[3], hi, hi2, r2, di;
struct entry *sort_j;
......@@ -210,19 +212,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) {
/* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[ pjd ].i ];
cpj = &cparts_j[ sort_j[ pjd ].i ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] );
}
......@@ -250,19 +252,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) {
/* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[ pjd ].i ];
cpj = &cparts_j[ sort_j[ pjd ].i ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] );
}
......@@ -297,7 +299,9 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *restrict parts , int *restrict ind , int count ) {
int pid, pjd, k, count_i = ci->count;
struct part *restrict pi, *restrict pj, *restrict parts_i = ci->parts;
struct part *restrict parts_i = ci->parts;
struct part *restrict pi;
struct cpart *restrict cpj, *restrict cparts = ci->cparts;
double pix[3];
float dx[3], hi, hi2, r2;
TIMER_TIC
......@@ -321,23 +325,19 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
for ( pjd = 0 ; pjd < count_i ; pjd++ ) {
/* Get a pointer to the jth particle. */
pj = &parts_i[ pjd ];
cpj = &cparts[ pjd ];
/* Skip the particle itself. */
if ( pj == pi )
continue;
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 ) {
if ( r2 > 0.0f && r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_i[ pjd ] );
}
......@@ -370,7 +370,9 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
double rshift, shift[3] = { 0.0 , 0.0 , 0.0 };
struct cell *temp;
struct entry *restrict sort_i, *restrict sort_j;
struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
struct part *restrict parts_i, *restrict parts_j;
struct cpart *restrict cpi, *restrict cparts_i;
struct cpart *restrict cpj, *restrict cparts_j;
double pix[3], pjx[3], di, dj;
float dx[3], hi, hi2, hj, hj2, r2;
double hi_max, hj_max;
......@@ -427,6 +429,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
hi_max = ci->h_max - rshift; hj_max = cj->h_max;
count_i = ci->count; count_j = cj->count;
parts_i = ci->parts; parts_j = cj->parts;
cparts_i = ci->cparts; cparts_j = cj->cparts;
di_max = sort_i[count_i-1].d - rshift;
dj_min = sort_j[0].d;
......@@ -437,33 +440,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
/* Get a hold of the ith part in ci. */
pi = &parts_i[ sort_i[ pid ].i ];
hi = pi->h;
cpi = &cparts_i[ sort_i[ pid ].i ];
hi = cpi->h;
di = sort_i[pid].d + hi - rshift;
if ( di < dj_min )
continue;
hi2 = pi->h * pi->h;
hi2 = hi * hi;
for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k] - shift[k];
pix[k] = cpi->x[k] - shift[k];
/* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d < di ; pjd++ ) {
/* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[pjd].i ];
cpj = &cparts_j[ sort_j[pjd].i ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT( r2 , dx , hi , pj->h , pi , pj );
IACT( r2 , dx , hi , cpj->h , &parts_i[ sort_i[ pid ].i ] , &parts_j[ sort_j[pjd].i ] );
}
......@@ -478,33 +481,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
/* Get a hold of the jth part in cj. */
pj = &parts_j[ sort_j[ pjd ].i ];
hj = pj->h;
cpj = &cparts_j[ sort_j[ pjd ].i ];
hj = cpj->h;
dj = sort_j[pjd].d - hj - rshift;
if ( dj > di_max )
continue;
for ( k = 0 ; k < 3 ; k++ )
pjx[k] = pj->x[k] + shift[k];
hj2 = pj->h * pj->h;
pjx[k] = cpj->x[k] + shift[k];
hj2 = hj * hj;
/* Loop over the parts in ci. */
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d > dj ; pid-- ) {
/* Get a pointer to the jth particle. */
pi = &parts_i[ sort_i[pid].i ];
cpi = &cparts_i[ sort_i[pid].i ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pi->x[k] - pjx[k];
dx[k] = cpi->x[k] - pjx[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hj2 && r2 > pi->h*pi->h ) {
if ( r2 < hj2 && r2 > cpi->h*cpi->h ) {
IACT( r2 , dx , pi->h , hj , pi , pj );
IACT( r2 , dx , cpi->h , hj , &parts_i[ sort_i[pid].i ] , &parts_j[ sort_j[ pjd ].i ] );
}
......@@ -533,7 +536,8 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
int k, pid, pjd, count = c->count;
double pix[3];
float dx[3], hi, hi2, r2;
struct part *restrict pi, *restrict pj, *restrict parts = c->parts;
struct part *restrict parts = c->parts;
struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
TIMER_TIC
if ( c->split )
......@@ -543,31 +547,31 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
for ( pid = 0 ; pid < count ; pid++ ) {
/* Get a pointer to the ith particle. */
pi = &parts[pid];
cpi = &cparts[pid];
/* Get the particle position and radius. */
for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k];
hi = pi->h;
pix[k] = cpi->x[k];
hi = cpi->h;
hi2 = hi * hi;
/* Loop over the other particles .*/
for ( pjd = pid+1 ; pjd < count ; pjd++ ) {
/* Get a pointer to the jth particle. */
pj = &parts[pjd];
cpj = &cparts[pjd];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 || r2 < pj->h*pj->h ) {
if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
IACT( r2 , dx , hi , pj->h , pi , pj );
IACT( r2 , dx , hi , cpj->h , &parts[pid] , &parts[pjd] );
}
......
......@@ -83,7 +83,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->icount += 1;
// pi->icount += 1;
}
......@@ -98,7 +98,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx );
pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->icount += 1;
// pj->icount += 1;
}
......@@ -135,7 +135,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density (
pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->icount += 1;
// pi->icount += 1;
}
......
......@@ -118,7 +118,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
/* Count the particles below that. */
for ( count = 0 , k = 0 ; k < c->count ; k++ ) {
h = c->parts[k].h;
h = c->cparts[k].h;
if ( h <= h_limit )
count += 1;
if ( h > h_max )
......@@ -212,14 +212,16 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
int i, j, k, cdim[3];
struct cell *c;
struct part *finger;
struct cpart *cfinger;
int *ind, changes = 0;
/* Run through the parts and get the current h_max. */
for ( k = 0 ; k < s->nr_parts ; k++ )
for ( k = 0 ; k < s->nr_parts ; k++ ) {
if ( s->parts[k].h > h_max )
h_max = s->parts[k].h;
else if ( s->parts[k].h < h_min )
h_min = s->parts[k].h;
}
s->h_min = h_min;
s->h_max = h_max;
......@@ -264,28 +266,43 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
c->depth = 0;
}
/* Run through the particles and get their cell index. */
ind = (int *)alloca( sizeof(int) * s->nr_parts );
for ( k = 0 ; k < s->nr_parts ; k++ ) {
ind[k] = cell_getid( cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] );
s->cells[ ind[k] ].count += 1;
}
/* Sort the parts according to their cells. */
parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );
/* Hook the cells up to the parts. */
for ( finger = s->parts , k = 0 ; k < s->nr_cells ; k++ ) {
c = &s->cells[ k ];
c->parts = finger;
finger = &finger[ c->count ];
}
/* There were massive changes. */
changes = 1;
} /* re-build upper-level cells? */
/* Run through the particles and get their cell index. */
ind = (int *)alloca( sizeof(int) * s->nr_parts );
for ( k = 0 ; k < s->nr_cells ; k++ )
s->cells[ k ].count = 0;
for ( k = 0 ; k < s->nr_parts ; k++ ) {
ind[k] = cell_getid( s->cdim , s->parts[k].x[0]*s->ih[0] , s->parts[k].x[1]*s->ih[1] , s->parts[k].x[2]*s->ih[2] );
s->cells[ ind[k] ].count += 1;
}
/* Sort the parts according to their cells. */
parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );
/* Update the condensed particle data. */
for ( k = 0 ; k < s->nr_parts ; k++ ) {
s->cparts[k].x[0] = s->parts[k].x[0];
s->cparts[k].x[1] = s->parts[k].x[1];
s->cparts[k].x[2] = s->parts[k].x[2];
s->cparts[k].h = s->parts[k].h;
s->cparts[k].dt = s->parts[k].dt;
}
/* Hook the cells up to the parts. */
for ( finger = s->parts , cfinger = s->cparts , k = 0 ; k < s->nr_cells ; k++ ) {
c = &s->cells[ k ];
c->parts = finger;
c->cparts = cfinger;
finger = &finger[ c->count ];
cfinger = &cfinger[ c->count ];
}
/* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */
#pragma omp parallel for shared(s) reduction(+:changes)
......@@ -303,7 +320,7 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
/**
* @brief Sort the particles according to the given indices.
* @brief Sort the particles and condensed particles according to the given indices.
*
* @param parts The list of #part
* @param ind The indices with respect to which the parts are sorted.
......@@ -321,57 +338,79 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {
int temp_i;
struct part temp_p;
/* One pass of quicksort. */
while ( i < j ) {
while ( i < N && ind[i] <= pivot )
i++;
while ( j >= 0 && ind[j] > pivot )
j--;
if ( i < j ) {
temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i;
temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p;
}
/* If N is small enough, just do insert sort. */
if ( N < 16 ) {
for ( i = 1 ; i < N ; i++ )
if ( ind[i] < ind[i-1] ) {
temp_i = ind[i];
temp_p = parts[j];
for ( j = i ; j > 0 && ind[j-1] > temp_i ; j-- ) {
ind[j] = ind[j-1];
parts[j] = parts[j-1];
}
ind[j] = temp_i;
parts[j] = temp_p;
}
}
/* Verify sort. */
for ( int k = 0 ; k <= j ; k++ )
if ( ind[k] > pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (<=pivot)." );
/* Otherwise, recurse with Quicksort. */
else {
/* One pass of quicksort. */
while ( i < j ) {
while ( i < N && ind[i] <= pivot )
i++;
while ( j >= 0 && ind[j] > pivot )
j--;
if ( i < j ) {
temp_i = ind[i]; ind[i] = ind[j]; ind[j] = temp_i;
temp_p = parts[i]; parts[i] = parts[j]; parts[j] = temp_p;
}
}
for ( int k = j+1 ; k < N ; k++ )
if ( ind[k] <= pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (>pivot)." );
/* Verify sort. */
for ( int k = 0 ; k <= j ; k++ )
if ( ind[k] > pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (<=pivot)." );
}
for ( int k = j+1 ; k < N ; k++ )
if ( ind[k] <= pivot ) {
printf( "parts_sort: sorting failed at k=%i, ind[k]=%i, pivot=%i, i=%i, j=%i, N=%i.\n" , k , ind[k] , pivot , i , j , N );
error( "Sorting failed (>pivot)." );
}
/* Try to recurse in parallel. */
if ( N < 100 ) {
/* Recurse on the left? */
if ( j > 0 && pivot > min )
parts_sort( parts , ind , j+1 , min , pivot );
/* Recurse on the right? */
if ( i < N && pivot+1 < max )
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
}
/* Try to recurse in parallel. */
if ( N < 100 ) {
/* Recurse on the left? */
if ( j > 0 && pivot > min )
parts_sort( parts , ind , j+1 , min , pivot );
/* Recurse on the right? */
if ( i < N && pivot+1 < max )
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
else
#pragma omp parallel sections
{
}
else
#pragma omp parallel sections
{
/* Recurse on the left? */
#pragma omp section
if ( j > 0 && pivot > min )
parts_sort( parts , ind , j+1 , min , pivot );
/* Recurse on the left? */
#pragma omp section
if ( j > 0 && pivot > min )
parts_sort( parts , ind , j+1 , min , pivot );
/* Recurse on the right? */
#pragma omp section
if ( i < N && pivot+1 < max )
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
/* Recurse on the right? */
#pragma omp section
if ( i < N && pivot+1 < max )
parts_sort( &parts[i], &ind[i], N-i , pivot+1 , max );
}
}
}
......@@ -1306,7 +1345,7 @@ void space_split ( struct space *s , struct cell *c ) {
/* Count the particles below that. */
for ( count = 0 , k = 0 ; k < c->count ; k++ ) {
h = c->parts[k].h;
h = c->cparts[k].h;
if ( h <= h_limit )
count += 1;
if ( h > h_max )
......@@ -1387,6 +1426,9 @@ void space_recycle ( struct space *s , struct cell *c ) {
/* Clear this cell's sort arrays. */
if ( c->sort != NULL )
free( c->sort );
/* Clear the cell data. */
bzero( c , sizeof(struct cell) );
/* Hook this cell into the buffer. */
c->next = s->cells_new;
......@@ -1465,6 +1507,10 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
s->nr_parts = N;
s->parts = parts;
/* Allocate the cparts array. */
if ( posix_memalign( (void *)&s->cparts , 32 , N * sizeof(struct cpart) ) != 0 )
error( "Failed to allocate cparts." );
/* Init the space lock. */
if ( lock_init( &s->lock ) != 0 )
error( "Failed to create space spin-lock." );
......
......@@ -75,6 +75,7 @@ struct space {
/* The particle data (cells have pointers to this). */
struct part *parts;
struct cpart *cparts;
/* The sortlist data (cells hold pointers to these). */
struct entry *sortlist;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment