Commit f8c01941 authored by Pedro Gonnet
Browse files

Added cparts, which contain only the x, h, and dt of each part, for faster, more cache-efficient access.


Former-commit-id: dbe48ffac91fdca4909a9de4484b9c5385e8f2cf
parent acf43028
...@@ -258,8 +258,18 @@ void cell_split ( struct cell *c ) { ...@@ -258,8 +258,18 @@ void cell_split ( struct cell *c ) {
for ( k = 0 ; k < 8 ; k++ ) { for ( k = 0 ; k < 8 ; k++ ) {
c->progeny[k]->count = right[k] - left[k] + 1; c->progeny[k]->count = right[k] - left[k] + 1;
c->progeny[k]->parts = &c->parts[ left[k] ]; c->progeny[k]->parts = &c->parts[ left[k] ];
c->progeny[k]->cparts = &c->cparts[ left[k] ];
} }
/* Update the condensed particle data. */
for ( k = 0 ; k < c->count ; k++ ) {
c->cparts[k].x[0] = c->parts[k].x[0];
c->cparts[k].x[1] = c->parts[k].x[1];
c->cparts[k].x[2] = c->parts[k].x[2];
c->cparts[k].h = c->parts[k].h;
c->cparts[k].dt = c->parts[k].dt;
}
/* Verify a few sub-cells. */ /* Verify a few sub-cells. */
/* for ( k = 0 ; k < c->progeny[0]->count ; k++ ) /* for ( k = 0 ; k < c->progeny[0]->count ; k++ )
if ( c->progeny[0]->parts[k].x[0] > pivot[0] || if ( c->progeny[0]->parts[k].x[0] > pivot[0] ||
......
...@@ -49,6 +49,7 @@ struct cell { ...@@ -49,6 +49,7 @@ struct cell {
/* Pointers to the particle data. */ /* Pointers to the particle data. */
struct part *parts; struct part *parts;
struct cpart *cparts;
/* Pointers for the sorted indices. */ /* Pointers for the sorted indices. */
struct entry *sort; struct entry *sort;
......
...@@ -24,9 +24,27 @@ ...@@ -24,9 +24,27 @@
#define part_dtmax 10 #define part_dtmax 10
/* Condensed data of a single particle.
 *
 * Holds only the position, cutoff radius and time-step of a particle, i.e.
 * a subset of the fields of struct part, so that loops which need nothing
 * else can walk a smaller, more cache-friendly array.
 *
 * The values are copies, not references: cell_split fills cparts[k] from
 * parts[k] field by field, and entry k of a cell's cparts array describes
 * the same particle as entry k of its parts array. */
struct cpart {
/* Particle position. */
double x[3];
/* Particle cutoff radius. */
float h;
/* Particle time-step. */
float dt;
/* Every instance is aligned to a 32-byte boundary.
 * NOTE(review): with 8-byte doubles and 4-byte floats the members add up to
 * exactly 32 bytes, so the alignment should add no padding -- confirm on the
 * target ABI before adding fields. */
} __attribute__((aligned (32)));
/* Data of a single particle. */ /* Data of a single particle. */
struct part { struct part {
/* Particle position. */
double x[3];
/* Particle cutoff radius. */ /* Particle cutoff radius. */
float h; float h;
...@@ -39,9 +57,6 @@ struct part { ...@@ -39,9 +57,6 @@ struct part {
/* Particle ID. */ /* Particle ID. */
unsigned long long id; unsigned long long id;
/* Particle position. */
double x[3];
/* Particle velocity. */ /* Particle velocity. */
float v[3]; float v[3];
...@@ -70,7 +85,7 @@ struct part { ...@@ -70,7 +85,7 @@ struct part {
float rho_dh; float rho_dh;
/* Particle number density. */ /* Particle number density. */
int icount; // int icount;
float wcount; float wcount;
float wcount_dh; float wcount_dh;
......
...@@ -172,7 +172,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -172,7 +172,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
struct entry *finger; struct entry *finger;
struct entry *fingers[8]; struct entry *fingers[8];
struct part *parts = c->parts; struct cpart *cparts = c->cparts;
int j, k, count = c->count; int j, k, count = c->count;
int i, ind, off[8], inds[8], temp_i; int i, ind, off[8], inds[8], temp_i;
// float shift[3]; // float shift[3];
...@@ -262,9 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -262,9 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
/* Fill the sort array. */ /* Fill the sort array. */
for ( k = 0 ; k < count ; k++ ) { for ( k = 0 ; k < count ; k++ ) {
px[0] = parts[k].x[0]; px[0] = cparts[k].x[0];
px[1] = parts[k].x[1]; px[1] = cparts[k].x[1];
px[2] = parts[k].x[2]; px[2] = cparts[k].x[2];
for ( j = 0 ; j < 13 ; j++ ) for ( j = 0 ; j < 13 ; j++ )
if ( flags & (1 << j) ) { if ( flags & (1 << j) ) {
c->sort[ j*(count + 1) + k].i = k; c->sort[ j*(count + 1) + k].i = k;
...@@ -272,7 +272,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -272,7 +272,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
} }
if ( flags & (1 << 14) ) { if ( flags & (1 << 14) ) {
c->sort[ 14*(count + 1) + k ].i = k; c->sort[ 14*(count + 1) + k ].i = k;
c->sort[ 14*(count + 1) + k ].d = parts[k].dt; c->sort[ 14*(count + 1) + k ].d = cparts[k].dt;
} }
} }
......
...@@ -82,8 +82,9 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r ...@@ -82,8 +82,9 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
struct engine *e = r->e; struct engine *e = r->e;
int pid, pjd, k, count_i = ci->count, count_j = cj->count; int pid, pjd, k, count_i = ci->count, count_j = cj->count;
double shift[3] = { 0.0 , 0.0 , 0.0 }; double shift[3] = { 0.0 , 0.0 , 0.0 };
struct part *restrict pi, *restrict pj;
struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts; struct part *restrict parts_i = ci->parts, *restrict parts_j = cj->parts;
struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
struct cpart *restrict cpi, *restrict cparts_i = ci->cparts;
double pix[3]; double pix[3];
float dx[3], hi, hi2, r2; float dx[3], hi, hi2, r2;
TIMER_TIC TIMER_TIC
...@@ -105,29 +106,29 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r ...@@ -105,29 +106,29 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *restrict ci , struct cell *r
for ( pid = 0 ; pid < count_i ; pid++ ) { for ( pid = 0 ; pid < count_i ; pid++ ) {
/* Get a hold of the ith part in ci. */ /* Get a hold of the ith part in ci. */
pi = &parts_i[ pid ]; cpi = &cparts_i[ pid ];
for ( k = 0 ; k < 3 ; k++ ) for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k] - shift[k]; pix[k] = cpi->x[k] - shift[k];
hi = pi->h; hi = cpi->h;
hi2 = hi * hi; hi2 = hi * hi;
/* Loop over the parts in cj. */ /* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j ; pjd++ ) { for ( pjd = 0 ; pjd < count_j ; pjd++ ) {
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
pj = &parts_j[ pjd ]; cpj = &cparts_j[ pjd ];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
r2 = 0.0f; r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k]; dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k]; r2 += dx[k]*dx[k];
} }
/* Hit or miss? */ /* Hit or miss? */
if ( r2 < hi2 || r2 < pj->h*pj->h ) { if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
IACT( r2 , dx , hi , pj->h , pi , pj ); IACT( r2 , dx , hi , cpj->h , &parts_i[ pid ] , &parts_j[pjd] );
} }
...@@ -162,7 +163,8 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * ...@@ -162,7 +163,8 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
struct engine *e = r->e; struct engine *e = r->e;
int pid, pjd, sid, k, count_j = cj->count, flipped; int pid, pjd, sid, k, count_j = cj->count, flipped;
double shift[3] = { 0.0 , 0.0 , 0.0 }; double shift[3] = { 0.0 , 0.0 , 0.0 };
struct part *restrict pi, *restrict pj, *restrict parts_j = cj->parts; struct part *restrict pi, *restrict parts_j = cj->parts;
struct cpart *restrict cpj, *restrict cparts_j = cj->cparts;
double pix[3]; double pix[3];
float dx[3], hi, hi2, r2, di; float dx[3], hi, hi2, r2, di;
struct entry *sort_j; struct entry *sort_j;
...@@ -210,19 +212,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * ...@@ -210,19 +212,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) { for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) {
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[ pjd ].i ]; cpj = &cparts_j[ sort_j[ pjd ].i ];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
r2 = 0.0f; r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k]; dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k]; r2 += dx[k]*dx[k];
} }
/* Hit or miss? */ /* Hit or miss? */
if ( r2 < hi2 ) { if ( r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj ); IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] );
} }
...@@ -250,19 +252,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * ...@@ -250,19 +252,19 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) { for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) {
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[ pjd ].i ]; cpj = &cparts_j[ sort_j[ pjd ].i ];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
r2 = 0.0f; r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k]; dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k]; r2 += dx[k]*dx[k];
} }
/* Hit or miss? */ /* Hit or miss? */
if ( r2 < hi2 ) { if ( r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj ); IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_j[ sort_j[ pjd ].i ] );
} }
...@@ -297,7 +299,9 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * ...@@ -297,7 +299,9 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *restrict parts , int *restrict ind , int count ) { void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *restrict parts , int *restrict ind , int count ) {
int pid, pjd, k, count_i = ci->count; int pid, pjd, k, count_i = ci->count;
struct part *restrict pi, *restrict pj, *restrict parts_i = ci->parts; struct part *restrict parts_i = ci->parts;
struct part *restrict pi;
struct cpart *restrict cpj, *restrict cparts = ci->cparts;
double pix[3]; double pix[3];
float dx[3], hi, hi2, r2; float dx[3], hi, hi2, r2;
TIMER_TIC TIMER_TIC
...@@ -321,23 +325,19 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part * ...@@ -321,23 +325,19 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
for ( pjd = 0 ; pjd < count_i ; pjd++ ) { for ( pjd = 0 ; pjd < count_i ; pjd++ ) {
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
pj = &parts_i[ pjd ]; cpj = &cparts[ pjd ];
/* Skip the particle itself. */
if ( pj == pi )
continue;
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
r2 = 0.0f; r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k]; dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k]; r2 += dx[k]*dx[k];
} }
/* Hit or miss? */ /* Hit or miss? */
if ( r2 < hi2 ) { if ( r2 > 0.0f && r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj ); IACT_NONSYM( r2 , dx , hi , cpj->h , pi , &parts_i[ pjd ] );
} }
...@@ -370,7 +370,9 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -370,7 +370,9 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
double rshift, shift[3] = { 0.0 , 0.0 , 0.0 }; double rshift, shift[3] = { 0.0 , 0.0 , 0.0 };
struct cell *temp; struct cell *temp;
struct entry *restrict sort_i, *restrict sort_j; struct entry *restrict sort_i, *restrict sort_j;
struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j; struct part *restrict parts_i, *restrict parts_j;
struct cpart *restrict cpi, *restrict cparts_i;
struct cpart *restrict cpj, *restrict cparts_j;
double pix[3], pjx[3], di, dj; double pix[3], pjx[3], di, dj;
float dx[3], hi, hi2, hj, hj2, r2; float dx[3], hi, hi2, hj, hj2, r2;
double hi_max, hj_max; double hi_max, hj_max;
...@@ -427,6 +429,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -427,6 +429,7 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
hi_max = ci->h_max - rshift; hj_max = cj->h_max; hi_max = ci->h_max - rshift; hj_max = cj->h_max;
count_i = ci->count; count_j = cj->count; count_i = ci->count; count_j = cj->count;
parts_i = ci->parts; parts_j = cj->parts; parts_i = ci->parts; parts_j = cj->parts;
cparts_i = ci->cparts; cparts_j = cj->cparts;
di_max = sort_i[count_i-1].d - rshift; di_max = sort_i[count_i-1].d - rshift;
dj_min = sort_j[0].d; dj_min = sort_j[0].d;
...@@ -437,33 +440,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -437,33 +440,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) { for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
/* Get a hold of the ith part in ci. */ /* Get a hold of the ith part in ci. */
pi = &parts_i[ sort_i[ pid ].i ]; cpi = &cparts_i[ sort_i[ pid ].i ];
hi = pi->h; hi = cpi->h;
di = sort_i[pid].d + hi - rshift; di = sort_i[pid].d + hi - rshift;
if ( di < dj_min ) if ( di < dj_min )
continue; continue;
hi2 = pi->h * pi->h; hi2 = hi * hi;
for ( k = 0 ; k < 3 ; k++ ) for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k] - shift[k]; pix[k] = cpi->x[k] - shift[k];
/* Loop over the parts in cj. */ /* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d < di ; pjd++ ) { for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d < di ; pjd++ ) {
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[pjd].i ]; cpj = &cparts_j[ sort_j[pjd].i ];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
r2 = 0.0f; r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k]; dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k]; r2 += dx[k]*dx[k];
} }
/* Hit or miss? */ /* Hit or miss? */
if ( r2 < hi2 ) { if ( r2 < hi2 ) {
IACT( r2 , dx , hi , pj->h , pi , pj ); IACT( r2 , dx , hi , cpj->h , &parts_i[ sort_i[ pid ].i ] , &parts_j[ sort_j[pjd].i ] );
} }
...@@ -478,33 +481,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric ...@@ -478,33 +481,33 @@ void DOPAIR ( struct runner *r , struct cell *restrict ci , struct cell *restric
for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) { for ( pjd = 0 ; pjd < count_j && sort_j[pjd].d - hj_max < di_max ; pjd++ ) {
/* Get a hold of the jth part in cj. */ /* Get a hold of the jth part in cj. */
pj = &parts_j[ sort_j[ pjd ].i ]; cpj = &cparts_j[ sort_j[ pjd ].i ];
hj = pj->h; hj = cpj->h;
dj = sort_j[pjd].d - hj - rshift; dj = sort_j[pjd].d - hj - rshift;
if ( dj > di_max ) if ( dj > di_max )
continue; continue;
for ( k = 0 ; k < 3 ; k++ ) for ( k = 0 ; k < 3 ; k++ )
pjx[k] = pj->x[k] + shift[k]; pjx[k] = cpj->x[k] + shift[k];
hj2 = pj->h * pj->h; hj2 = hj * hj;
/* Loop over the parts in ci. */ /* Loop over the parts in ci. */
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d > dj ; pid-- ) { for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d > dj ; pid-- ) {
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
pi = &parts_i[ sort_i[pid].i ]; cpi = &cparts_i[ sort_i[pid].i ];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
r2 = 0.0f; r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pi->x[k] - pjx[k]; dx[k] = cpi->x[k] - pjx[k];
r2 += dx[k]*dx[k]; r2 += dx[k]*dx[k];
} }
/* Hit or miss? */ /* Hit or miss? */
if ( r2 < hj2 && r2 > pi->h*pi->h ) { if ( r2 < hj2 && r2 > cpi->h*cpi->h ) {
IACT( r2 , dx , pi->h , hj , pi , pj ); IACT( r2 , dx , cpi->h , hj , &parts_i[ sort_i[pid].i ] , &parts_j[ sort_j[ pjd ].i ] );
} }
...@@ -533,7 +536,8 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) { ...@@ -533,7 +536,8 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
int k, pid, pjd, count = c->count; int k, pid, pjd, count = c->count;
double pix[3]; double pix[3];
float dx[3], hi, hi2, r2; float dx[3], hi, hi2, r2;
struct part *restrict pi, *restrict pj, *restrict parts = c->parts; struct part *restrict parts = c->parts;
struct cpart *restrict cpi, *restrict cpj, *restrict cparts = c->cparts;
TIMER_TIC TIMER_TIC
if ( c->split ) if ( c->split )
...@@ -543,31 +547,31 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) { ...@@ -543,31 +547,31 @@ void DOSELF ( struct runner *r , struct cell *restrict c ) {
for ( pid = 0 ; pid < count ; pid++ ) { for ( pid = 0 ; pid < count ; pid++ ) {
/* Get a pointer to the ith particle. */ /* Get a pointer to the ith particle. */
pi = &parts[pid]; cpi = &cparts[pid];
/* Get the particle position and radius. */ /* Get the particle position and radius. */
for ( k = 0 ; k < 3 ; k++ ) for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k]; pix[k] = cpi->x[k];
hi = pi->h; hi = cpi->h;
hi2 = hi * hi; hi2 = hi * hi;
/* Loop over the other particles .*/ /* Loop over the other particles .*/
for ( pjd = pid+1 ; pjd < count ; pjd++ ) { for ( pjd = pid+1 ; pjd < count ; pjd++ ) {
/* Get a pointer to the jth particle. */ /* Get a pointer to the jth particle. */
pj = &parts[pjd]; cpj = &cparts[pjd];
/* Compute the pairwise distance. */ /* Compute the pairwise distance. */
r2 = 0.0f; r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) { for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k]; dx[k] = pix[k] - cpj->x[k];
r2 += dx[k]*dx[k]; r2 += dx[k]*dx[k];
} }
/* Hit or miss? */ /* Hit or miss? */
if ( r2 < hi2 || r2 < pj->h*pj->h ) { if ( r2 < hi2 || r2 < cpj->h*cpj->h ) {
IACT( r2 , dx , hi , pj->h , pi , pj ); IACT( r2 , dx , hi , cpj->h , &parts[pid] , &parts[pjd] );
} }
......
...@@ -83,7 +83,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r ...@@ -83,7 +83,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx ); pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->icount += 1; // pi->icount += 1;
} }
...@@ -98,7 +98,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r ...@@ -98,7 +98,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx ); pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx );
pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 ); pj->wcount_dh -= xj * h_inv * wj_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->icount += 1;