Commit 8a598986 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

use sorted interactions for dopair/doself subsets.


Former-commit-id: 0607d171148f8df8f53837dc3c10612e86d50e05
parent cb8baf25
......@@ -486,7 +486,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Did we get the right number density? */
if ( p->wcount + kernel_root > const_nwneigh + 1 ||
p->wcount + kernel_root < const_nwneigh - 1 ) {
printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root ); fflush(stdout);
// printf( "runner_doghost: particle %i (h=%e) has bad wcount=%f.\n" , p->id , p->h , p->wcount + kernel_root ); fflush(stdout);
pid[redo] = pid[i];
redo += 1;
p->wcount = 0.0;
......
......@@ -47,6 +47,9 @@
#define DOSUB_SUBSET2(f) PASTE(runner_dosub_subset,f)
#define DOSUB_SUBSET DOSUB_SUBSET2(FUNCTION)
#define IACT_NONSYM2(f) PASTE(runner_iact_nonsym,f)
#define IACT_NONSYM IACT_NONSYM2(FUNCTION)
#define IACT2(f) PASTE(runner_iact,f)
#define IACT IACT2(FUNCTION)
......@@ -156,11 +159,12 @@ void DOPAIR_NAIVE ( struct runner *r , struct cell *ci , struct cell *cj ) {
void DOPAIR_SUBSET ( struct runner *r , struct cell *ci , struct part *parts_i , int *ind , int count , struct cell *cj ) {
struct engine *e = r->e;
int pid, pjd, k, count_j = cj->count;
int pid, pjd, sid, k, count_j = cj->count, flipped;
double shift[3] = { 0.0 , 0.0 , 0.0 };
struct part *pi, *pj, *parts_j = cj->parts;
double pix[3];
float dx[3], hi, hi2, r2;
float dx[3], hi, hi2, r2, di;
struct entry *sort_j;
TIMER_TIC
/* Get the relative distance between the pairs, wrapping. */
......@@ -171,44 +175,101 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *ci , struct part *parts_i ,
shift[k] = -e->s->dim[k];
}
/* Get the sorting index. */
for ( sid = 0 , k = 0 ; k < 3 ; k++ )
sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
/* Switch the cells around? */
flipped = runner_flip[sid];
sid = sortlistID[sid];
/* printf( "runner_dopair_naive: doing pair [ %g %g %g ]/[ %g %g %g ] with %i/%i parts and shift = [ %g %g %g ].\n" ,
ci->loc[0] , ci->loc[1] , ci->loc[2] , cj->loc[0] , cj->loc[1] , cj->loc[2] ,
ci->count , cj->count , shift[0] , shift[1] , shift[2] ); fflush(stdout);
tic = getticks(); */
/* Loop over the parts in ci. */
for ( pid = 0 ; pid < count ; pid++ ) {
/* Pick-out the sorted lists. */
sort_j = &cj->sort[ sid*(cj->count + 1) ];
/* Get a hold of the ith part in ci. */
pi = &parts_i[ ind[ pid ] ];
for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k] - shift[k];
hi = pi->h;
hi2 = hi * hi;
/* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j ; pjd++ ) {
/* Get a pointer to the jth particle. */
pj = &parts_j[ pjd ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT( r2 , dx , hi , 0.0f , pi , pj );
/* Parts are on the left? */
if ( !flipped ) {
/* Loop over the parts_i. */
for ( pid = 0 ; pid < count ; pid++ ) {
/* Get a hold of the ith part in ci. */
pi = &parts_i[ ind[ pid ] ];
for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k] - shift[k];
hi = pi->h;
hi2 = hi * hi;
di = hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
/* Loop over the parts in cj. */
for ( pjd = 0 ; pjd < count_j && sort_j[ pjd ].d < di ; pjd++ ) {
/* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[ pjd ].i ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
}
} /* loop over the parts in cj. */
} /* loop over the parts in ci. */
}
}
} /* loop over the parts in cj. */
/* Parts are on the right. */
else {
} /* loop over the parts in ci. */
/* Loop over the parts_i. */
for ( pid = 0 ; pid < count ; pid++ ) {
/* Get a hold of the ith part in ci. */
pi = &parts_i[ ind[ pid ] ];
for ( k = 0 ; k < 3 ; k++ )
pix[k] = pi->x[k] - shift[k];
hi = pi->h;
hi2 = hi * hi;
di = -hi + pix[0]*runner_shift[ 3*sid + 0 ] + pix[1]*runner_shift[ 3*sid + 1 ] + pix[2]*runner_shift[ 3*sid + 2 ];
/* Loop over the parts in cj. */
for ( pjd = count_j-1 ; pjd >= 0 && di < sort_j[ pjd ].d ; pjd-- ) {
/* Get a pointer to the jth particle. */
pj = &parts_j[ sort_j[ pjd ].i ];
/* Compute the pairwise distance. */
r2 = 0.0f;
for ( k = 0 ; k < 3 ; k++ ) {
dx[k] = pix[k] - pj->x[k];
r2 += dx[k]*dx[k];
}
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
}
} /* loop over the parts in cj. */
} /* loop over the parts in ci. */
}
#ifdef TIMER_VERBOSE
printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 );
......@@ -275,7 +336,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *ci , struct part *parts , i
/* Hit or miss? */
if ( r2 < hi2 ) {
IACT( r2 , dx , hi , 0.0f , pi , pj );
IACT_NONSYM( r2 , dx , hi , pj->h , pi , pj );
}
......
......@@ -113,6 +113,36 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
/**
* @brief Density kernel
*/
__attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_density ( float r2 , float *dx , float hi , float hj , struct part *pi , struct part *pj ) {
float r = sqrtf( r2 );
float xi;
float h_inv, hg_inv;
float wi, wi_dx;
if ( r2 < hi*hi && pi != NULL ) {
h_inv = 1.0 / hi;
hg_inv = kernel_igamma * h_inv;
xi = r * hg_inv;
kernel_deval( xi , &wi , &wi_dx );
pi->rho += pj->mass * wi;
pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->wcount_dh -= xi * h_inv * wi_dx * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->icount += 1;
}
}
__attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2 , float *dx , float hi , float hj , struct part *pi , struct part *pj ) {
float r = sqrtf( r2 ), ri = 1.0f / r;
......@@ -170,3 +200,50 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
__attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( float r2 , float *dx , float hi , float hj , struct part *pi , struct part *pj ) {
float r = sqrtf( r2 ), ri = 1.0f / r;
float xi, xj;
float hig_inv, hig2_inv;
float hjg_inv, hjg2_inv;
float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr;
float f;
int k;
/* Get the kernel for hi. */
hig_inv = kernel_igamma / hi;
hig2_inv = hig_inv * hig_inv;
xi = r * hig_inv;
kernel_deval( xi , &wi , &wi_dx );
wi_dr = hig2_inv * hig2_inv * wi_dx;
/* Get the kernel for hj. */
hjg_inv = kernel_igamma / hj;
hjg2_inv = hjg_inv * hjg_inv;
xj = r * hjg_inv;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hjg2_inv * hjg2_inv * wj_dx;
/* Get the common factor out. */
w = ri * ( pi->POrho2 * wi_dr + pj->POrho2 * wj_dr );
/* Use the force, Luke! */
for ( k = 0 ; k < 3 ; k++ ) {
f = dx[k] * w;
pi->a[k] += pj->mass * f;
}
/* Compute dv dot r. */
dvdr = ( pi->v[0] - pj->v[0] ) * dx[0] + ( pi->v[1] - pj->v[1] ) * dx[1] + ( pi->v[2] - pj->v[2] ) * dx[2];
dvdr *= ri;
/* Get the time derivative for u. */
pi->u_dt += pj->mass * dvdr * wi_dr;
/* Get the time derivative for h. */
pi->h_dt += pj->mass / pj->rho * dvdr * wi_dr;
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment