Commit 405d95dc authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

several fixes.


Former-commit-id: eea7a1846b4fde4575bbb84a9316481d7ffc122b
parent e1f6b236
......@@ -292,7 +292,7 @@ void runner_dopair_naive ( struct runner_thread *rt , struct cell *ci , struct c
/* Hit or miss? */
if ( r2 < ri2 || r2 < pj->r*pj->r ) {
iact( r2 , ri , pj->r , &pi->count , &pj->count , &pi->icount , &pj->icount );
iact( r2 , ri , pj->r , pi , pj );
}
......@@ -408,7 +408,7 @@ void runner_dopair ( struct runner_thread *rt , struct cell *ci , struct cell *c
/* Hit or miss? */
if ( r2 < ri2 ) {
iact( r2 , ri , pj->r , &pi->count , &pj->count , &pi->icount , &pj->icount );
iact( r2 , ri , pj->r , pi , pj );
}
......@@ -449,7 +449,7 @@ void runner_dopair ( struct runner_thread *rt , struct cell *ci , struct cell *c
/* Hit or miss? */
if ( r2 < rj2 && r2 > pi->r*pi->r ) {
iact( r2 , pi->r , rj , NULL , &pj->count , NULL , &pj->icount );
iact( r2 , pi->r , rj , pi , pj );
}
......@@ -511,7 +511,7 @@ void runner_doself ( struct runner_thread *rt , struct cell *c ) {
/* Hit or miss? */
if ( r2 < ri2 || r2 < pj->r*pj->r ) {
iact( r2 , ri , pj->r , &pi->count , &pj->count , &pi->icount , &pj->icount );
iact( r2 , ri , pj->r , pi , pj );
}
......
......@@ -122,7 +122,7 @@ long long int runner_hist_bins[ runner_hist_N ];
* @param jo Pointer to where to store the interaction of the ith particle.
*/
__attribute__ ((always_inline)) INLINE void iact ( float r2 , float hi , float hj , float *force_i , float *force_j , int *count_i , int *count_j ) {
__attribute__ ((always_inline)) INLINE void iact_nopart ( float r2 , float hi , float hj , float *force_i , float *force_j , int *count_i , int *count_j ) {
#define KERNEL_COEFF_1 2.546479089470f
#define KERNEL_COEFF_2 15.278874536822f
......@@ -174,6 +174,69 @@ __attribute__ ((always_inline)) INLINE void iact ( float r2 , float hi , float h
__attribute__ ((always_inline)) INLINE void iact ( float r2 , float hi , float hj , struct part *pi , struct part *pj ) {
#define KERNEL_COEFF_1 2.546479089470f
#define KERNEL_COEFF_2 15.278874536822f
#define KERNEL_COEFF_3 45.836623610466f
#define KERNEL_COEFF_4 30.557749073644f
#define KERNEL_COEFF_5 5.092958178941f
#define KERNEL_COEFF_6 (-15.278874536822f)
#define NORM_COEFF 4.188790204786f
float r = sqrtf( r2 );
float ui, uj, wi, wj;
float ui_dh, uj_dh, wi_dh, wj_dh;
if ( r2 < hi*hi && pi != NULL ) {
ui = r / hi;
ui_dh = -r / hi / hi;
if ( ui < 0.5f ) {
wi = KERNEL_COEFF_1 + KERNEL_COEFF_2 * (ui - 1.0f) * ui * ui;
wi_dh = KERNEL_COEFF_2 * ui_dh * ui * ui
+ 2 * KERNEL_COEFF_2 * (ui - 1.0f) * ui_dh * ui;
}
else {
wi = KERNEL_COEFF_5 * (1.0f - ui) * (1.0f - ui) * (1.0f - ui);
wi_dh = -3 * KERNEL_COEFF_5 * ui_dh * (1.0f - ui) * (1.0f - ui);
}
pi->count += NORM_COEFF * wi;
pi->count_dh += NORM_COEFF * wi_dh;
pi->icount += 1;
}
if ( r2 < hj*hj && pj != NULL ) {
uj = r / hj;
uj_dh = -r / hj / hj;
if ( uj < 0.5f ) {
wj = KERNEL_COEFF_1 + KERNEL_COEFF_2 * (uj - 1.0f) * uj * uj;
wj_dh = KERNEL_COEFF_2 * uj_dh * uj * uj
+ 2 * KERNEL_COEFF_2 * (uj - 1.0f) * uj_dh * uj;
}
else {
wj = KERNEL_COEFF_5 * (1.0f - uj) * (1.0f - uj) * (1.0f - uj);
wj_dh = -3 * KERNEL_COEFF_5 * uj_dh * (1.0f - uj) * (1.0f - uj);
}
pj->count += NORM_COEFF * wj;
pj->count_dh += NORM_COEFF * wj_dh;
pj->icount += 1;
}
#ifdef HIST
if ( hi > hj )
runner_hist_hit( hi / hj );
else
runner_hist_hit( hj / hi );
#endif
}
/* A task queue. */
struct queue {
......
......@@ -412,8 +412,7 @@ void space_splittasks ( struct space *s ) {
case 1: /* ( 1 , 1 , 0 ) */
if ( space_dosub &&
!ci->progeny[6]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[1]->split &&
ci->progeny[6]->count + ci->progeny[7]->count + cj->progeny[0]->count + cj->progeny[1]->count < space_splitsize ) {
!cj->progeny[0]->split && !cj->progeny[1]->split ) {
t->type = tid_sub; t->flags = 1;
task_addunlock( ci->progeny[6]->sorts[1] , t ); task_addunlock( cj->progeny[0]->sorts[1] , t );
task_addunlock( ci->progeny[7]->sorts[1] , t ); task_addunlock( cj->progeny[1]->sorts[1] , t );
......@@ -446,8 +445,7 @@ void space_splittasks ( struct space *s ) {
case 3: /* ( 1 , 0 , 1 ) */
if ( space_dosub &&
!ci->progeny[5]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[2]->split &&
ci->progeny[5]->count + ci->progeny[7]->count + cj->progeny[0]->count + cj->progeny[2]->count < space_splitsize ) {
!cj->progeny[0]->split && !cj->progeny[2]->split ) {
t->type = tid_sub; t->flags = 3;
task_addunlock( ci->progeny[5]->sorts[3] , t ); task_addunlock( cj->progeny[0]->sorts[3] , t );
task_addunlock( ci->progeny[7]->sorts[3] , t ); task_addunlock( cj->progeny[2]->sorts[3] , t );
......@@ -473,8 +471,7 @@ void space_splittasks ( struct space *s ) {
case 4: /* ( 1 , 0 , 0 ) */
if ( space_dosub &&
!ci->progeny[4]->split && !ci->progeny[5]->split && !ci->progeny[6]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[1]->split && !cj->progeny[2]->split && !cj->progeny[3]->split &&
ci->progeny[4]->count + ci->progeny[5]->count + ci->progeny[6]->count + ci->progeny[7]->count + cj->progeny[0]->count + cj->progeny[1]->count + cj->progeny[2]->count + cj->progeny[3]->count < space_splitsize ) {
!cj->progeny[0]->split && !cj->progeny[1]->split && !cj->progeny[2]->split && !cj->progeny[3]->split ) {
t->type = tid_sub; t->flags = 4;
task_addunlock( ci->progeny[4]->sorts[4] , t ); task_addunlock( cj->progeny[0]->sorts[4] , t );
task_addunlock( ci->progeny[5]->sorts[5] , t ); task_addunlock( cj->progeny[0]->sorts[5] , t );
......@@ -540,8 +537,7 @@ void space_splittasks ( struct space *s ) {
case 5: /* ( 1 , 0 , -1 ) */
if ( space_dosub &&
!ci->progeny[4]->split && !ci->progeny[6]->split &&
!cj->progeny[1]->split && !cj->progeny[3]->split &&
ci->progeny[4]->count + ci->progeny[6]->count + cj->progeny[1]->count + cj->progeny[3]->count < space_splitsize ) {
!cj->progeny[1]->split && !cj->progeny[3]->split ) {
t->type = tid_sub; t->flags = 5;
task_addunlock( ci->progeny[4]->sorts[5] , t ); task_addunlock( cj->progeny[1]->sorts[5] , t );
task_addunlock( ci->progeny[6]->sorts[5] , t ); task_addunlock( cj->progeny[3]->sorts[5] , t );
......@@ -574,8 +570,7 @@ void space_splittasks ( struct space *s ) {
case 7: /* ( 1 , -1 , 0 ) */
if ( space_dosub &&
!ci->progeny[4]->split && !ci->progeny[5]->split &&
!cj->progeny[2]->split && !cj->progeny[3]->split &&
ci->progeny[4]->count + ci->progeny[5]->count + cj->progeny[2]->count + cj->progeny[3]->count < space_splitsize ) {
!cj->progeny[2]->split && !cj->progeny[3]->split ) {
t->type = tid_sub; t->flags = 7;
task_addunlock( ci->progeny[4]->sorts[6] , t ); task_addunlock( cj->progeny[3]->sorts[6] , t );
task_addunlock( ci->progeny[5]->sorts[8] , t ); task_addunlock( cj->progeny[2]->sorts[8] , t );
......@@ -608,8 +603,7 @@ void space_splittasks ( struct space *s ) {
case 9: /* ( 0 , 1 , 1 ) */
if ( space_dosub &&
!ci->progeny[3]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[4]->split &&
ci->progeny[3]->count + ci->progeny[7]->count + cj->progeny[0]->count + cj->progeny[4]->count < space_splitsize ) {
!cj->progeny[0]->split && !cj->progeny[4]->split ) {
t->type = tid_sub; t->flags = 9;
task_addunlock( ci->progeny[3]->sorts[9] , t ); task_addunlock( cj->progeny[0]->sorts[9] , t );
task_addunlock( ci->progeny[7]->sorts[9] , t ); task_addunlock( cj->progeny[4]->sorts[9] , t );
......@@ -634,8 +628,7 @@ void space_splittasks ( struct space *s ) {
case 10: /* ( 0 , 1 , 0 ) */
if ( !ci->progeny[2]->split && !ci->progeny[3]->split && !ci->progeny[6]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[1]->split && !cj->progeny[4]->split && !cj->progeny[5]->split &&
ci->progeny[2]->count + ci->progeny[3]->count + ci->progeny[6]->count + ci->progeny[7]->count + cj->progeny[0]->count + cj->progeny[1]->count + cj->progeny[4]->count + cj->progeny[5]->count < space_splitsize ) {
!cj->progeny[0]->split && !cj->progeny[1]->split && !cj->progeny[4]->split && !cj->progeny[5]->split ) {
t->type = tid_sub; t->flags = 10;
task_addunlock( ci->progeny[2]->sorts[10] , t ); task_addunlock( cj->progeny[0]->sorts[10] , t );
task_addunlock( ci->progeny[3]->sorts[11] , t ); task_addunlock( cj->progeny[0]->sorts[11] , t );
......@@ -701,8 +694,7 @@ void space_splittasks ( struct space *s ) {
case 11: /* ( 0 , 1 , -1 ) */
if ( space_dosub &&
!ci->progeny[2]->split && !ci->progeny[6]->split &&
!cj->progeny[1]->split && !cj->progeny[5]->split &&
ci->progeny[2]->count + ci->progeny[6]->count + cj->progeny[1]->count + cj->progeny[5]->count < space_splitsize ) {
!cj->progeny[1]->split && !cj->progeny[5]->split ) {
t->type = tid_sub; t->flags = 11;
task_addunlock( ci->progeny[2]->sorts[11] , t ); task_addunlock( cj->progeny[1]->sorts[11] , t );
task_addunlock( ci->progeny[6]->sorts[11] , t ); task_addunlock( cj->progeny[5]->sorts[11] , t );
......@@ -728,8 +720,7 @@ void space_splittasks ( struct space *s ) {
case 12: /* ( 0 , 0 , 1 ) */
if ( space_dosub &&
!ci->progeny[1]->split && !ci->progeny[3]->split && !ci->progeny[5]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[2]->split && !cj->progeny[4]->split && !cj->progeny[6]->split &&
ci->progeny[1]->count + ci->progeny[3]->count + ci->progeny[5]->count + ci->progeny[7]->count + cj->progeny[0]->count + cj->progeny[2]->count + cj->progeny[4]->count + cj->progeny[6]->count < space_splitsize ) {
!cj->progeny[0]->split && !cj->progeny[2]->split && !cj->progeny[4]->split && !cj->progeny[6]->split ) {
t->type = tid_sub; t->flags = 12;
task_addunlock( ci->progeny[1]->sorts[12] , t ); task_addunlock( cj->progeny[0]->sorts[12] , t );
task_addunlock( ci->progeny[3]->sorts[11] , t ); task_addunlock( cj->progeny[0]->sorts[11] , t );
......
......@@ -86,7 +86,7 @@ struct part {
int id;
/* Number of pairwise interactions. */
float count;
double count, count_dh;
int icount;
} __attribute__((aligned (32)));
......
......@@ -128,7 +128,7 @@ void map_cellcheck ( struct cell *c , void *data ) {
p = &c->parts[k];
if ( p->x[0] < c->loc[0] || p->x[1] < c->loc[1] || p->x[2] < c->loc[2] ||
p->x[0] > c->loc[0] + c->h[0] || p->x[1] > c->loc[1] + c->h[1] || p->x[2] > c->loc[2] + c->h[2] ) {
printf( "map_cellcheck: particle at [ %e %e %e ] outside of cell [ %e %e %e ] - [ %e %e %e ].\n" ,
printf( "map_cellcheck: particle at [ %.16e %.16e %.16e ] outside of cell [ %.16e %.16e %.16e ] - [ %.16e %.16e %.16e ].\n" ,
p->x[0] , p->x[1] , p->x[2] ,
c->loc[0] , c->loc[1] , c->loc[2] ,
c->loc[0] + c->h[0] , c->loc[1] + c->h[1] , c->loc[2] + c->h[2] );
......@@ -164,7 +164,7 @@ void map_count ( struct part *p , struct cell *c , void *data ) {
double *count = (double *)data;
// printf( "%e\n" , p->count );
// printf( "%i %e %e\n" , p->id , p->count , p->count_dh );
*count += p->count;
......@@ -447,7 +447,7 @@ void pairs_n2 ( double *dim , struct part *__restrict__ parts , int N , int peri
r2 = dx[0]*dx[0] + dx[1]*dx[1] + dx[2]*dx[2];
if ( r2 < parts[j].r*parts[j].r || r2 < parts[k].r*parts[k].r ) {
f1 = 0.0; f2 = 0.0;
iact( r2 , parts[j].r , parts[k].r , &f1 , &f2 , &count , &count );
iact_nopart( r2 , parts[j].r , parts[k].r , &f1 , &f2 , &count , &count );
dcount += f1 + f2;
if ( parts[j].r / parts[k].r > maxratio )
#pragma omp critical
......@@ -552,6 +552,7 @@ int main ( int argc , char *argv[] ) {
parts[k].x[0] = ((double)rand()) / RAND_MAX * dim[0];
parts[k].x[1] = ((double)rand()) / RAND_MAX * dim[1];
parts[k].x[2] = ((double)rand()) / RAND_MAX * dim[2];
parts[k].id = k;
parts[k].r = r_min + ((r_max - r_min)*rand())/RAND_MAX;
}
printf( "main: allocated memory for %i parts.\n" , N ); fflush(stdout);
......@@ -651,9 +652,9 @@ int main ( int argc , char *argv[] ) {
printf( "main: cutoffs in [ %.3f %.3f ].\n" , s.r_min , s.r_max ); fflush(stdout);
/* Verify that each particle is in it's propper cell. */
icount = 0;
space_map_cells( &s , &map_cellcheck , &icount );
printf( "main: map_cellcheck picked up %i parts.\n" , icount );
// icount = 0;
// space_map_cells( &s , &map_cellcheck , &icount );
// printf( "main: map_cellcheck picked up %i parts.\n" , icount );
data[0] = s.maxdepth; data[1] = 0;
space_map_cells( &s , &map_maxdepth , data );
......@@ -723,6 +724,10 @@ int main ( int argc , char *argv[] ) {
(double)runner_hist_bins[k] );
#endif
/* Loop over the parts directly. */
// for ( k = 0 ; k < N ; k++ )
// printf( " %i %e %e\n" , s.parts[k].id , s.parts[k].count , s.parts[k].count_dh );
/* Get the average interactions per particle. */
count = 0;
space_map_parts( &s , &map_count , &count );
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment