Commit e4477009 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

several changes and fixes.


Former-commit-id: e5e7bd22be9d0b084c4d349a0654817d014238a9
parent f5f5d17d
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
AUTOMAKE_OPTIONS=gnu AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing # Add the debug flag to the whole thing
AM_CFLAGS = -g -O3 -Wall -Werror $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) -DTIMER -DCOUNTER -DCPU_TPS=2.67e9 AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number # Assign a "safe" version number
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) -version-info 0:0:0 AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) -version-info 0:0:0
......
...@@ -61,12 +61,18 @@ struct cell { ...@@ -61,12 +61,18 @@ struct cell {
/* Parent cell. */ /* Parent cell. */
struct cell *parent; struct cell *parent;
/* Super cell, i.e. the highest-level supercell that has interactions. */
struct cell *super;
/* The tasks computing this cell's sorts. */ /* The tasks computing this cell's sorts. */
struct task *sorts[14]; struct task *sorts[14];
/* The ghost task to link density to interactions. */ /* The ghost task to link density to interactions. */
struct task *ghost; struct task *ghost;
/* Number of tasks that are associated with this cell. */
int nr_tasks;
/* Number of tasks this cell is waiting for and whether it is in use. */ /* Number of tasks this cell is waiting for and whether it is in use. */
int wait; int wait;
......
...@@ -19,4 +19,6 @@ ...@@ -19,4 +19,6 @@
/* Physical constants. */ /* Physical constants. */
#define const_gamma (5.0f/3.0f) #define const_gamma (5.0f/3.0f)
#define const_cfl 0.25f
#define const_nwneigh 48
...@@ -36,9 +36,6 @@ struct part { ...@@ -36,9 +36,6 @@ struct part {
/* Particle mass. */ /* Particle mass. */
float mass; float mass;
/* Particle density. */
float rho;
/* Particle ID. */ /* Particle ID. */
int id; int id;
...@@ -51,8 +48,11 @@ struct part { ...@@ -51,8 +48,11 @@ struct part {
/* Particle acceleration. */ /* Particle acceleration. */
float a[3]; float a[3];
/* Particle density. */
float rho;
/* Particle pressure. */ /* Particle pressure. */
float P; // float P;
/* Aggregate quantities. */ /* Aggregate quantities. */
float POrho2; float POrho2;
......
...@@ -454,6 +454,10 @@ void queue_sort ( struct queue *q ) { ...@@ -454,6 +454,10 @@ void queue_sort ( struct queue *q ) {
wait[k] = t->rank; wait[k] = t->rank;
weight[k] = 0; // t->ci->count; weight[k] = 0; // t->ci->count;
break; break;
case task_type_ghost:
wait[k] = t->rank;
weight[k] = 0; // t->ci->count;
break;
} }
} }
......
...@@ -442,29 +442,76 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -442,29 +442,76 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
void runner_doghost ( struct runner *r , struct cell *c ) { void runner_doghost ( struct runner *r , struct cell *c ) {
struct part *p; struct part *p;
int i, k; int i, k, redo, count = c->count;
int *pid;
float ihg, ihg2;
TIMER_TIC TIMER_TIC
/* Loop over the parts in this cell. */ /* Recurse? */
for ( i = 0 ; i < c->count ; i++ ) { if ( c->split ) {
for ( k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL )
runner_doghost( r , c->progeny[k] );
return;
}
/* Init the IDs that have to be updated. */
if ( ( pid = (int *)alloca( sizeof(int) * count ) ) == NULL )
error( "Call to alloca failed." );
for ( k = 0 ; k < count ; k++ )
pid[k] = k;
/* While there are particles that need to be updated... */
while ( count > 0 ) {
/* Get a direct pointer on the part. */ /* Reset the redo-count. */
p = &c->parts[i]; redo = 0;
/* Reset the acceleration. */ /* Loop over the parts in this cell. */
for ( k = 0 ; k < 3 ; k++ ) for ( i = 0 ; i < count ; i++ ) {
p->a[k] = 0.0f;
/* Get a direct pointer on the part. */
p = &c->parts[ pid[i] ];
/* Adjust the computed rho. */
ihg = kernel_igamma / p->h;
ihg2 = ihg * ihg;
p->rho *= ihg * ihg2;
p->rho_dh *= ihg2 * ihg2;
/* Did we get the right number density? */
if ( p->wcount + kernel_root > const_nwneigh + 1 ||
p->wcount + kernel_root < const_nwneigh - 1 ) {
printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root );
pid[redo] = pid[i];
redo += 1;
continue;
}
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
/* Compute this particle's time step. */
p->dt = const_cfl * p->h / ( const_gamma * ( const_gamma - 1.0f ) * p->u );
/* Compute the pressure. */
// p->P = p->rho * p->u * ( const_gamma - 1.0f );
/* Compute the P/Omega/rho2. */
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
}
/* Reset the time derivatives. */ /* Re-set the counter for the next loop (potentially). */
p->u_dt = 0.0f; count = redo;
p->h_dt = 0.0f; if ( count > 0 )
error( "Bad smoothing length, fixing this isn't implemented yet." );
/* Compute the pressure. */
p->P = p->rho * p->u * ( const_gamma - 1.0f );
/* Compute the P/Omega/rho2. */
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
} }
#ifdef TIMER_VERBOSE #ifdef TIMER_VERBOSE
...@@ -624,7 +671,7 @@ void *runner_main ( void *data ) { ...@@ -624,7 +671,7 @@ void *runner_main ( void *data ) {
cell_unlocktree( cj ); cell_unlocktree( cj );
break; break;
case task_type_ghost: case task_type_ghost:
if ( t->flags ) if ( ci->super == ci )
runner_doghost( r , ci ); runner_doghost( r , ci );
break; break;
default: default:
......
...@@ -30,7 +30,7 @@ static float kernel_coeffs[ (kernel_degree + 1) * (kernel_ivals + 1) ] = ...@@ -30,7 +30,7 @@ static float kernel_coeffs[ (kernel_degree + 1) * (kernel_ivals + 1) ] =
{ 3.0/4.0*M_1_PI , -3.0/2.0*M_1_PI , 0.0 , M_1_PI , { 3.0/4.0*M_1_PI , -3.0/2.0*M_1_PI , 0.0 , M_1_PI ,
-0.25*M_1_PI , 3.0/2.0*M_1_PI , -3.0*M_1_PI , M_2_PI , -0.25*M_1_PI , 3.0/2.0*M_1_PI , -3.0*M_1_PI , M_2_PI ,
0.0 , 0.0 , 0.0 , 0.0 }; 0.0 , 0.0 , 0.0 , 0.0 };
#define kernel_root(h) ( 1.0f/((h)*(h)*(h))*kernel_igamma3*kernel_coeffs[ kernel_degree ] ) #define kernel_root ( 4.0/3.0*M_PI*kernel_igamma3*kernel_coeffs[ kernel_degree ] )
/** /**
...@@ -69,19 +69,18 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r ...@@ -69,19 +69,18 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
float r = sqrtf( r2 ); float r = sqrtf( r2 );
float xi, xj; float xi, xj;
float hg_inv, hg2_inv; float hg_inv;
float wi, wj, wi_dx, wj_dx; float wi, wj, wi_dx, wj_dx;
if ( r2 < hi*hi && pi != NULL ) { if ( r2 < hi*hi && pi != NULL ) {
hg_inv = kernel_igamma / hi; hg_inv = kernel_igamma / hi;
hg2_inv = hg_inv * hg_inv;
xi = r * hg_inv; xi = r * hg_inv;
kernel_deval( xi , &wi , &wi_dx ); kernel_deval( xi , &wi , &wi_dx );
pi->rho += pj->mass * hg_inv * hg2_inv * wi; pi->rho += pj->mass * wi;
pi->rho_dh += -pj->mass * hg2_inv * hg2_inv * ( 3.0*wi + xi*wi_dx ); pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * 4.0f * M_PI / 3.0f * kernel_igamma3; pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->icount += 1; pi->icount += 1;
} }
...@@ -89,13 +88,12 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r ...@@ -89,13 +88,12 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
if ( r2 < hj*hj && pj != NULL ) { if ( r2 < hj*hj && pj != NULL ) {
hg_inv = kernel_igamma / hj; hg_inv = kernel_igamma / hj;
hg2_inv = hg_inv * hg_inv;
xj = r * hg_inv; xj = r * hg_inv;
kernel_deval( xj , &wj , &wj_dx ); kernel_deval( xj , &wj , &wj_dx );
pj->rho += pi->mass * hg_inv * hg2_inv * wj; pj->rho += pi->mass * wj;
pj->rho_dh += -pi->mass * hg2_inv * hg2_inv * ( 3.0*wj + xj*wj_dx ); pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx );
pj->wcount += wj * 4.0f * M_PI / 3.0f * kernel_igamma3; pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->icount += 1; pj->icount += 1;
} }
...@@ -133,7 +131,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2 ...@@ -133,7 +131,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
hjg2_inv = hjg_inv * hjg_inv; hjg2_inv = hjg_inv * hjg_inv;
xj = r * hjg_inv; xj = r * hjg_inv;
kernel_deval( xj , &wj , &wj_dx ); kernel_deval( xj , &wj , &wj_dx );
wj_dr = hjg2_inv *hjg2_inv * wj_dx; wj_dr = hjg2_inv * hjg2_inv * wj_dx;
/* Get the common factor out. */ /* Get the common factor out. */
w = ri * ( pi->POrho2 * wi_dr + pj->POrho2 * wj_dr ); w = ri * ( pi->POrho2 * wi_dr + pj->POrho2 * wj_dr );
......
...@@ -100,9 +100,34 @@ void space_map_clearsort ( struct cell *c , void *data ) { ...@@ -100,9 +100,34 @@ void space_map_clearsort ( struct cell *c , void *data ) {
void space_map_mkghosts ( struct cell *c , void *data ) { void space_map_mkghosts ( struct cell *c , void *data ) {
struct space *s = (struct space *)data; struct space *s = (struct space *)data;
struct cell *finger;
/* Find the super cell, i.e. the highest cell hierarchically above
this one to still have at least one task associated with it. */
c->super = c;
for ( finger = c->parent ; finger != NULL ; finger = finger->parent )
if ( finger->nr_tasks > 0 )
c->super = finger;
/* Make the ghost task, if needed. */
if ( c->super != c || c->nr_tasks > 0 )
c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , NULL , 0 , NULL , 0 );
/* If we are not the super cell ourselves, make our ghost depend
on our parent cell. */
if ( c->super != c )
task_addunlock( c->parent->ghost , c->ghost );
}
/**
* @brief Mapping function to clear the number of tasks in each cell.
*/
void space_map_clearnrtasks ( struct cell *c , void *data ) {
/* Make the ghost task. */ c->nr_tasks = 0;
c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , NULL , 0 , NULL , 0 );
} }
...@@ -594,7 +619,8 @@ void space_splittasks ( struct space *s ) { ...@@ -594,7 +619,8 @@ void space_splittasks ( struct space *s ) {
break; break;
case 10: /* ( 0 , 1 , 0 ) */ case 10: /* ( 0 , 1 , 0 ) */
if ( !ci->progeny[2]->split && !ci->progeny[3]->split && !ci->progeny[6]->split && !ci->progeny[7]->split && if ( space_dosub &&
!ci->progeny[2]->split && !ci->progeny[3]->split && !ci->progeny[6]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[1]->split && !cj->progeny[4]->split && !cj->progeny[5]->split ) { !cj->progeny[0]->split && !cj->progeny[1]->split && !cj->progeny[4]->split && !cj->progeny[5]->split ) {
t->type = task_type_sub; t->flags = 10; t->type = task_type_sub; t->flags = 10;
task_addunlock( ci->progeny[2]->sorts[10] , t ); task_addunlock( cj->progeny[0]->sorts[10] , t ); task_addunlock( ci->progeny[2]->sorts[10] , t ); task_addunlock( cj->progeny[0]->sorts[10] , t );
...@@ -885,7 +911,7 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -885,7 +911,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Allocate the task-list, if needed. */ /* Allocate the task-list, if needed. */
if ( s->tasks == NULL ) if ( s->tasks == NULL )
if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tot_cells * 14 ) != 0 ) if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tot_cells * 30 ) != 0 )
error( "Failed to allocate task list." ); error( "Failed to allocate task list." );
s->nr_tasks = 0; s->nr_tasks = 0;
...@@ -933,6 +959,39 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -933,6 +959,39 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Split the tasks. */ /* Split the tasks. */
space_splittasks( s ); space_splittasks( s );
/* Remove sort tasks with no dependencies. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_sort && t->nr_unlock_tasks == 0 ) {
if ( t->ci->split )
for ( i = 0 ; i < 13 ; i++ )
if ( t->flags & ( 1 << i ) ) {
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL )
task_rmunlock( t->ci->progeny[j]->sorts[i] , t );
t->ci->sorts[i] = NULL;
}
t->type = task_type_none;
}
}
/* Count the number of tasks associated with each cell. */
space_map_cells( s , 1 , &space_map_mkghosts , NULL );
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_self )
t->ci->nr_tasks += 1;
else if ( t->type == task_type_pair ) {
t->ci->nr_tasks += 1;
t->cj->nr_tasks += 1;
}
else if ( t->type == task_type_sub ) {
t->ci->nr_tasks += 1;
if ( t->cj != NULL )
t->cj->nr_tasks += 1;
}
}
/* Append a ghost task to each cell. */ /* Append a ghost task to each cell. */
space_map_cells( s , 1 , &space_map_mkghosts , s ); space_map_cells( s , 1 , &space_map_mkghosts , s );
...@@ -944,7 +1003,7 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -944,7 +1003,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Self-interaction? */ /* Self-interaction? */
if ( t->type == task_type_self && t->subtype == task_subtype_density ) { if ( t->type == task_type_self && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->ghost ); task_addunlock( t , t->ci->super->ghost );
t2 = space_addtask( s , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , NULL , 0 , NULL , 0 ); t2 = space_addtask( s , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , NULL , 0 , NULL , 0 );
task_addunlock( t->ci->ghost , t2 ); task_addunlock( t->ci->ghost , t2 );
} }
...@@ -953,8 +1012,8 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -953,8 +1012,8 @@ void space_maketasks ( struct space *s , int do_sort ) {
else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) { else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) {
if ( t->ci->ghost == NULL ) if ( t->ci->ghost == NULL )
printf( "space_maketasks: cell at %lx has no ghost!\n" , (unsigned long int)t->ci ); printf( "space_maketasks: cell at %lx has no ghost!\n" , (unsigned long int)t->ci );
task_addunlock( t , t->ci->ghost ); task_addunlock( t , t->ci->super->ghost );
task_addunlock( t , t->cj->ghost ); task_addunlock( t , t->cj->super->ghost );
t2 = space_addtask( s , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 ); t2 = space_addtask( s , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 );
task_addunlock( t->ci->ghost , t2 ); task_addunlock( t->ci->ghost , t2 );
task_addunlock( t->cj->ghost , t2 ); task_addunlock( t->cj->ghost , t2 );
...@@ -962,9 +1021,9 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -962,9 +1021,9 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Otherwise, sub interaction? */ /* Otherwise, sub interaction? */
else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) { else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->ghost ); task_addunlock( t , t->ci->super->ghost );
if ( t->cj != NULL ) if ( t->cj != NULL )
task_addunlock( t , t->cj->ghost ); task_addunlock( t , t->cj->super->ghost );
t2 = space_addtask( s , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 ); t2 = space_addtask( s , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 );
task_addunlock( t->ci->ghost , t2 ); task_addunlock( t->ci->ghost , t2 );
if ( t->cj != NULL ) if ( t->cj != NULL )
...@@ -983,33 +1042,6 @@ void space_maketasks ( struct space *s , int do_sort ) { ...@@ -983,33 +1042,6 @@ void space_maketasks ( struct space *s , int do_sort ) {
for ( k = 0 ; k < s->nr_tasks ; k++ ) for ( k = 0 ; k < s->nr_tasks ; k++ )
s->tasks_ind[k] = k; s->tasks_ind[k] = k;
/* Remove sort and ghost tasks with no dependencies. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( ( t->type == task_type_sort || t->type == task_type_ghost ) && t->nr_unlock_tasks == 0 ) {
if ( t->type == task_type_sort && t->ci->split )
for ( i = 0 ; i < 13 ; i++ )
if ( t->flags & ( 1 << i ) ) {
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL )
task_rmunlock( t->ci->progeny[j]->sorts[i] , t );
t->ci->sorts[i] = NULL;
}
t->type = task_type_none;
}
}
/* Make each remaining ghost task unlock the ghosts of its progeny. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_ghost && t->ci->split )
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL )
task_addunlock( t->ci->ghost , t->ci->progeny[j]->ghost );
if ( t->type == task_type_ghost && ( t->ci->parent == NULL || t->ci->parent->ghost->type == task_type_none ) )
t->flags = 1;
}
/* Count the number of each task type. */ /* Count the number of each task type. */
for ( k = 0 ; k < task_type_count ; k++ ) for ( k = 0 ; k < task_type_count ; k++ )
counts[k] = 0; counts[k] = 0;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment