Commit e4477009 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

several changes and fixes.


Former-commit-id: e5e7bd22be9d0b084c4d349a0654817d014238a9
parent f5f5d17d
......@@ -19,7 +19,7 @@
AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing
AM_CFLAGS = -g -O3 -Wall -Werror $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math $(SIMD_FLAGS) $(CFLAGS) $(OPENMP_CFLAGS) -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
# Assign a "safe" version number
AM_LDFLAGS = $(LAPACK_LIBS) $(BLAS_LIBS) -version-info 0:0:0
......
......@@ -61,12 +61,18 @@ struct cell {
/* Parent cell. */
struct cell *parent;
/* Super cell, i.e. the highest-level supercell that has interactions. */
struct cell *super;
/* The tasks computing this cell's sorts. */
struct task *sorts[14];
/* The ghost task to link density to interactions. */
struct task *ghost;
/* Number of tasks that are associated with this cell. */
int nr_tasks;
/* Number of tasks this cell is waiting for and whether it is in use. */
int wait;
......
......@@ -20,3 +20,5 @@
/* Physical constants. */
#define const_gamma (5.0f/3.0f)
#define const_cfl 0.25f
#define const_nwneigh 48
......@@ -36,9 +36,6 @@ struct part {
/* Particle mass. */
float mass;
/* Particle density. */
float rho;
/* Particle ID. */
int id;
......@@ -51,8 +48,11 @@ struct part {
/* Particle acceleration. */
float a[3];
/* Particle density. */
float rho;
/* Particle pressure. */
float P;
// float P;
/* Aggregate quantities. */
float POrho2;
......
......@@ -454,6 +454,10 @@ void queue_sort ( struct queue *q ) {
wait[k] = t->rank;
weight[k] = 0; // t->ci->count;
break;
case task_type_ghost:
wait[k] = t->rank;
weight[k] = 0; // t->ci->count;
break;
}
}
......
......@@ -442,14 +442,51 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
void runner_doghost ( struct runner *r , struct cell *c ) {
struct part *p;
int i, k;
int i, k, redo, count = c->count;
int *pid;
float ihg, ihg2;
TIMER_TIC
/* Recurse? */
if ( c->split ) {
for ( k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL )
runner_doghost( r , c->progeny[k] );
return;
}
/* Init the IDs that have to be updated. */
if ( ( pid = (int *)alloca( sizeof(int) * count ) ) == NULL )
error( "Call to alloca failed." );
for ( k = 0 ; k < count ; k++ )
pid[k] = k;
/* While there are particles that need to be updated... */
while ( count > 0 ) {
/* Reset the redo-count. */
redo = 0;
/* Loop over the parts in this cell. */
for ( i = 0 ; i < c->count ; i++ ) {
for ( i = 0 ; i < count ; i++ ) {
/* Get a direct pointer on the part. */
p = &c->parts[i];
p = &c->parts[ pid[i] ];
/* Adjust the computed rho. */
ihg = kernel_igamma / p->h;
ihg2 = ihg * ihg;
p->rho *= ihg * ihg2;
p->rho_dh *= ihg2 * ihg2;
/* Did we get the right number density? */
if ( p->wcount + kernel_root > const_nwneigh + 1 ||
p->wcount + kernel_root < const_nwneigh - 1 ) {
printf( "runner_doghost: particle %i has bad wcount=%f.\n" , p->id , p->wcount + kernel_root );
pid[redo] = pid[i];
redo += 1;
continue;
}
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
......@@ -459,14 +496,24 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
p->u_dt = 0.0f;
p->h_dt = 0.0f;
/* Compute this particle's time step. */
p->dt = const_cfl * p->h / ( const_gamma * ( const_gamma - 1.0f ) * p->u );
/* Compute the pressure. */
p->P = p->rho * p->u * ( const_gamma - 1.0f );
// p->P = p->rho * p->u * ( const_gamma - 1.0f );
/* Compute the P/Omega/rho2. */
p->POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f );
}
/* Re-set the counter for the next loop (potentially). */
count = redo;
if ( count > 0 )
error( "Bad smoothing length, fixing this isn't implemented yet." );
}
#ifdef TIMER_VERBOSE
printf( "runner_doghost[%02i]: %i parts at depth %i took %.3f ms.\n" ,
r->id , c->count , c->depth ,
......@@ -624,7 +671,7 @@ void *runner_main ( void *data ) {
cell_unlocktree( cj );
break;
case task_type_ghost:
if ( t->flags )
if ( ci->super == ci )
runner_doghost( r , ci );
break;
default:
......
......@@ -30,7 +30,7 @@ static float kernel_coeffs[ (kernel_degree + 1) * (kernel_ivals + 1) ] =
{ 3.0/4.0*M_1_PI , -3.0/2.0*M_1_PI , 0.0 , M_1_PI ,
-0.25*M_1_PI , 3.0/2.0*M_1_PI , -3.0*M_1_PI , M_2_PI ,
0.0 , 0.0 , 0.0 , 0.0 };
#define kernel_root(h) ( 1.0f/((h)*(h)*(h))*kernel_igamma3*kernel_coeffs[ kernel_degree ] )
#define kernel_root ( 4.0/3.0*M_PI*kernel_igamma3*kernel_coeffs[ kernel_degree ] )
/**
......@@ -69,19 +69,18 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
float r = sqrtf( r2 );
float xi, xj;
float hg_inv, hg2_inv;
float hg_inv;
float wi, wj, wi_dx, wj_dx;
if ( r2 < hi*hi && pi != NULL ) {
hg_inv = kernel_igamma / hi;
hg2_inv = hg_inv * hg_inv;
xi = r * hg_inv;
kernel_deval( xi , &wi , &wi_dx );
pi->rho += pj->mass * hg_inv * hg2_inv * wi;
pi->rho_dh += -pj->mass * hg2_inv * hg2_inv * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * 4.0f * M_PI / 3.0f * kernel_igamma3;
pi->rho += pj->mass * wi;
pi->rho_dh += -pj->mass * ( 3.0*wi + xi*wi_dx );
pi->wcount += wi * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pi->icount += 1;
}
......@@ -89,13 +88,12 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_density ( float r
if ( r2 < hj*hj && pj != NULL ) {
hg_inv = kernel_igamma / hj;
hg2_inv = hg_inv * hg_inv;
xj = r * hg_inv;
kernel_deval( xj , &wj , &wj_dx );
pj->rho += pi->mass * hg_inv * hg2_inv * wj;
pj->rho_dh += -pi->mass * hg2_inv * hg2_inv * ( 3.0*wj + xj*wj_dx );
pj->wcount += wj * 4.0f * M_PI / 3.0f * kernel_igamma3;
pj->rho += pi->mass * wj;
pj->rho_dh += -pi->mass * ( 3.0*wj + xj*wj_dx );
pj->wcount += wj * ( 4.0f * M_PI / 3.0f * kernel_igamma3 );
pj->icount += 1;
}
......@@ -133,7 +131,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2
hjg2_inv = hjg_inv * hjg_inv;
xj = r * hjg_inv;
kernel_deval( xj , &wj , &wj_dx );
wj_dr = hjg2_inv *hjg2_inv * wj_dx;
wj_dr = hjg2_inv * hjg2_inv * wj_dx;
/* Get the common factor out. */
w = ri * ( pi->POrho2 * wi_dr + pj->POrho2 * wj_dr );
......
......@@ -100,10 +100,35 @@ void space_map_clearsort ( struct cell *c , void *data ) {
void space_map_mkghosts ( struct cell *c , void *data ) {
struct space *s = (struct space *)data;
struct cell *finger;
/* Make the ghost task. */
/* Find the super cell, i.e. the highest cell hierarchically above
this one to still have at least one task associated with it. */
c->super = c;
for ( finger = c->parent ; finger != NULL ; finger = finger->parent )
if ( finger->nr_tasks > 0 )
c->super = finger;
/* Make the ghost task, if needed. */
if ( c->super != c || c->nr_tasks > 0 )
c->ghost = space_addtask( s , task_type_ghost , task_subtype_none , 0 , 0 , c , NULL , NULL , 0 , NULL , 0 );
/* If we are not the super cell ourselves, make our ghost depend
on our parent cell. */
if ( c->super != c )
task_addunlock( c->parent->ghost , c->ghost );
}
/**
* @brief Mapping function to clear the number of tasks in each cell.
*/
void space_map_clearnrtasks ( struct cell *c , void *data ) {
c->nr_tasks = 0;
}
......@@ -594,7 +619,8 @@ void space_splittasks ( struct space *s ) {
break;
case 10: /* ( 0 , 1 , 0 ) */
if ( !ci->progeny[2]->split && !ci->progeny[3]->split && !ci->progeny[6]->split && !ci->progeny[7]->split &&
if ( space_dosub &&
!ci->progeny[2]->split && !ci->progeny[3]->split && !ci->progeny[6]->split && !ci->progeny[7]->split &&
!cj->progeny[0]->split && !cj->progeny[1]->split && !cj->progeny[4]->split && !cj->progeny[5]->split ) {
t->type = task_type_sub; t->flags = 10;
task_addunlock( ci->progeny[2]->sorts[10] , t ); task_addunlock( cj->progeny[0]->sorts[10] , t );
......@@ -885,7 +911,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Allocate the task-list, if needed. */
if ( s->tasks == NULL )
if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tot_cells * 14 ) != 0 )
if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tot_cells * 30 ) != 0 )
error( "Failed to allocate task list." );
s->nr_tasks = 0;
......@@ -933,6 +959,39 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Split the tasks. */
space_splittasks( s );
/* Remove sort tasks with no dependencies. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_sort && t->nr_unlock_tasks == 0 ) {
if ( t->ci->split )
for ( i = 0 ; i < 13 ; i++ )
if ( t->flags & ( 1 << i ) ) {
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL )
task_rmunlock( t->ci->progeny[j]->sorts[i] , t );
t->ci->sorts[i] = NULL;
}
t->type = task_type_none;
}
}
/* Count the number of tasks associated with each cell. */
space_map_cells( s , 1 , &space_map_mkghosts , NULL );
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_self )
t->ci->nr_tasks += 1;
else if ( t->type == task_type_pair ) {
t->ci->nr_tasks += 1;
t->cj->nr_tasks += 1;
}
else if ( t->type == task_type_sub ) {
t->ci->nr_tasks += 1;
if ( t->cj != NULL )
t->cj->nr_tasks += 1;
}
}
/* Append a ghost task to each cell. */
space_map_cells( s , 1 , &space_map_mkghosts , s );
......@@ -944,7 +1003,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Self-interaction? */
if ( t->type == task_type_self && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->ghost );
task_addunlock( t , t->ci->super->ghost );
t2 = space_addtask( s , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , NULL , 0 , NULL , 0 );
task_addunlock( t->ci->ghost , t2 );
}
......@@ -953,8 +1012,8 @@ void space_maketasks ( struct space *s , int do_sort ) {
else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) {
if ( t->ci->ghost == NULL )
printf( "space_maketasks: cell at %lx has no ghost!\n" , (unsigned long int)t->ci );
task_addunlock( t , t->ci->ghost );
task_addunlock( t , t->cj->ghost );
task_addunlock( t , t->ci->super->ghost );
task_addunlock( t , t->cj->super->ghost );
t2 = space_addtask( s , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 );
task_addunlock( t->ci->ghost , t2 );
task_addunlock( t->cj->ghost , t2 );
......@@ -962,9 +1021,9 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Otherwise, sub interaction? */
else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->ghost );
task_addunlock( t , t->ci->super->ghost );
if ( t->cj != NULL )
task_addunlock( t , t->cj->ghost );
task_addunlock( t , t->cj->super->ghost );
t2 = space_addtask( s , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , NULL , 0 , NULL , 0 );
task_addunlock( t->ci->ghost , t2 );
if ( t->cj != NULL )
......@@ -983,33 +1042,6 @@ void space_maketasks ( struct space *s , int do_sort ) {
for ( k = 0 ; k < s->nr_tasks ; k++ )
s->tasks_ind[k] = k;
/* Remove sort and ghost tasks with no dependencies. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( ( t->type == task_type_sort || t->type == task_type_ghost ) && t->nr_unlock_tasks == 0 ) {
if ( t->type == task_type_sort && t->ci->split )
for ( i = 0 ; i < 13 ; i++ )
if ( t->flags & ( 1 << i ) ) {
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL )
task_rmunlock( t->ci->progeny[j]->sorts[i] , t );
t->ci->sorts[i] = NULL;
}
t->type = task_type_none;
}
}
/* Make each remaining ghost task unlock the ghosts of its progeny. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_ghost && t->ci->split )
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL )
task_addunlock( t->ci->ghost , t->ci->progeny[j]->ghost );
if ( t->type == task_type_ghost && ( t->ci->parent == NULL || t->ci->parent->ghost->type == task_type_none ) )
t->flags = 1;
}
/* Count the number of each task type. */
for ( k = 0 ; k < task_type_count ; k++ )
counts[k] = 0;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment