Commit 9494cf3b authored by Pedro Gonnet
Browse files

several bug-fixes, tasks now only re-generated when cells change.


Former-commit-id: c89209038aa1469220b59886be0c29f5f38119d6
parent c826e1a1
......@@ -65,6 +65,7 @@ struct cell {
/* Pointers for the sorted indices. */
struct entry *sort;
int sorted;
/* Number of pairs associated with this cell. */
// int nr_pairs;
......@@ -79,7 +80,7 @@ struct cell {
struct cell *super;
/* The tasks computing this cell's sorts. */
struct task *sorts[13];
struct task *sorts;
int sortsize;
/* The tasks computing this cell's density. */
......
......@@ -65,7 +65,6 @@ void engine_prepare ( struct engine *e ) {
int j, k, qid;
struct space *s = e->s;
struct queue *q;
float dt_step = e->dt_step;
TIMER_TIC
......@@ -90,25 +89,12 @@ void engine_prepare ( struct engine *e ) {
}
// printf( "engine_prepare: re-filling queues took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Re-set the particle data. */
// tic = getticks();
#pragma omp parallel for schedule(static) private(j)
for ( k = 0 ; k < s->nr_parts ; k++ )
if ( s->parts[k].dt <= dt_step ) {
s->parts[k].wcount = 0.0f;
s->parts[k].density.wcount_dh = 0.0f;
s->parts[k].rho = 0.0f;
s->parts[k].rho_dh = 0.0f;
s->parts[k].density.div_v = 0.0f;
for ( j = 0 ; j < 3 ; ++j)
s->parts[k].density.curl_v[j] = 0.0f;
}
// printf( "engine_prepare: re-setting particle data took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Run through the tasks and get all the waits right. */
// tic = getticks();
#pragma omp parallel for schedule(static) private(j)
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[k].skip )
continue;
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
__sync_add_and_fetch( &s->tasks[k].unlock_tasks[j]->wait , 1 );
for ( j = 0 ; j < s->tasks[k].nr_unlock_cells ; j++ )
......@@ -260,7 +246,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
else {
/* Init with the first non-null child. */
for ( k = 0 ; c->progeny[k] == 0 ; k++ );
for ( k = 0 ; c->progeny[k] == NULL ; k++ );
dt_min = c->progeny[k]->dt_min;
dt_max = c->progeny[k]->dt_max;
h_max = c->progeny[k]->h_max;
......@@ -282,11 +268,12 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
c->dt_max = dt_max;
c->h_max = h_max;
c->dx_max = dx_max;
c->sorted = 0;
/* Clean out the task pointers. */
c->sorts[0] = NULL;
c->nr_tasks = 0;
c->nr_density = 0;
// c->sorts[0] = NULL;
// c->nr_tasks = 0;
// c->nr_density = 0;
}
......@@ -474,7 +461,7 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
#if defined(HAVE_SETAFFINITY)
cpu_set_t cpuset;
#endif
int k, qid, nrq;
int k;
/* Store the values. */
e->s = s;
......@@ -498,25 +485,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
error( "Failed to allocate queues." );
bzero( e->queues , nr_queues * sizeof(struct queue) );
/* Init the queues. */
for ( k = 0 ; k < nr_queues ; k++ )
queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
/* How many queues to fill initially? */
for ( nrq = 0 , k = nr_queues ; k > 0 ; k = k / 2 )
nrq += 1;
/* Fill the queues (round-robin). */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none )
continue;
// qid = 0;
// qid = k % nrq;
qid = k % nr_queues;
e->queues[qid].tid[ e->queues[qid].count ] = s->tasks_ind[k];
e->queues[qid].count += 1;
}
/* Sort the queues topologically. */
// for ( k = 0 ; k < nr_queues ; k++ )
// queue_sort( &e->queues[k] );
......
......@@ -41,8 +41,8 @@
/* Reset the lock word pointed to by l to 0. The argument is parenthesised
   so that any pointer expression (e.g. "locks + i") can be passed. */
#define lock_init( l ) ( *(l) = 0 )
/* An integer lock owns no resources, so destroying it simply evaluates to 0. */
#define lock_destroy( l ) 0
/**
 * @brief Acquire the spin-lock pointed to by l, busy-waiting until it is free.
 *
 * The lock word is switched atomically from 0 to 1 with a compare-and-swap;
 * the function only returns once that switch has succeeded.
 *
 * @param l Pointer to the lock word.
 *
 * @return Always 0.
 */
INLINE static int lock_lock ( volatile int *l ) {
    /* Exactly one acquisition loop: a second compare-and-swap loop after a
       successful one would wait on the lock this caller has just taken. */
    while ( __sync_val_compare_and_swap( l , 0 , 1 ) != 0 );
    return 0;
    }
/* Try to take the lock without waiting. Evaluates to 1 when the lock word
   is already non-zero; otherwise to the value the compare-and-swap found
   in the lock word, i.e. 0 when this call switched it from 0 to 1. */
#define lock_trylock( l ) ( ( *(l) ) ? 1 : __sync_val_compare_and_swap( l , 0 , 1 ) )
......
......@@ -298,7 +298,7 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
/* Grab the task lock. */
// if ( blocking ) {
if ( lock_lock( qlock ) != 0 )
error( "Locking the task_lock failed.\n" );
error( "Locking the qlock failed.\n" );
// }
// else {
// if ( lock_trylock( qlock ) != 0 )
......@@ -412,7 +412,7 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
/* Release the task lock. */
if ( lock_unlock( qlock ) != 0 )
error( "Unlocking the task_lock failed.\n" );
error( "Unlocking the qlock failed.\n" );
/* Leave? */
if ( tid >= 0 ) {
......
......@@ -198,10 +198,10 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
for ( k = 0 ; k < 8 ; k++ ) {
if ( c->progeny[k] == NULL )
continue;
if ( c->progeny[k]->sorts[0] == NULL )
if ( c->progeny[k]->sorts == NULL )
missing = flags;
else
missing = ( c->progeny[k]->sorts[0]->flags ^ flags ) & flags;
missing = ( c->progeny[k]->sorts->flags ^ flags ) & flags;
if ( missing )
runner_dosort( r , c->progeny[k] , missing );
}
......@@ -262,6 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
c->sort[ j*(c->count + 1) + c->count ].d = FLT_MAX;
c->sort[ j*(c->count + 1) + c->count ].i = 0;
/* Mark as sorted. */
c->sorted |= ( 1 << j );
} /* loop over sort arrays. */
} /* progeny? */
......@@ -287,6 +290,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
c->sort[ j*(count + 1) + c->count ].d = FLT_MAX;
c->sort[ j*(count + 1) + c->count ].i = 0;
runner_dosort_ascending( &c->sort[ j*(count + 1) ] , c->count );
c->sorted |= ( 1 << j );
}
}
......
......@@ -655,6 +655,10 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get the sort ID. */
sid = space_getsid( e->s , &ci , &cj , shift );
/* Have the cells been sorted? */
if ( !(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid) ) )
error( "Trying to interact unsorted cells." );
/* Get the cutoff shift. */
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
rshift += shift[k]*runner_shift[ 3*sid + k ];
......@@ -902,6 +906,10 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get the shift ID. */
sid = space_getsid( e->s , &ci , &cj , shift );
/* Have the cells been sorted? */
if ( !(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid) ) )
error( "Trying to interact unsorted cells." );
/* Get the cutoff shift. */
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
rshift += shift[k]*runner_shift[ 3*sid + k ];
......
......@@ -77,6 +77,109 @@ const int sortlistID[27] = {
};
/**
* @brief Mark tasks to be skipped and set the sort flags accordingly.
*
* @return 1 if the space has to be rebuilt, 0 otherwise.
*/
int space_marktasks ( struct space *s ) {
int k;
struct task *t;
float dt_step = s->dt_step;
/* Run through the tasks and clear the sort flags. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
/* Get a handle on the kth task. */
t = &s->tasks[k];
/* Sort task? */
if ( t->type == task_type_sort ) {
/* Re-set the flags. */
t->flags = 0;
}
}
/* Run through the tasks and mark as skip or not. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
/* Get a handle on the kth task. */
t = &s->tasks[k];
/* Single-cell task? */
if ( t->type == task_type_self ||
t->type == task_type_ghost ||
( t->type == task_type_sub && t->cj == NULL ) ) {
/* Set this task's skip. */
t->skip = ( t->ci->dt_min > dt_step );
/* Set the sort flags. */
if ( !t->skip && t->type == task_type_sub )
space_addsorts( s , t , t->ci , t->cj , t->flags );
}
/* Pair? */
else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) {
/* Set this task's skip. */
t->skip = ( t->ci->dt_min > dt_step && t->cj->dt_min > dt_step );
/* Too much particle movement? */
if ( !t->skip && t->tight &&
( t->ci->dx_max > t->ci->dmin || t->cj->dx_max > t->cj->dmin ) )
break;
/* Set the sort flags. */
if ( !t->skip ) {
if ( t->type == task_type_pair ) {
t->ci->sorts->flags |= (1 << t->flags);
t->cj->sorts->flags |= (1 << t->flags);
}
else
space_addsorts( s , t , t->ci , t->cj , t->flags );
}
}
/* None? */
else if ( t->type == task_type_none )
t->skip = 1;
}
/* Did this not go through? */
if ( k < s->nr_tasks )
return 1;
/* Run through the tasks and mark as skip or not. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
/* Get a handle on the kth task. */
t = &s->tasks[k];
/* Sort task? */
if ( t->type == task_type_sort ) {
/* If this sort has no flags, skip it. */
t->skip = ( t->flags == 0 );
}
}
/* All is well... */
return 0;
}
/**
* @brief Mapping function to set dt_min and dt_max.
*/
......@@ -113,7 +216,7 @@ void space_map_prepare ( struct cell *c , void *data ) {
for ( k = 1 ; k < c->count ; k++ ) {
p = &c->parts[k];
xp = p->xtras;
cp = &c->cparts[0];
cp = &c->cparts[k];
dt_min = fminf( dt_min , p->dt );
dt_max = fmaxf( dt_max , p->dt );
h_max = fmaxf( h_max , p->h );
......@@ -133,7 +236,7 @@ void space_map_prepare ( struct cell *c , void *data ) {
else {
/* Init with the first non-null child. */
for ( k = 0 ; c->progeny[k] == 0 ; k++ );
for ( k = 0 ; c->progeny[k] == NULL ; k++ );
dt_min = c->progeny[k]->dt_min;
dt_max = c->progeny[k]->dt_max;
h_max = c->progeny[k]->h_max;
......@@ -155,11 +258,12 @@ void space_map_prepare ( struct cell *c , void *data ) {
c->dt_max = dt_max;
c->h_max = h_max;
c->dx_max = dx_max;
c->sorted = 0;
/* Clean out the task pointers. */
c->sorts[0] = NULL;
c->nr_tasks = 0;
c->nr_density = 0;
// c->sorts = NULL;
// c->nr_tasks = 0;
// c->nr_density = 0;
}
......@@ -176,17 +280,13 @@ void space_map_prepare ( struct cell *c , void *data ) {
void space_prepare ( struct space *s ) {
int k;
struct task *t;
float dt_step = s->dt_step, dx_max = 0.0f;
int k, rebuild;
// struct task *t;
// float dt_step = s->dt_step;
float dx_max = 0.0f;
int counts[ task_type_count + 1 ];
ticks tic;
/* Traverse the cells and set their dt_min and dx_max. */
// tic = getticks();
// space_map_cells_post( s , 1 , &space_map_prepare , NULL );
// printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Get the maximum displacement in the whole system. */
for ( k = 0 ; k < s->nr_cells ; k++ )
dx_max = fmaxf( dx_max , s->cells[k].dx_max );
......@@ -194,24 +294,11 @@ void space_prepare ( struct space *s ) {
/* Run through the tasks and mark as skip or not. */
tic = getticks();
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_sort ||
t->type == task_type_self ||
t->type == task_type_ghost ||
( t->type == task_type_sub && t->cj == NULL ) )
t->skip = ( t->ci->dt_min > dt_step );
else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) {
t->skip = ( t->ci->dt_min > dt_step && t->cj->dt_min > dt_step );
if ( !t->skip && t->tight &&
( t->ci->dx_max > t->ci->dmin || t->cj->dx_max > t->cj->dmin ) )
break;
}
}
printf( "space_prepare: checking tasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
rebuild = space_marktasks( s );
printf( "space_prepare: space_marktasks tasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Did this not go through? */
if ( k < s->nr_tasks ) {
if ( rebuild ) {
/* Re-build the space. */
tic = getticks();
......@@ -223,12 +310,17 @@ void space_prepare ( struct space *s ) {
space_map_cells_post( s , 1 , &space_map_prepare , NULL );
printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Run through the tasks and mark as skip or not. */
tic = getticks();
rebuild = space_marktasks( s );
printf( "space_prepare: space_marktasks tasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
}
/* Now that we have the cell structure, re-build the tasks. */
tic = getticks();
space_maketasks( s , 1 );
printf( "space_prepare: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
// tic = getticks();
// space_maketasks( s , 1 );
// printf( "space_prepare: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Count the number of each task type. */
tic = getticks();
......@@ -257,22 +349,22 @@ void space_prepare ( struct space *s ) {
void space_ranktasks ( struct space *s ) {
int i, j = 0, k, temp, left = 0, rank;
struct task *t;
int *tid = s->tasks_ind;
struct task *t, *tasks = s->tasks;
int *tid = s->tasks_ind, nr_tasks = s->nr_tasks;
/* Run through the tasks and get all the waits right. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
for ( i = 0 , k = 0 ; k < nr_tasks ; k++ ) {
tid[k] = k;
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
s->tasks[k].unlock_tasks[j]->wait += 1;
for ( j = 0 ; j < tasks[k].nr_unlock_tasks ; j++ )
tasks[k].unlock_tasks[j]->wait += 1;
}
/* Main loop. */
for ( j = 0 , rank = 0 ; left < s->nr_tasks ; rank++ ) {
for ( j = 0 , rank = 0 ; left < nr_tasks ; rank++ ) {
/* Load the tids of tasks with no waits. */
for ( k = left ; k < s->nr_tasks ; k++ )
if ( s->tasks[ tid[k] ].wait == 0 ) {
for ( k = left ; k < nr_tasks ; k++ )
if ( tasks[ tid[k] ].wait == 0 ) {
temp = tid[j]; tid[j] = tid[k]; tid[k] = temp;
j += 1;
}
......@@ -281,11 +373,13 @@ void space_ranktasks ( struct space *s ) {
if ( j == left )
error( "Unsatisfiable task dependencies detected." );
/* Traverse the task tree and add tasks with no weight. */
/* Unlock the next layer of tasks. */
for ( i = left ; i < j ; i++ ) {
t = &s->tasks[ tid[i] ];
t = &tasks[ tid[i] ];
t->rank = rank;
s->tasks_ind[i] = t - s->tasks;
tid[i] = t - tasks;
if ( tid[i] >= nr_tasks )
error( "Task index overshoot." );
/* printf( "engine_ranktasks: task %i of type %s has rank %i.\n" , i ,
(t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */
for ( k = 0 ; k < t->nr_unlock_tasks ; k++ )
......@@ -378,6 +472,12 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
float h, h_limit, h_max = 0.0f, dt_min = c->parts[0].dt, dt_max = dt_min;
struct cell *temp;
/* Clean out the task pointers. */
c->sorts = NULL;
c->nr_tasks = 0;
c->nr_density = 0;
c->dx_max = 0.0;
/* If the cell is already split, check that the split is still ok. */
if ( c->split ) {
......@@ -491,7 +591,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
struct part *restrict finger, *restrict p;
struct cpart *restrict cfinger;
int *ind;
// ticks tic;
ticks tic;
/* Be verbose about this. */
printf( "space_rebuild: (re)building space...\n" ); fflush(stdout);
......@@ -555,6 +655,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
c->h[0] = s->h[0]; c->h[1] = s->h[1]; c->h[2] = s->h[2];
c->dmin = dmin;
c->depth = 0;
lock_init( &c->lock );
}
/* Be verbose about the change. */
......@@ -622,6 +723,11 @@ void space_rebuild ( struct space *s , double cell_max ) {
space_rebuild_recurse( s , &s->cells[k] );
// printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Now that we have the cell structure, re-build the tasks. */
tic = getticks();
space_maketasks( s , 1 );
printf( "space_rebuild: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
}
......@@ -884,6 +990,12 @@ struct task *space_addtask ( struct space *s , int type , int subtype , int flag
t->nr_unlock_tasks = 0;
t->nr_unlock_cells = 0;
/* Init the lock. */
lock_init( &t->lock );
/* Add an index for it. */
s->tasks_ind[ s->nr_tasks ] = s->nr_tasks;
/* Increase the task counter. */
s->nr_tasks += 1;
......@@ -909,7 +1021,7 @@ void space_splittasks ( struct space *s ) {
struct cell *ci, *cj;
double hi, hj, shift[3];
struct task *t;
float dt_step = s->dt_step;
// float dt_step = s->dt_step;
int pts[7][8] = { { -1 , 12 , 10 , 9 , 4 , 3 , 1 , 0 } ,
{ -1 , -1 , 11 , 10 , 5 , 4 , 2 , 1 } ,
{ -1 , -1 , -1 , 12 , 7 , 6 , 4 , 3 } ,
......@@ -924,6 +1036,13 @@ void space_splittasks ( struct space *s ) {
/* Get a pointer on the task. */
t = &s->tasks[tid];
/* Empty task? */
if ( t->ci == NULL || ( t->type == task_type_pair && t->cj == NULL ) ) {
t->type = task_type_none;
t->skip = 1;
continue;
}
/* Self-interaction? */
if ( t->type == task_type_self ) {
......@@ -931,10 +1050,10 @@ void space_splittasks ( struct space *s ) {
ci = t->ci;
/* Ignore this task? */
if ( ci->dt_min > dt_step ) {
/* if ( ci->dt_min > dt_step ) {
t->skip = 1;
continue;
}
} */
/* Is this cell even split? */
if ( !ci->split )
......@@ -981,14 +1100,14 @@ void space_splittasks ( struct space *s ) {
/* Get a handle on the cells involved. */
ci = t->ci;
cj = t->cj;
hi = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) );
hj = fmin( cj->h[0] , fmin( cj->h[1] , cj->h[2] ) );
hi = ci->dmin;
hj = cj->dmin;
/* Ignore this task? */
if ( ci->dt_min > dt_step && cj->dt_min > dt_step ) {
/* if ( ci->dt_min > dt_step && cj->dt_min > dt_step ) {
t->skip = 1;
continue;
}
} */
/* Get the sort ID, use space_getsid and not t->flags
to make sure we get ci and cj swapped if needed. */
......@@ -1145,16 +1264,16 @@ void space_splittasks ( struct space *s ) {
else {
/* Create the sort for ci. */
if ( ci->sorts[0] == NULL )
ci->sorts[0] = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 );
ci->sorts[0]->flags |= (1 << sid);
task_addunlock( ci->sorts[0] , t );
if ( ci->sorts == NULL )
ci->sorts = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 );
ci->sorts->flags |= (1 << sid);
task_addunlock( ci->sorts , t );
/* Create the sort for cj. */
if ( cj->sorts[0] == NULL )
cj->sorts[0] = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , 0 );
cj->sorts[0]->flags |= (1 << sid);
task_addunlock( cj->sorts[0] , t );
if ( cj->sorts == NULL )
cj->sorts = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , cj , NULL , 0 );
cj->sorts->flags |= (1 << sid);
task_addunlock( cj->sorts , t );
}
......@@ -1177,13 +1296,10 @@ void space_splittasks ( struct space *s ) {
void space_addsorts ( struct space *s , struct task *t , struct cell *ci , struct cell *cj , int sid ) {
float h;
float dmin = ci->dmin;
double shift[3];
int j, k;
/* Get the cell dimensions. */
h = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) );
/* Single-cell sub? */
if ( cj == NULL ) {
......@@ -1215,19 +1331,25 @@ void space_addsorts ( struct space *s , struct task *t , struct cell *ci , struc
/* If there is no further splitting, add the sorts. */
if ( !ci->split || !cj->split ||
ci->h_max*2 >= h || cj->h_max*2 >= h ) {
ci->h_max*2 >= dmin || cj->h_max*2 >= dmin ) {
/* Create and add the sort for ci. */
if ( ci->sorts[0] == NULL )
ci->sorts[0] = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 );
ci->sorts[0]->flags |= (1 << sid);
task_addunlock( ci->sorts[0] , t );
if ( ci->sorts == NULL ) {
ci->sorts = space_addtask( s , task_type_sort , 0 , 1 << sid , 0 , ci , NULL , 0 );
if ( ci->parent != NULL && ci->parent->sorts != NULL )
task_addunlock( ci->sorts , ci->parent->sorts );
}