Commit f6e86c59 authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

preliminary implementation of variable time stepping. still problems with...

preliminary implementation of variable time stepping. still problems with sorting, should be more careful building the task tree.


Former-commit-id: 903851927b5bf4b34cfda269d488386c2378e445
parent 189416cb
......@@ -41,6 +41,9 @@ struct cell {
/* Max radii in this cell. */
double h_max;
/* Minimum and maximum dt in this cell. */
double dt_min, dt_max;
/* The depth of this cell in the tree. */
int depth, split;
......
......@@ -77,6 +77,10 @@ void engine_prepare ( struct engine *e , int force ) {
for ( k = 0 ; k < e->nr_queues ; k++ )
e->queues[k].count = 0;
/* Re-allocate the queue buffers? */
for ( k = 0 ; k < e->nr_queues ; k++ )
queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
/* Fill the queues (round-robin). */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none )
......@@ -213,7 +217,7 @@ void engine_barrier( struct engine *e ) {
* @param sort_queues Flag to try to sort the queues topologically.
*/
void engine_run ( struct engine *e , int sort_queues ) {
void engine_run ( struct engine *e , int sort_queues , float dt_max ) {
int k;
......@@ -226,6 +230,10 @@ void engine_run ( struct engine *e , int sort_queues ) {
}
}
/* Set the maximum dt. */
e->dt_max = dt_max;
e->s->dt_max = dt_max;
/* Cry havoc and let loose the dogs of war. */
e->barrier_count = -e->barrier_count;
if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
......
......@@ -50,6 +50,9 @@ struct engine {
/* The queues. */
struct queue *queues;
/* The maximum dt to step. */
float dt_max;
/* Data for the threads' barrier. */
pthread_mutex_t barrier_mutex;
pthread_cond_t barrier_cond;
......@@ -63,4 +66,4 @@ void engine_barrier( struct engine *e );
void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy );
void engine_prepare ( struct engine *e , int force );
void engine_ranktasks ( struct engine *e );
void engine_run ( struct engine *e , int sort_queues );
void engine_run ( struct engine *e , int sort_queues , float dt_max );
......@@ -278,6 +278,7 @@ void read_ic ( char* fileName, double dim[3], struct part **parts, int* N, int*
/* Allocate memory to store particles */
if(posix_memalign( (void*)parts , 32 , *N * sizeof(struct part)) != 0)
error("Error while allocating memory for particles");
bzero( *parts , *N * sizeof(struct part) );
printf("read_ic: Allocated %8.2f MB for particles.\n", *N * sizeof(struct part) / (1024.*1024.));
......
......@@ -48,9 +48,6 @@ struct part {
/* Particle velocity. */
float v[3];
/* Particle acceleration. */
float a[3];
/* Particle density. */
float rho;
......@@ -69,6 +66,9 @@ struct part {
/* Derivative of the density with respect to this particle's smoothing length. */
float rho_dh;
/* Particle acceleration. */
float a[3];
/* Particle number density. */
// int icount;
float wcount;
......
......@@ -124,10 +124,16 @@ void queue_insert ( struct queue *q , struct task *t ) {
void queue_init ( struct queue *q , int size , struct task *tasks ) {
/* Allocate the task list. */
/* Allocate the task list if needed. */
if ( q->tid == NULL || q->size < size ) {
if ( q->tid != NULL )
free( q->tid );
q->size = size;
if ( ( q->tid = (int *)malloc( sizeof(int) * size ) ) == NULL )
error( "Failed to allocate queue tids." );
}
/* Set the tasks pointer. */
q->tasks = tasks;
/* Init counters. */
......
......@@ -325,6 +325,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
int i, k, redo, count = c->count;
int *pid;
float ihg, ihg2;
float dt_max = r->e->dt_max;
TIMER_TIC
/* Recurse? */
......@@ -353,6 +354,9 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
/* Get a direct pointer on the part. */
p = &c->parts[ pid[i] ];
/* Is this part within the timestep? */
if ( p->dt <= dt_max ) {
/* Adjust the computed rho. */
ihg = kernel_igamma / p->h;
ihg2 = ihg * ihg;
......@@ -376,14 +380,6 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
continue;
}
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
/* Compute this particle's time step. */
p->dt = const_cfl * p->h / sqrtf( const_gamma * ( const_gamma - 1.0f ) * p->u );
......@@ -395,6 +391,16 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
}
/* Reset the acceleration. */
for ( k = 0 ; k < 3 ; k++ )
p->a[k] = 0.0f;
/* Reset the time derivatives. */
p->u_dt = 0.0f;
p->h_dt = 0.0f;
}
/* Re-set the counter for the next loop (potentially). */
count = redo;
if ( count > 0 ) {
......@@ -570,18 +576,18 @@ void *runner_main ( void *data ) {
switch ( t->type ) {
case task_type_self:
if ( t->subtype == task_subtype_density )
runner_doself_density( r , ci );
runner_doself1_density( r , ci );
else if ( t->subtype == task_subtype_force )
runner_doself_force( r , ci );
runner_doself2_force( r , ci );
else
error( "Unknown task subtype." );
cell_unlocktree( ci );
break;
case task_type_pair:
if ( t->subtype == task_subtype_density )
runner_dopair_density( r , ci , cj );
runner_dopair1_density( r , ci , cj );
else if ( t->subtype == task_subtype_force )
runner_dopair_force( r , ci , cj );
runner_dopair2_force( r , ci , cj );
else
error( "Unknown task subtype." );
cell_unlocktree( ci );
......@@ -592,9 +598,9 @@ void *runner_main ( void *data ) {
break;
case task_type_sub:
if ( t->subtype == task_subtype_density )
runner_dosub_density( r , ci , cj , t->flags );
runner_dosub1_density( r , ci , cj , t->flags );
else if ( t->subtype == task_subtype_force )
runner_dosub_force( r , ci , cj , t->flags );
runner_dosub2_force( r , ci , cj , t->flags );
else
error( "Unknown task subtype." );
cell_unlocktree( ci );
......
......@@ -75,6 +75,7 @@ enum {
runner_counter_steal_stall,
runner_counter_steal_empty,
runner_counter_keep,
runner_counter_iact,
runner_counter_count,
};
extern int runner_counter[ runner_counter_count ];
......
This diff is collapsed.
......@@ -89,9 +89,10 @@ __attribute__ ((always_inline)) INLINE static void kernel_deval ( float x , floa
}
#ifdef VECTORIZE
__attribute__ ((always_inline)) INLINE static void kernel_deval_vec ( vector *x , vector *w , vector *dw_dx ) {
#ifdef VECTORIZE
vector ind, c[kernel_degree+1];
int j, k;
......@@ -113,16 +114,10 @@ __attribute__ ((always_inline)) INLINE static void kernel_deval_vec ( vector *x
w->v = ( x->v * w->v ) + c[k].v;
}
#else
/* Fake vectorization, i.e. hope the compiler gets it. */
for ( int k = 0 ; k < VEC_SIZE ; k++ )
kernel_deval( x.f[k] , &w.f[k] , &dw_dx.f[k] );
}
#endif
}
__attribute__ ((always_inline)) INLINE static void kernel_eval ( float x , float *W ) {
int ind = fmin( x , kernel_ivals );
......
......@@ -104,7 +104,7 @@ void space_rebuild_recycle ( struct space *s , struct cell *c ) {
int space_rebuild_recurse ( struct space *s , struct cell *c ) {
int k, count, changes = 0, wasmt[8];
float h, h_limit, h_max = 0.0f;
float h, h_limit, h_max = 0.0f, dt_min = c->parts[0].dt, dt_max = dt_min;
struct cell *temp;
/* If the cell is already split, check that the split is still ok. */
......@@ -124,8 +124,14 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
count += 1;
if ( h > h_max )
h_max = h;
if ( c->cparts[k].dt < dt_min )
dt_min = c->cparts[k].dt;
if ( c->cparts[k].dt > dt_max )
dt_max = c->cparts[k].dt;
}
c->h_max = h_max;
c->dt_min = dt_min;
c->dt_max = dt_max;
/* Un-split? */
if ( count < c->count*space_splitratio || c->count < space_splitsize ) {
......@@ -209,7 +215,7 @@ int space_rebuild_recurse ( struct space *s , struct cell *c ) {
int space_rebuild ( struct space *s , int force , double cell_max ) {
float h_max = s->parts[0].h, h_min = s->parts[0].h;
float h_max = s->cell_min, h_min = s->parts[0].h;
int i, j, k, cdim[3];
struct cell *c;
struct part *finger;
......@@ -235,13 +241,16 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
/* Free the old cells, if they were allocated. */
if ( s->cells != NULL ) {
for ( k = 0 ; k < s->nr_cells ; k++ )
for ( k = 0 ; k < s->nr_cells ; k++ ) {
space_rebuild_recycle( s , &s->cells[k] );
if ( s->cells[k].sort != NULL )
free( s->cells[k].sort );
}
free( s->cells );
s->maxdepth = 0;
}
/* Set the new cell dimensions. */
/* Set the new cell dimensions only if smaller. */
for ( k = 0 ; k < 3 ; k++ ) {
s->cdim[k] = cdim[k];
s->h[k] = s->dim[k] / cdim[k];
......@@ -274,7 +283,8 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
/* Run through the particles and get their cell index. */
ind = (int *)alloca( sizeof(int) * s->nr_parts );
if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL )
error( "Failed to allocate temporary particle indices." );
for ( k = 0 ; k < s->nr_cells ; k++ )
s->cells[ k ].count = 0;
for ( k = 0 ; k < s->nr_parts ; k++ ) {
......@@ -285,6 +295,9 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
/* Sort the parts according to their cells. */
parts_sort( s->parts , ind , s->nr_parts , 0 , s->nr_cells );
/* We no longer need the indices as of here. */
free( ind );
/* Update the condensed particle data. */
for ( k = 0 ; k < s->nr_parts ; k++ ) {
s->cparts[k].x[0] = s->parts[k].x[0];
......@@ -295,7 +308,9 @@ int space_rebuild ( struct space *s , int force , double cell_max ) {
}
/* Hook the cells up to the parts. */
for ( finger = s->parts , cfinger = s->cparts , k = 0 ; k < s->nr_cells ; k++ ) {
finger = s->parts;
cfinger = s->cparts;
for ( k = 0 ; k < s->nr_cells ; k++ ) {
c = &s->cells[ k ];
c->parts = finger;
c->cparts = cfinger;
......@@ -623,7 +638,7 @@ void space_splittasks ( struct space *s ) {
continue;
/* Make a sub? */
if ( ci->count < space_subsize ) {
if ( space_dosub && ci->count < space_subsize ) {
/* convert to a self-subtask. */
t->type = task_type_sub;
......@@ -701,22 +716,19 @@ void space_splittasks ( struct space *s ) {
sid = 26 - sid;
/* Replace by a single sub-task? */
if ( ci->count < space_subsize && cj->count < space_subsize &&
if ( space_dosub &&
ci->count < space_subsize && cj->count < space_subsize &&
sid != 0 && sid != 2 && sid != 6 && sid != 8 ) {
/* Make this task a sub task. */
t->type = task_type_sub;
t->flags = sid;
/* Make it depend on all the sorts of its sub-cells. */
for ( j = 0 ; j < 8 ; j++ ) {
if ( ci->progeny[j] != NULL )
/* Make it depend on all the sorts of its two cells. */
for ( k = 0 ; k < 14 ; k++ )
task_addunlock( ci->progeny[j]->sorts[k] , t );
if ( cj->progeny[j] != NULL )
task_addunlock( ci->sorts[k] , t );
for ( k = 0 ; k < 14 ; k++ )
task_addunlock( cj->progeny[j]->sorts[k] , t );
}
task_addunlock( cj->sorts[k] , t );
/* Don't go any further. */
continue;
......@@ -1070,12 +1082,12 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Allocate the task-list, if needed. */
if ( s->tasks == NULL || s->tasks_size < s->tot_cells * 43 ) {
if ( s->tasks == NULL || s->tasks_size < s->tot_cells * space_maxtaskspercell ) {
if ( s->tasks != NULL )
free( s->tasks );
if ( s->tasks_ind != NULL )
free( s->tasks_ind );
s->tasks_size = s->tot_cells * 43;
s->tasks_size = s->tot_cells * space_maxtaskspercell;
if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tasks_size ) != 0 )
error( "Failed to allocate task list." );
if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->tasks_size ) ) == NULL )
......@@ -1128,6 +1140,34 @@ void space_maketasks ( struct space *s , int do_sort ) {
/* Split the tasks. */
space_splittasks( s );
/* Remove pairs and self-interactions for cells with no parts in dt. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
if ( t->type == task_type_self ) {
if ( t->ci->dt_min > s->dt_max )
t->type = task_type_none;
}
else if ( t->type == task_type_pair ) {
if ( t->ci->dt_min > s->dt_max && t->cj->dt_min > s->dt_max ) {
t->type = task_type_none;
for ( j = 0 ; j < 13 ; j++ )
task_rmunlock_blind( t->ci->sorts[j] , t );
for ( j = 0 ; j < 13 ; j++ )
task_rmunlock_blind( t->cj->sorts[j] , t );
}
}
else if ( t->type == task_type_sub ) {
if ( t->ci->dt_min > s->dt_max && ( t->cj == NULL || t->cj->dt_min > s->dt_max ) ) {
t->type = task_type_none;
for ( j = 0 ; j < 13 ; j++ )
task_rmunlock_blind( t->ci->sorts[j] , t );
if ( t->cj != NULL )
for ( j = 0 ; j < 13 ; j++ )
task_rmunlock_blind( t->cj->sorts[j] , t );
}
}
}
/* Remove sort tasks with no dependencies. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
t = &s->tasks[k];
......@@ -1247,7 +1287,7 @@ void space_maketasks ( struct space *s , int do_sort ) {
void space_split ( struct space *s , struct cell *c ) {
int k, count;
double h, h_limit, h_max = 0.0;
double h, h_limit, h_max = 0.0, dt_min = c->parts[0].dt, dt_max = dt_min;
struct cell *temp;
/* Check the depth. */
......@@ -1264,8 +1304,14 @@ void space_split ( struct space *s , struct cell *c ) {
count += 1;
if ( h > h_max )
h_max = h;
if ( c->cparts[k].dt < dt_min )
dt_min = c->cparts[k].dt;
if ( c->cparts[k].dt > dt_max )
dt_max = c->cparts[k].dt;
}
c->h_max = h_max;
c->dt_min = dt_min;
c->dt_max = dt_max;
/* Split or let it be? */
if ( count > c->count*space_splitratio && c->count > space_splitsize ) {
......@@ -1420,6 +1466,7 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
s->periodic = periodic;
s->nr_parts = N;
s->parts = parts;
s->cell_min = h_max;
/* Allocate the cparts array. */
if ( posix_memalign( (void *)&s->cparts , 32 , N * sizeof(struct cpart) ) != 0 )
......
......@@ -26,8 +26,9 @@
#define space_splitratio 0.875
#define space_splitsize_default 400
#define space_subsize_default 1000
#define space_dosub 1
#define space_dosub 0
#define space_stretch 1.0
#define space_maxtaskspercell 43
/* Convert cell location to ID. */
......@@ -58,10 +59,10 @@ struct space {
double h[3], ih[3];
/* The minimum and maximum cutoff radii. */
double h_min, h_max;
double h_min, h_max, cell_min;
/* Current time step for particles. */
float dt;
float dt_max;
/* Number of cells. */
int nr_cells, tot_cells;
......@@ -79,9 +80,6 @@ struct space {
struct part *parts;
struct cpart *cparts;
/* The sortlist data (cells hold pointers to these). */
struct entry *sortlist;
/* The total number of parts in the space. */
int nr_parts;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment