From d40765a699607ffb1c85ca3480a58bba3e0ac9f4 Mon Sep 17 00:00:00 2001 From: Pedro Gonnet <pedro.gonnet@durham.ac.uk> Date: Sun, 2 Dec 2012 18:01:05 +0000 Subject: [PATCH] engine_ranktasks now actually stores the ranked order, no real need to re-sort tasks in queues. added engine_prepare, which re-sorts the particles into cells and rebuilds the tasks lists and queues if necessary. removed sorting along dt for now. Former-commit-id: 799188eb9e75ce8cdbee53fba2efa69b060ba7cf --- src/cell.c | 2 - src/engine.c | 55 +++++++++--- src/engine.h | 1 + src/runner.h | 4 +- src/space.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++---- src/space.h | 4 +- 6 files changed, 267 insertions(+), 35 deletions(-) diff --git a/src/cell.c b/src/cell.c index 0092644a5a..ba1a631037 100644 --- a/src/cell.c +++ b/src/cell.c @@ -257,8 +257,6 @@ void cell_split ( struct cell *c ) { /* Store the counts and offsets. */ for ( k = 0 ; k < 8 ; k++ ) { c->progeny[k]->count = right[k] - left[k] + 1; - if ( c->progeny[k]->count < 0 ) - abort(); c->progeny[k]->parts = &c->parts[ left[k] ]; } diff --git a/src/engine.c b/src/engine.c index c9433f4d11..b5f3794840 100644 --- a/src/engine.c +++ b/src/engine.c @@ -51,14 +51,50 @@ #define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) ) +/** + * @brief Prepare the #engine by re-building the cells and tasks. + * + * @param e The #engine to prepare. + * @param force Flag to force re-building the cell and task structure. + */ + +void engine_prepare ( struct engine *e , int force ) { + + int k, qid, changes; + struct space *s = e->s; + + /* Rebuild the space. */ + changes = space_rebuild( e->s , force ); + printf( "engine_prepare: space_rebuild with %i changes.\n" , changes ); + + /* Has anything changed? */ + if ( changes ) { + + /* Rank the tasks in topological order. */ + engine_ranktasks( e ); + + /* Clear the queues. 
*/ + for ( k = 0 ; k < e->nr_queues ; k++ ) + e->queues[k].count = 0; + + /* Fill the queues (round-robin). */ + for ( k = 0 ; k < s->nr_tasks ; k++ ) { + if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none ) + continue; + qid = k % e->nr_queues; + e->queues[qid].tid[ e->queues[qid].count ] = s->tasks_ind[k]; + e->queues[qid].count += 1; + } + + } + + } + + /** * @brief Sort the tasks in topological order over all queues. * * @param e The #engine. - * - * TODO: Return the indices tid as these are the tasks sorted according - * to their ranks. They can then be dropped into the queues in order - * of these indices. */ void engine_ranktasks ( struct engine *e ) { @@ -66,7 +102,7 @@ void engine_ranktasks ( struct engine *e ) { int i, j = 0, k, temp, left = 0, rank; struct task *t; struct space *s = e->s; - int *tid; + int *tid = s->tasks_ind; /* Run throught the tasks and get all the waits right. */ for ( k = 0 ; k < s->nr_tasks ; k++ ) { @@ -74,12 +110,6 @@ void engine_ranktasks ( struct engine *e ) { s->tasks[k].unlock_tasks[j]->wait += 1; } - /* Allocate and init the task-ID array. */ - if ( ( tid = (int *)malloc( sizeof(int) * s->nr_tasks ) ) == NULL ) - error( "Failed to allocate temporary tid array." ); - for ( k = 0 ; k < s->nr_tasks ; k++ ) - tid[k] = k; - /* Main loop. */ for ( rank = 0 ; left < s->nr_tasks ; rank++ ) { @@ -106,9 +136,6 @@ void engine_ranktasks ( struct engine *e ) { } - /* Release the temporary array. */ - free(tid); - } diff --git a/src/engine.h b/src/engine.h index c2ca98abfe..e7ad72d52c 100644 --- a/src/engine.h +++ b/src/engine.h @@ -61,5 +61,6 @@ struct engine { /* Function prototypes. 
*/ void engine_barrier( struct engine *e ); void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_queues , int policy ); +void engine_prepare ( struct engine *e , int force ); void engine_ranktasks ( struct engine *e ); void engine_run ( struct engine *e , int sort_queues ); diff --git a/src/runner.h b/src/runner.h index 4b21648853..99b8f5cdd3 100644 --- a/src/runner.h +++ b/src/runner.h @@ -88,8 +88,8 @@ extern int runner_counter[ runner_counter_count ]; /* Histogram functions. */ #define runner_hist_a 1.0 -#define runner_hist_b 1000.0 -#define runner_hist_N 100 +#define runner_hist_b 100.0 +#define runner_hist_N 99 long long int runner_hist_bins[ runner_hist_N ]; #define runner_hist_hit( x ) __sync_add_and_fetch( &runner_hist_bins[ (int)fmax( 0.0 , fmin( runner_hist_N-1 , ((x) - runner_hist_a) / (runner_hist_b - runner_hist_a) * runner_hist_N ) ) ] , 1 ) diff --git a/src/space.c b/src/space.c index b43203c97d..6a189ce148 100644 --- a/src/space.c +++ b/src/space.c @@ -76,6 +76,209 @@ const int sortlistID[27] = { }; +/** + * @brief Recursively dismantle a cell tree. + * + */ + +void space_rebuild_recycle ( struct space *s , struct cell *c ) { + + int k; + + if ( c->split ) + for ( k = 0 ; k < 8 ; k++ ) + if ( c->progeny[k] != NULL ) { + space_rebuild_recycle( s , c->progeny[k] ); + space_recycle( s , c->progeny[k] ); + c->progeny[k] = NULL; + } + + } + +/** + * @brief Recursively rebuild a cell tree. + * + */ + +int space_rebuild_recurse ( struct space *s , struct cell *c ) { + + int k, count, changes = 0, wasmt[8]; + float h, h_limit, h_max = 0.0f; + struct cell *temp; + + /* If the cell is already split, check that the split is still ok. */ + if ( c->split ) { + + /* Check the depth. */ + if ( c->depth > s->maxdepth ) + s->maxdepth = c->depth; + + /* Set the minimum cutoff. */ + h_limit = fmin( c->h[0] , fmin( c->h[1] , c->h[2] ) ) / 2; + + /* Count the particles below that. 
*/ + for ( count = 0 , k = 0 ; k < c->count ; k++ ) { + h = c->parts[k].h; + if ( h <= h_limit ) + count += 1; + if ( h > h_max ) + h_max = h; + } + c->h_max = h_max; + + /* Un-split? */ + if ( count < c->count*space_splitratio || c->count < space_splitsize ) { + + /* Get rid of the progeny. */ + space_rebuild_recycle( s , c ); + + /* Re-set the split flag. */ + c->split = 0; + + } + + /* Otherwise, recurse on the kids. */ + else { + + /* Populate all progeny. */ + for ( k = 0 ; k < 8 ; k++ ) + if ( ( wasmt[k] = ( c->progeny[k] == NULL ) ) ) { + temp = space_getcell( s ); + temp->count = 0; + temp->loc[0] = c->loc[0]; + temp->loc[1] = c->loc[1]; + temp->loc[2] = c->loc[2]; + temp->h[0] = c->h[0]/2; + temp->h[1] = c->h[1]/2; + temp->h[2] = c->h[2]/2; + if ( k & 4 ) + temp->loc[0] += temp->h[0]; + if ( k & 2 ) + temp->loc[1] += temp->h[1]; + if ( k & 1 ) + temp->loc[2] += temp->h[2]; + temp->depth = c->depth + 1; + temp->split = 0; + temp->h_max = 0.0; + temp->parent = c; + c->progeny[k] = temp; + } + + /* Make sure each part is in its place. */ + cell_split( c ); + + /* Remove empty progeny. */ + for ( k = 0 ; k < 8 ; k++ ) + if ( c->progeny[k]->count == 0 ) { + changes += !wasmt[k]; + space_recycle( s , c->progeny[k] ); + c->progeny[k] = NULL; + } + else + changes += wasmt[k]; + + /* Recurse. */ + for ( k = 0 ; k < 8 ; k++ ) + if ( c->progeny[k] != NULL ) + changes += space_rebuild_recurse( s , c->progeny[k] ); + + } + + } + + /* Otherwise, try to split it anyway. */ + else { + space_split( s , c ); + changes += c->split; + } + + /* Return the grand total. */ + return changes; + + } + +/** + * @brief Re-build the cells as well as the tasks. + * + * @param s The #space in which to update the cells. + * @param force Flag to force re-building the cells and tasks. + * + * @return The number of changes made to the cells; the tasks are re-built whenever this is non-zero. 
+ */ + +int space_rebuild ( struct space *s , int force ) { + + float h_max = 0.0f; + int i, j, k, cdim[3]; + struct cell *c; + int changes = 0; + + /* Run through the parts and get the current h_max. */ + for ( k = 0 ; k < s->nr_parts ; k++ ) + if ( s->parts[k].h > h_max ) + h_max = s->parts[k].h; + + /* Get the new putative cell dimensions. */ + for ( k = 0 ; k < 3 ; k++ ) + cdim[k] = floor( s->dim[k] / h_max ); + + /* Do we need to re-build the upper-level cells? */ + if ( force || cdim[0] < s->cdim[0] || cdim[1] < s->cdim[1] || cdim[2] < s->cdim[2] ) { + + /* Free the old cells, if they were allocated. */ + if ( s->cells != NULL ) { + for ( k = 0 ; k < s->nr_cells ; k++ ) + space_rebuild_recycle( s , &s->cells[k] ); + free( s->cells ); + s->maxdepth = 0; + } + + /* Set the new cell dimensions. */ + for ( k = 0 ; k < 3 ; k++ ) { + s->cdim[k] = cdim[k]; + s->h[k] = s->dim[k] / cdim[k]; + s->ih[k] = 1.0 / s->h[k]; + } + + /* Allocate the highest level of cells. */ + s->nr_cells = cdim[0] * cdim[1] * cdim[2]; + if ( posix_memalign( (void *)&s->cells , 64 , s->nr_cells * sizeof(struct cell) ) != 0 ) + error( "Failed to allocate cells." ); + bzero( s->cells , s->nr_cells * sizeof(struct cell) ); + for ( k = 0 ; k < s->nr_cells ; k++ ) + if ( lock_init( &s->cells[k].lock ) != 0 ) + error( "Failed to init spinlock." ); + + /* Set the cell location and sizes. */ + for ( i = 0 ; i < cdim[0] ; i++ ) + for ( j = 0 ; j < cdim[1] ; j++ ) + for ( k = 0 ; k < cdim[2] ; k++ ) { + c = &s->cells[ cell_getid( cdim , i , j , k ) ]; + c->loc[0] = i*s->h[0]; c->loc[1] = j*s->h[1]; c->loc[2] = k*s->h[2]; + c->h[0] = s->h[0]; c->h[1] = s->h[1]; c->h[2] = s->h[2]; + c->depth = 0; + } + + /* There were massive changes. */ + changes = 1; + + } /* re-build upper-level cells? */ + + /* At this point, we have the upper-level cells, old or new. Now make + sure that the parts in each cell are ok. 
*/ + for ( k = 0 ; k < s->nr_cells ; k++ ) + changes += space_rebuild_recurse( s , &s->cells[k] ); + + /* Now that we have the cell structure, re-build the tasks. */ + if ( changes ) + space_maketasks( s , 1 ); + + /* Return the number of changes. */ + return changes; + + } + + /** * @brief Sort the particles according to the given indices. * @@ -855,7 +1058,6 @@ void space_maketasks ( struct space *s , int do_sort ) { int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd; int *cdim = s->cdim; - int nr_tasks_old = s->nr_tasks; struct task *t , *t2; int pts[7][8] = { { -1 , 12 , 10 , 9 , 4 , 3 , 1 , 0 } , { -1 , -1 , 11 , 10 , 5 , 4 , 2 , 1 } , @@ -881,14 +1083,14 @@ void space_maketasks ( struct space *s , int do_sort ) { if ( do_sort ) { if ( c->count < 1000 ) { - sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x3fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x1fff , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); for ( k = 0 ; k < 13 ; k++ ) c->sorts[k] = sort[0]; nr_sort = 1; } else if ( c->count < 5000 ) { sort[0] = space_addtask( s , task_type_sort , task_subtype_none , 0x7f , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x3f80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + sort[1] = space_addtask( s , task_type_sort , task_subtype_none , 0x1f80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); for ( k = 0 ; k < 7 ; k++ ) c->sorts[k] = sort[0]; for ( k = 7 ; k < 14 ; k++ ) @@ -902,7 +1104,7 @@ void space_maketasks ( struct space *s , int do_sort ) { c->sorts[6] = c->sorts[7] = sort[3] = space_addtask( s , task_type_sort , task_subtype_none , 0x40 + 0x80 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[8] = c->sorts[9] = sort[4] = space_addtask( s , task_type_sort , task_subtype_none , 0x100 + 0x200 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); c->sorts[10] = c->sorts[11] 
= sort[5] = space_addtask( s , task_type_sort , task_subtype_none , 0x400 + 0x800 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); - c->sorts[12] = c->sorts[13] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 + 0x2000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); + c->sorts[12] = c->sorts[13] = sort[6] = space_addtask( s , task_type_sort , task_subtype_none , 0x1000 , 0 , c , NULL , sort_up , nr_sort_up , NULL , 0 ); nr_sort = 7; } } @@ -957,9 +1159,17 @@ void space_maketasks ( struct space *s , int do_sort ) { } /* Allocate the task-list, if needed. */ - if ( s->tasks == NULL ) - if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tot_cells * 30 ) != 0 ) + if ( s->tasks == NULL || s->tasks_size < s->tot_cells * 30 ) { + if ( s->tasks != NULL ) + free( s->tasks ); + if ( s->tasks_ind != NULL ) + free( s->tasks_ind ); + s->tasks_size = s->tot_cells * 30; + if ( posix_memalign( (void *)&s->tasks , 64 , sizeof(struct task) * s->tasks_size ) != 0 ) error( "Failed to allocate task list." ); + if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->tasks_size ) ) == NULL ) + error( "Failed to allocate task indices." ); + } s->nr_tasks = 0; /* Loop over the cells and get their sub-tasks. */ @@ -1097,15 +1307,9 @@ void space_maketasks ( struct space *s , int do_sort ) { } - /* Did we already create indices? */ - if ( s->tasks_ind == NULL ) - if ( ( s->tasks_ind = (int *)malloc( sizeof(int) * s->nr_tasks ) ) == NULL ) - error( "Failed to allocate task indices." ); - - /* Did the number of tasks change, i.e. do we have to re-index? */ - if ( nr_tasks_old != s->nr_tasks ) - for ( k = 0 ; k < s->nr_tasks ; k++ ) - s->tasks_ind[k] = k; + /* Re-set the indices. */ + for ( k = 0 ; k < s->nr_tasks ; k++ ) + s->tasks_ind[k] = k; /* Count the number of each task type. 
*/ for ( k = 0 ; k < task_type_count ; k++ ) @@ -1196,7 +1400,7 @@ void space_split ( struct space *s , struct cell *c ) { space_recycle( s , c->progeny[k] ); c->progeny[k] = NULL; } - + } /* Otherwise, set the progeny to null. */ diff --git a/src/space.h b/src/space.h index 32219c8a4f..5586d81f42 100644 --- a/src/space.h +++ b/src/space.h @@ -87,7 +87,7 @@ struct space { /* The list of tasks. */ struct task *tasks; - int nr_tasks, next_task; + int nr_tasks, next_task, tasks_size; int *tasks_ind; lock_type task_lock; @@ -102,7 +102,9 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , void space_maketasks ( struct space *s , int do_sort ); void space_map_cells ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data ); void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct cell *c , void *data ) , void *data ); +int space_rebuild ( struct space *s , int force ); void space_recycle ( struct space *s , struct cell *c ); +void space_split ( struct space *s , struct cell *c ); -- GitLab