Commit 19e474c8 authored by Pedro Gonnet
Browse files

massive overhaul of the task generation system.


Former-commit-id: 3faa03ed1880138adeb71df6d3cfcc5d9cd59f7f
parent e55a7195
......@@ -47,7 +47,9 @@ ticks cell_timer[ cell_timer_count ];
/* Define the timer macros. */
#ifdef TIMER_VERBOSE
#define TIMER
#ifndef TIMER
#define TIMER
#endif
#endif
#ifdef TIMER
#define TIMER_TIC ticks tic = getticks();
......@@ -61,7 +63,7 @@ ticks cell_timer[ cell_timer_count ];
# define INLINE inline
# endif
#endif
INLINE ticks timer_toc ( int t , ticks tic ) {
INLINE static ticks timer_toc ( int t , ticks tic ) {
ticks d = (getticks() - tic);
__sync_add_and_fetch( &cell_timer[t] , d );
return d;
......
......@@ -58,7 +58,7 @@ struct cell {
struct entry *sort;
/* Number of pairs associated with this cell. */
int nr_pairs;
// int nr_pairs;
/* Pointers to the next level of cells. */
struct cell *progeny[8];
......
......@@ -178,7 +178,7 @@ typedef unsigned long long ticks;
# define INLINE inline
# endif
#endif
INLINE ticks getticks(void)
INLINE static ticks getticks(void)
{
ticks ret;
......@@ -234,7 +234,7 @@ typedef unsigned long long ticks;
# define INLINE inline
# endif
#endif
INLINE ticks getticks(void)
INLINE static ticks getticks(void)
{
unsigned a, d;
asm volatile("rdtsc" : "=a" (a), "=d" (d));
......
......@@ -129,15 +129,16 @@ void engine_ranktasks ( struct engine *e ) {
struct task *t;
struct space *s = e->s;
int *tid = s->tasks_ind;
/* Run through the tasks and get all the waits right. */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
tid[k] = k;
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
s->tasks[k].unlock_tasks[j]->wait += 1;
}
/* Main loop. */
for ( rank = 0 ; left < s->nr_tasks ; rank++ ) {
for ( j = 0 , rank = 0 ; left < s->nr_tasks ; rank++ ) {
/* Load the tids of tasks with no waits. */
for ( k = left ; k < s->nr_tasks ; k++ )
......@@ -308,8 +309,8 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
}
/* Sort the queues topologically. */
for ( k = 0 ; k < nr_queues ; k++ )
queue_sort( &e->queues[k] );
// for ( k = 0 ; k < nr_queues ; k++ )
// queue_sort( &e->queues[k] );
/* Allocate and init the threads. */
if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL )
......
......@@ -40,7 +40,7 @@
#define lock_type volatile int
#define lock_init( l ) ( *l = 0 )
#define lock_destroy( l ) 0
INLINE int lock_lock ( volatile int *l ) {
INLINE static int lock_lock ( volatile int *l ) {
while ( __sync_val_compare_and_swap( l , 0 , 1 ) != 0 )
while( *l );
return 0;
......
......@@ -43,7 +43,9 @@
/* Define the timer macros. */
#ifdef TIMER_VERBOSE
#define TIMER
#ifndef TIMER
#define TIMER
#endif
#endif
#ifdef TIMER
#define TIMER_TIC ticks tic = getticks();
......@@ -57,7 +59,7 @@
# define INLINE inline
# endif
#endif
INLINE ticks timer_toc ( int t , ticks tic ) {
INLINE static ticks timer_toc ( int t , ticks tic ) {
ticks d = (getticks() - tic);
__sync_add_and_fetch( &queue_timer[t] , d );
return d;
......@@ -445,6 +447,8 @@ void queue_sort ( struct queue *q ) {
int *weight, *wait;
int *data = q->tid;
struct task *t;
printf( "queue_sort: sorting queue with %i tasks.\n" , q->count );
/* Allocate and pre-compute each task's weight. */
if ( ( weight = (int *)alloca( sizeof(int) * q->count ) ) == NULL ||
......
......@@ -177,14 +177,8 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
int i, ind, off[8], inds[8], temp_i;
// float shift[3];
float buff[8], px[3];
struct cell *temp_c;
TIMER_TIC
/* Does this cell even need to be sorted? */
for ( temp_c = c ; temp_c != NULL && temp_c->nr_pairs == 0 ; temp_c = temp_c->parent );
if ( temp_c == NULL )
return;
/* start by allocating the entry arrays. */
if ( lock_lock( &c->lock ) != 0 )
error( "Failed to lock cell." );
......@@ -204,6 +198,11 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
if ( !( flags & (1 << j) ) )
continue;
/* Sort any un-sorted progeny. */
for ( k = 0 ; k < 8 ; k++ )
if ( c->progeny[k] != NULL && ( c->progeny[k]->sorts[0] == NULL || !(c->progeny[k]->sorts[0]->flags & (1 << j)) ) )
runner_dosort( r , c->progeny[k] , 1 << j );
/* Init the particle index offsets. */
for ( off[0] = 0 , k = 1 ; k < 8 ; k++ )
if ( c->progeny[k-1] != NULL )
......@@ -298,7 +297,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
error( "Sorting failed, indices borked." );
}
} */
#ifdef TIMER_VERBOSE
printf( "runner_dosort[%02i]: %i parts at depth %i (flags = %i%i%i%i%i%i%i%i%i%i%i%i%i) took %.3f ms.\n" ,
r->id , c->count , c->depth ,
......@@ -335,7 +334,7 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
runner_doghost( r , c->progeny[k] );
return;
}
/* Init the IDs that have to be updated. */
if ( ( pid = (int *)alloca( sizeof(int) * count ) ) == NULL )
error( "Call to alloca failed." );
......
......@@ -39,9 +39,15 @@ enum {
extern ticks runner_timer[ runner_timer_count ];
/* SID stuff. */
extern const char runner_flip[];
/* Define the timer macros. */
#ifdef TIMER_VERBOSE
#define TIMER
#ifndef TIMER
#define TIMER
#endif
#endif
#ifdef TIMER
#define TIMER_TIC ticks tic = getticks();
......@@ -55,7 +61,7 @@ extern ticks runner_timer[ runner_timer_count ];
# define INLINE inline
# endif
#endif
INLINE ticks timer_toc ( int t , ticks tic ) {
INLINE static ticks timer_toc ( int t , ticks tic ) {
ticks d = (getticks() - tic);
__sync_add_and_fetch( &runner_timer[t] , d );
return d;
......
......@@ -498,7 +498,7 @@ void DOPAIR_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
#endif
#ifdef TIMER_VERBOSE
printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 );
printf( "runner_dopair_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 );
#else
TIMER_TOC(runner_timer_dopair_subset);
#endif
......@@ -607,7 +607,7 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
#endif
#ifdef TIMER_VERBOSE
printf( "runner_doself_subset[%02i]: %i/%i parts at depth %i (r_max=%.3f/%.3f) took %.3f ms.\n" , r->id , count_i , count_j , ci->depth , ci->h_max , cj->h_max , ((double)TIMER_TOC(TIMER_DOPAIR)) / CPU_TPS * 1000 );
printf( "runner_doself_subset[%02i]: %i/%i parts at depth %i took %.3f ms.\n" , r->id , count , ci->count , ci->depth , ((double)TIMER_TOC(TIMER_DOSELF)) / CPU_TPS * 1000 );
#else
TIMER_TOC(runner_timer_dopair_subset);
#endif
......@@ -624,12 +624,11 @@ void DOSELF_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *
* @param cj The second #cell.
*/
void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
struct engine *restrict e = r->e;
int pid, pjd, k, sid;
double rshift, shift[3] = { 0.0 , 0.0 , 0.0 };
struct cell *temp;
struct entry *restrict sort_i, *restrict sort_j;
struct part *restrict pi, *restrict pj, *restrict parts_i, *restrict parts_j;
struct cpart *restrict cpi, *restrict cparts_i;
......@@ -653,26 +652,9 @@ void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restri
/* Anything to do here? */
if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
return;
/* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
shift[k] = e->s->dim[k];
else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 )
shift[k] = -e->s->dim[k];
}
/* Get the sorting index. */
for ( sid = 0 , k = 0 ; k < 3 ; k++ )
sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
/* Switch the cells around? */
if ( runner_flip[sid] ) {
temp = ci; ci = cj; cj = temp;
for ( k = 0 ; k < 3 ; k++ )
shift[k] = -shift[k];
}
sid = sortlistID[sid];
/* Get the sort ID. */
sid = space_getsid( e->s , &ci , &cj , shift );
/* Get the cutoff shift. */
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
......@@ -709,7 +691,8 @@ void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restri
/* if ( ci->split && cj->split && sid == 4 )
printf( "boing!\n" ); */
/* Loop over the parts in ci. */
for ( pid = count_i-1 ; pid >= 0 && sort_i[pid].d + hi_max > dj_min ; pid-- ) {
......@@ -857,12 +840,11 @@ void DOPAIR1 ( struct runner *r , struct cell *restrict ci , struct cell *restri
}
void DOPAIR2 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj ) {
void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
struct engine *restrict e = r->e;
int pid, pjd, k, sid;
double rshift, shift[3] = { 0.0 , 0.0 , 0.0 };
struct cell *temp;
struct entry *restrict sort_i, *restrict sort_j;
struct entry *restrict sortdt_i = NULL, *restrict sortdt_j = NULL;
int countdt_i = 0, countdt_j = 0;
......@@ -888,26 +870,9 @@ void DOPAIR2 ( struct runner *r , struct cell *restrict ci , struct cell *restri
/* Anything to do here? */
if ( ci->dt_min > dt_max && cj->dt_min > dt_max )
return;
/* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -e->s->dim[k]/2 )
shift[k] = e->s->dim[k];
else if ( cj->loc[k] - ci->loc[k] > e->s->dim[k]/2 )
shift[k] = -e->s->dim[k];
}
/* Get the sorting index. */
for ( sid = 0 , k = 0 ; k < 3 ; k++ )
sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
/* Switch the cells around? */
if ( runner_flip[sid] ) {
temp = ci; ci = cj; cj = temp;
for ( k = 0 ; k < 3 ; k++ )
shift[k] = -shift[k];
}
sid = sortlistID[sid];
/* Get the shift ID. */
sid = space_getsid( e->s , &ci , &cj , shift );
/* Get the cutoff shift. */
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
......@@ -1262,7 +1227,7 @@ void DOSELF1 ( struct runner *r , struct cell *restrict c ) {
pi = &parts[pid];
if ( cpi->dt > dt_max )
continue;
/* Get the particle position and radius. */
for ( k = 0 ; k < 3 ; k++ )
pix[k] = cpi->x[k];
......@@ -1512,9 +1477,9 @@ void DOSELF2 ( struct runner *r , struct cell *restrict c ) {
* redundant computations to find the sid on-the-fly.
*/
void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj , int sid ) {
void DOSUB1 ( struct runner *r , struct cell *ci , struct cell *cj , int sid ) {
int j, k;
int j = 0, k;
double shift[3];
float h;
struct space *s = r->e->s;
......@@ -1551,31 +1516,9 @@ void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restric
h = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) );
/* Get the type of pair if not specified explicitly. */
if ( sid < 0 ) {
/* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -s->dim[k]/2 )
shift[k] = s->dim[k];
else if ( cj->loc[k] - ci->loc[k] > s->dim[k]/2 )
shift[k] = -s->dim[k];
else
shift[k] = 0.0;
}
/* Get the sorting index. */
for ( sid = 0 , k = 0 ; k < 3 ; k++ )
sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
/* Flip? */
if ( sid < 13 ) {
struct cell *temp = cj; cj = ci; ci = temp;
}
else
sid = 26 - sid;
}
// if ( sid < 0 )
sid = space_getsid( s , &ci , &cj , shift );
/* Recurse? */
if ( ci->split && cj->split &&
ci->h_max*2 < h && cj->h_max*2 < h ) {
......@@ -1787,7 +1730,7 @@ void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restric
#ifdef TIMER_VERBOSE
printf( "runner_DOSUB[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
printf( "runner_dosub1[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , sid , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
#else
TIMER_TOC(TIMER_DOSUB);
#endif
......@@ -1795,7 +1738,7 @@ void DOSUB1 ( struct runner *r , struct cell *restrict ci , struct cell *restric
}
void DOSUB2 ( struct runner *r , struct cell *restrict ci , struct cell *restrict cj , int sid ) {
void DOSUB2 ( struct runner *r , struct cell *ci , struct cell *cj , int sid ) {
int j, k;
double shift[3];
......@@ -1834,30 +1777,8 @@ void DOSUB2 ( struct runner *r , struct cell *restrict ci , struct cell *restric
h = fmin( ci->h[0] , fmin( ci->h[1] , ci->h[2] ) );
/* Get the type of pair if not specified explicitly. */
if ( sid < 0 ) {
/* Get the relative distance between the pairs, wrapping. */
for ( k = 0 ; k < 3 ; k++ ) {
if ( cj->loc[k] - ci->loc[k] < -s->dim[k]/2 )
shift[k] = s->dim[k];
else if ( cj->loc[k] - ci->loc[k] > s->dim[k]/2 )
shift[k] = -s->dim[k];
else
shift[k] = 0.0;
}
/* Get the sorting index. */
for ( sid = 0 , k = 0 ; k < 3 ; k++ )
sid = 3*sid + ( (cj->loc[k] - ci->loc[k] + shift[k] < 0) ? 0 : (cj->loc[k] - ci->loc[k] + shift[k] > 0) ? 2 : 1 );
/* Flip? */
if ( sid < 13 ) {
struct cell *temp = cj; cj = ci; ci = temp;
}
else
sid = 26 - sid;
}
if ( sid < 0 )
sid = space_getsid( s , &ci , &cj , shift );
/* Recurse? */
if ( ci->split && cj->split &&
......@@ -2070,7 +1991,7 @@ void DOSUB2 ( struct runner *r , struct cell *restrict ci , struct cell *restric
#ifdef TIMER_VERBOSE
printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
printf( "runner_dosub2[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , sid , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
#else
TIMER_TOC(TIMER_DOSUB);
#endif
......@@ -2518,7 +2439,7 @@ void DOSUB_SUBSET ( struct runner *r , struct cell *restrict ci , struct part *p
#ifdef TIMER_VERBOSE
printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , flags , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
printf( "runner_dosub[%02i]: flags=%i at depth %i took %.3f ms.\n" , r->id , sid , ci->depth , ((double)TIMER_TOC(TIMER_DOSUB)) / CPU_TPS * 1000 );
#else
TIMER_TOC(TIMER_DOSUB);
#endif
......
This diff is collapsed.
......@@ -26,7 +26,7 @@
#define space_splitratio 0.875
#define space_splitsize_default 400
#define space_subsize_default 1000
#define space_dosub 0
#define space_dosub 1
#define space_stretch 1.0
#define space_maxtaskspercell 43
......@@ -98,10 +98,12 @@ struct space {
/* function prototypes. */
void space_addsorts ( struct space *s , struct task *t , struct cell *ci , struct cell *cj , int sid );
void parts_sort ( struct part *parts , int *ind , int N , int min , int max );
struct cell *space_getcell ( struct space *s );
struct task *space_gettask ( struct space *s );
struct task *space_addtask ( struct space *s , int type , int subtype , int flags , int wait , struct cell *ci , struct cell *cj , struct task *unlock_tasks[] , int nr_unlock_tasks , struct cell *unlock_cells[] , int nr_unlock_cells );
int space_getsid ( struct space *s , struct cell **ci , struct cell **cj , double *shift );
void space_init ( struct space *s , double dim[3] , struct part *parts , int N , int periodic , double h_max );
void space_maketasks ( struct space *s , int do_sort );
void space_map_cells ( struct space *s , int full , void (*fun)( struct cell *c , void *data ) , void *data );
......
......@@ -44,6 +44,30 @@ const char *taskID_names[task_type_count] = { "none" , "sort" , "self" , "pair"
#define error(s) { fprintf( stderr , "%s:%s:%i: %s\n" , __FILE__ , __FUNCTION__ , __LINE__ , s ); abort(); }
/**
 * @brief Remove all unlocks to tasks that are of the given type.
 *
 * Walks the task's @c unlock_tasks array while holding the task's lock and
 * drops every entry whose @c type equals @c type. Entries are removed by
 * overwriting them with the current last entry, so the relative order of
 * the remaining entries is not preserved.
 *
 * @param t The #task whose unlock list is cleaned.
 * @param type The task type ID to remove.
 */
void task_cleanunlock ( struct task *t , int type ) {

    int k;

    lock_lock( &t->lock );

    /* Swap-remove: the last entry is moved into slot k, so k may only
       advance when nothing was removed. Advancing unconditionally would
       skip the entry that was just moved in and could leave a task of
       the given type in the list. */
    for ( k = 0 ; k < t->nr_unlock_tasks ; )
        if ( t->unlock_tasks[k]->type == type ) {
            t->nr_unlock_tasks -= 1;
            t->unlock_tasks[k] = t->unlock_tasks[ t->nr_unlock_tasks ];
            }
        else
            k += 1;

    lock_unlock_blind( &t->lock );

    }
/**
* @brief Remove an unlock_task from the given task.
*
......@@ -108,10 +132,6 @@ void task_addunlock( struct task *ta , struct task *tb ) {
int k;
/* Bogus? */
if ( ta == NULL || tb == NULL )
return;
lock_lock( &ta->lock );
/* Check if ta already unlocks tb. */
......
......@@ -65,4 +65,5 @@ struct task {
/* Function prototypes. */
void task_rmunlock( struct task *ta , struct task *tb );
void task_rmunlock_blind( struct task *ta , struct task *tb );
void task_cleanunlock ( struct task *t , int type );
void task_addunlock( struct task *ta , struct task *tb );
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment