Commit 9494cf3b authored by Pedro Gonnet
Browse files

Several bug fixes; tasks are now only re-generated when cells change.


Former-commit-id: c89209038aa1469220b59886be0c29f5f38119d6
parent c826e1a1
...@@ -65,6 +65,7 @@ struct cell { ...@@ -65,6 +65,7 @@ struct cell {
/* Pointers for the sorted indices. */ /* Pointers for the sorted indices. */
struct entry *sort; struct entry *sort;
int sorted;
/* Number of pairs associated with this cell. */ /* Number of pairs associated with this cell. */
// int nr_pairs; // int nr_pairs;
...@@ -79,7 +80,7 @@ struct cell { ...@@ -79,7 +80,7 @@ struct cell {
struct cell *super; struct cell *super;
/* The tasks computing this cell's sorts. */ /* The tasks computing this cell's sorts. */
struct task *sorts[13]; struct task *sorts;
int sortsize; int sortsize;
/* The tasks computing this cell's density. */ /* The tasks computing this cell's density. */
......
...@@ -65,7 +65,6 @@ void engine_prepare ( struct engine *e ) { ...@@ -65,7 +65,6 @@ void engine_prepare ( struct engine *e ) {
int j, k, qid; int j, k, qid;
struct space *s = e->s; struct space *s = e->s;
struct queue *q; struct queue *q;
float dt_step = e->dt_step;
TIMER_TIC TIMER_TIC
...@@ -90,25 +89,12 @@ void engine_prepare ( struct engine *e ) { ...@@ -90,25 +89,12 @@ void engine_prepare ( struct engine *e ) {
} }
// printf( "engine_prepare: re-filling queues took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); // printf( "engine_prepare: re-filling queues took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Re-set the particle data. */
// tic = getticks();
#pragma omp parallel for schedule(static) private(j)
for ( k = 0 ; k < s->nr_parts ; k++ )
if ( s->parts[k].dt <= dt_step ) {
s->parts[k].wcount = 0.0f;
s->parts[k].density.wcount_dh = 0.0f;
s->parts[k].rho = 0.0f;
s->parts[k].rho_dh = 0.0f;
s->parts[k].density.div_v = 0.0f;
for ( j = 0 ; j < 3 ; ++j)
s->parts[k].density.curl_v[j] = 0.0f;
}
// printf( "engine_prepare: re-setting particle data took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Run through the tasks and get all the waits right. */ /* Run through the tasks and get all the waits right. */
// tic = getticks(); // tic = getticks();
#pragma omp parallel for schedule(static) private(j) #pragma omp parallel for schedule(static) private(j)
for ( k = 0 ; k < s->nr_tasks ; k++ ) { for ( k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[k].skip )
continue;
for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ ) for ( j = 0 ; j < s->tasks[k].nr_unlock_tasks ; j++ )
__sync_add_and_fetch( &s->tasks[k].unlock_tasks[j]->wait , 1 ); __sync_add_and_fetch( &s->tasks[k].unlock_tasks[j]->wait , 1 );
for ( j = 0 ; j < s->tasks[k].nr_unlock_cells ; j++ ) for ( j = 0 ; j < s->tasks[k].nr_unlock_cells ; j++ )
...@@ -260,7 +246,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { ...@@ -260,7 +246,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
else { else {
/* Init with the first non-null child. */ /* Init with the first non-null child. */
for ( k = 0 ; c->progeny[k] == 0 ; k++ ); for ( k = 0 ; c->progeny[k] == NULL ; k++ );
dt_min = c->progeny[k]->dt_min; dt_min = c->progeny[k]->dt_min;
dt_max = c->progeny[k]->dt_max; dt_max = c->progeny[k]->dt_max;
h_max = c->progeny[k]->h_max; h_max = c->progeny[k]->h_max;
...@@ -282,11 +268,12 @@ void engine_map_kick_first ( struct cell *c , void *data ) { ...@@ -282,11 +268,12 @@ void engine_map_kick_first ( struct cell *c , void *data ) {
c->dt_max = dt_max; c->dt_max = dt_max;
c->h_max = h_max; c->h_max = h_max;
c->dx_max = dx_max; c->dx_max = dx_max;
c->sorted = 0;
/* Clean out the task pointers. */ /* Clean out the task pointers. */
c->sorts[0] = NULL; // c->sorts[0] = NULL;
c->nr_tasks = 0; // c->nr_tasks = 0;
c->nr_density = 0; // c->nr_density = 0;
} }
...@@ -474,7 +461,7 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_ ...@@ -474,7 +461,7 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
#if defined(HAVE_SETAFFINITY) #if defined(HAVE_SETAFFINITY)
cpu_set_t cpuset; cpu_set_t cpuset;
#endif #endif
int k, qid, nrq; int k;
/* Store the values. */ /* Store the values. */
e->s = s; e->s = s;
...@@ -498,25 +485,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_ ...@@ -498,25 +485,6 @@ void engine_init ( struct engine *e , struct space *s , int nr_threads , int nr_
error( "Failed to allocate queues." ); error( "Failed to allocate queues." );
bzero( e->queues , nr_queues * sizeof(struct queue) ); bzero( e->queues , nr_queues * sizeof(struct queue) );
/* Init the queues. */
for ( k = 0 ; k < nr_queues ; k++ )
queue_init( &e->queues[k] , s->nr_tasks , s->tasks );
/* How many queues to fill initially? */
for ( nrq = 0 , k = nr_queues ; k > 0 ; k = k / 2 )
nrq += 1;
/* Fill the queues (round-robin). */
for ( k = 0 ; k < s->nr_tasks ; k++ ) {
if ( s->tasks[ s->tasks_ind[k] ].type == task_type_none )
continue;
// qid = 0;
// qid = k % nrq;
qid = k % nr_queues;
e->queues[qid].tid[ e->queues[qid].count ] = s->tasks_ind[k];
e->queues[qid].count += 1;
}
/* Sort the queues topologically. */ /* Sort the queues topologically. */
// for ( k = 0 ; k < nr_queues ; k++ ) // for ( k = 0 ; k < nr_queues ; k++ )
// queue_sort( &e->queues[k] ); // queue_sort( &e->queues[k] );
......
...@@ -41,8 +41,8 @@ ...@@ -41,8 +41,8 @@
#define lock_init( l ) ( *l = 0 ) #define lock_init( l ) ( *l = 0 )
#define lock_destroy( l ) 0 #define lock_destroy( l ) 0
INLINE static int lock_lock ( volatile int *l ) { INLINE static int lock_lock ( volatile int *l ) {
while ( __sync_val_compare_and_swap( l , 0 , 1 ) != 0 ) while ( __sync_val_compare_and_swap( l , 0 , 1 ) != 0 );
while( *l ); // while( *l );
return 0; return 0;
} }
#define lock_trylock( l ) ( ( *(l) ) ? 1 : __sync_val_compare_and_swap( l , 0 , 1 ) ) #define lock_trylock( l ) ( ( *(l) ) ? 1 : __sync_val_compare_and_swap( l , 0 , 1 ) )
......
...@@ -298,7 +298,7 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep ...@@ -298,7 +298,7 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
/* Grab the task lock. */ /* Grab the task lock. */
// if ( blocking ) { // if ( blocking ) {
if ( lock_lock( qlock ) != 0 ) if ( lock_lock( qlock ) != 0 )
error( "Locking the task_lock failed.\n" ); error( "Locking the qlock failed.\n" );
// } // }
// else { // else {
// if ( lock_trylock( qlock ) != 0 ) // if ( lock_trylock( qlock ) != 0 )
...@@ -412,7 +412,7 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep ...@@ -412,7 +412,7 @@ struct task *queue_gettask ( struct queue *q , int rid , int blocking , int keep
/* Release the task lock. */ /* Release the task lock. */
if ( lock_unlock( qlock ) != 0 ) if ( lock_unlock( qlock ) != 0 )
error( "Unlocking the task_lock failed.\n" ); error( "Unlocking the qlock failed.\n" );
/* Leave? */ /* Leave? */
if ( tid >= 0 ) { if ( tid >= 0 ) {
......
...@@ -198,10 +198,10 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -198,10 +198,10 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
for ( k = 0 ; k < 8 ; k++ ) { for ( k = 0 ; k < 8 ; k++ ) {
if ( c->progeny[k] == NULL ) if ( c->progeny[k] == NULL )
continue; continue;
if ( c->progeny[k]->sorts[0] == NULL ) if ( c->progeny[k]->sorts == NULL )
missing = flags; missing = flags;
else else
missing = ( c->progeny[k]->sorts[0]->flags ^ flags ) & flags; missing = ( c->progeny[k]->sorts->flags ^ flags ) & flags;
if ( missing ) if ( missing )
runner_dosort( r , c->progeny[k] , missing ); runner_dosort( r , c->progeny[k] , missing );
} }
...@@ -262,6 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -262,6 +262,9 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
c->sort[ j*(c->count + 1) + c->count ].d = FLT_MAX; c->sort[ j*(c->count + 1) + c->count ].d = FLT_MAX;
c->sort[ j*(c->count + 1) + c->count ].i = 0; c->sort[ j*(c->count + 1) + c->count ].i = 0;
/* Mark as sorted. */
c->sorted |= ( 1 << j );
} /* loop over sort arrays. */ } /* loop over sort arrays. */
} /* progeny? */ } /* progeny? */
...@@ -287,6 +290,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) { ...@@ -287,6 +290,7 @@ void runner_dosort ( struct runner *r , struct cell *c , int flags ) {
c->sort[ j*(count + 1) + c->count ].d = FLT_MAX; c->sort[ j*(count + 1) + c->count ].d = FLT_MAX;
c->sort[ j*(count + 1) + c->count ].i = 0; c->sort[ j*(count + 1) + c->count ].i = 0;
runner_dosort_ascending( &c->sort[ j*(count + 1) ] , c->count ); runner_dosort_ascending( &c->sort[ j*(count + 1) ] , c->count );
c->sorted |= ( 1 << j );
} }
} }
......
...@@ -655,6 +655,10 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) { ...@@ -655,6 +655,10 @@ void DOPAIR1 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get the sort ID. */ /* Get the sort ID. */
sid = space_getsid( e->s , &ci , &cj , shift ); sid = space_getsid( e->s , &ci , &cj , shift );
/* Have the cells been sorted? */
if ( !(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid) ) )
error( "Trying to interact unsorted cells." );
/* Get the cutoff shift. */ /* Get the cutoff shift. */
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ ) for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
rshift += shift[k]*runner_shift[ 3*sid + k ]; rshift += shift[k]*runner_shift[ 3*sid + k ];
...@@ -902,6 +906,10 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) { ...@@ -902,6 +906,10 @@ void DOPAIR2 ( struct runner *r , struct cell *ci , struct cell *cj ) {
/* Get the shift ID. */ /* Get the shift ID. */
sid = space_getsid( e->s , &ci , &cj , shift ); sid = space_getsid( e->s , &ci , &cj , shift );
/* Have the cells been sorted? */
if ( !(ci->sorted & (1 << sid)) || !(cj->sorted & (1 << sid) ) )
error( "Trying to interact unsorted cells." );
/* Get the cutoff shift. */ /* Get the cutoff shift. */
for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ ) for ( rshift = 0.0 , k = 0 ; k < 3 ; k++ )
rshift += shift[k]*runner_shift[ 3*sid + k ]; rshift += shift[k]*runner_shift[ 3*sid + k ];
......
This diff is collapsed.
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#define space_subsize_default 1000 #define space_subsize_default 1000
#define space_dosub 1 #define space_dosub 1
#define space_stretch 1.0 #define space_stretch 1.0
#define space_maxtaskspercell 43 #define space_maxtaskspercell 30
/* Convert cell location to ID. */ /* Convert cell location to ID. */
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type #define VEC_MACRO(elcount, type) __attribute__((vector_size((elcount)*sizeof(type)))) type
/* So what will the vector size be? */ /* So what will the vector size be? */
#ifdef __AVX__ #ifdef NO__AVX__
#define VECTORIZE #define VECTORIZE
#define VEC_SIZE 8 #define VEC_SIZE 8
#define VEC_FLOAT __m256 #define VEC_FLOAT __m256
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
#define vec_ftoi(a) _mm256_cvttps_epi32(a) #define vec_ftoi(a) _mm256_cvttps_epi32(a)
#define vec_fmin(a,b) _mm256_min_ps(a,b) #define vec_fmin(a,b) _mm256_min_ps(a,b)
#define vec_fmax(a,b) _mm256_max_ps(a,b) #define vec_fmax(a,b) _mm256_max_ps(a,b)
#elif defined( __SSE2__ ) #elif defined( NO__SSE2__ )
#define VECTORIZE #define VECTORIZE
#define VEC_SIZE 4 #define VEC_SIZE 4
#define VEC_FLOAT __m128 #define VEC_FLOAT __m128
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment