Commit 0c135c44 authored by Pedro Gonnet
Browse files

Limit the number of threads on the memory-bound parts of the code.


Former-commit-id: db4d3aa9d24dc019f91f77c56f5a2884997b7c94
parent 58bc2643
......@@ -22,7 +22,7 @@ AUTOMAKE_OPTIONS=gnu
# Add the debug flag to the whole thing
AM_CFLAGS = -g -O3 -Wall -Werror -ffast-math -fstrict-aliasing -ftree-vectorize \
-funroll-loops $(SIMD_FLAGS) $(OPENMP_CFLAGS) \
-DTIMER -DCOUNTER -DCPU_TPS=2.40e9
-DTIMER -DCOUNTER -DCPU_TPS=2.40e9 -mfma4
# AM_CFLAGS = -Wall -Werror $(OPENMP_CFLAGS) \
# -DTIMER -DCOUNTER -DCPU_TPS=2.67e9
......
......@@ -326,7 +326,7 @@ void engine_prepare ( struct engine *e ) {
/* Run through the tasks and mark as skip or not. */
// tic = getticks();
rebuild = ( e->step == 0 || engine_marktasks( e ) );
rebuild = 1 || ( e->step == 0 || engine_marktasks( e ) );
// printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Did this not go through? */
......
......@@ -625,7 +625,7 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) {
void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
int k, qid = -1;
int qid = -1;
/* Ignore skipped tasks. */
if ( t->skip )
......@@ -652,9 +652,10 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
/* If no previous owner, find the shortest queue. */
if ( qid < 0 )
for ( qid = 0 , k = 1 ; k < s->nr_queues ; k++ )
qid = rand() % s->nr_queues;
/* for ( qid = 0 , int k = 1 ; k < s->nr_queues ; k++ )
if ( s->queues[k].count < s->queues[qid].count )
qid = k;
qid = k; */
/* Increase the waiting counter. */
atomic_inc( &s->waiting );
......@@ -674,7 +675,7 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
void scheduler_done ( struct scheduler *s , struct task *t ) {
int k;
int k, res;
struct task *t2;
/* Release whatever locks this task held. */
......@@ -695,15 +696,17 @@ void scheduler_done ( struct scheduler *s , struct task *t ) {
they are ready. */
for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) {
t2 = t->unlock_tasks[k];
if ( atomic_dec( &t2->wait ) == 1 && !t2->skip )
if ( ( res = atomic_dec( &t2->wait ) ) < 1 )
error( "Negative wait!" );
if ( res == 1 && !t2->skip )
scheduler_enqueue( s , t2 );
}
/* Task definitely done. */
pthread_mutex_lock( &s->sleep_mutex );
// pthread_mutex_lock( &s->sleep_mutex );
atomic_dec( &s->waiting );
pthread_cond_broadcast( &s->sleep_cond );
pthread_mutex_unlock( &s->sleep_mutex );
// pthread_cond_broadcast( &s->sleep_cond );
// pthread_mutex_unlock( &s->sleep_mutex );
}
......@@ -751,10 +754,10 @@ struct task *scheduler_gettask ( struct scheduler *s , int qid ) {
}
/* If we failed, take a short nap. */
pthread_mutex_lock( &s->sleep_mutex );
/* pthread_mutex_lock( &s->sleep_mutex );
if ( s->waiting > 0 )
pthread_cond_wait( &s->sleep_cond , &s->sleep_mutex );
pthread_mutex_unlock( &s->sleep_mutex );
pthread_mutex_unlock( &s->sleep_mutex ); */
}
......
......@@ -27,7 +27,7 @@
/* Flags . */
#define scheduler_flag_none 0
#define scheduler_flag_steal 1
#define scheduler_flag_maxsteal 2
#define scheduler_flag_maxsteal 1
/* Data of a scheduler. */
......
......@@ -161,7 +161,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
struct part *restrict finger, *restrict p, *parts = s->parts;
int *ind;
double ih[3], dim[3];
// ticks tic;
ticks tic;
/* Be verbose about this. */
printf( "space_rebuild: (re)building space...\n" ); fflush(stdout);
......@@ -252,6 +252,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
s->cells[k].dx_max = 0.0f;
s->cells[k].sorted = 0;
s->cells[k].count = 0;
s->cells[k].kick1 = NULL;
s->cells[k].kick2 = NULL;
}
s->maxdepth = 0;
......@@ -259,7 +260,7 @@ void space_rebuild ( struct space *s , double cell_max ) {
}
/* Run through the particles and get their cell index. */
// tic = getticks();
tic = getticks();
if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL )
error( "Failed to allocate temporary particle indices." );
ih[0] = s->ih[0]; ih[1] = s->ih[1]; ih[2] = s->ih[2];
......@@ -276,12 +277,12 @@ void space_rebuild ( struct space *s , double cell_max ) {
ind[k] = cell_getid( cdim , p->x[0]*ih[0] , p->x[1]*ih[1] , p->x[2]*ih[2] );
atomic_inc( &s->cells[ ind[k] ].count );
}
// printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Sort the parts according to their cells. */
// tic = getticks();
tic = getticks();
parts_sort( parts , ind , s->nr_parts , 0 , s->nr_cells-1 );
// printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
/* Verify sort. */
/* for ( k = 1 ; k < nr_parts ; k++ ) {
......@@ -307,21 +308,19 @@ void space_rebuild ( struct space *s , double cell_max ) {
/* At this point, we have the upper-level cells, old or new. Now make
sure that the parts in each cell are ok. */
// tic = getticks();
tic = getticks();
k = 0;
#pragma omp parallel shared(s,k)
#pragma omp parallel num_threads(8) shared(s,k)
{
while ( 1 ) {
int myk;
#pragma omp critical
myk = k++;
int myk = atomic_inc( &k );
if ( myk < s->nr_cells )
space_split( s , &s->cells[myk] );
else
break;
}
}
// printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
printf( "space_rebuild: space_rebuild_recurse took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 );
}
......@@ -358,7 +357,7 @@ void parts_sort ( struct part *parts , int *ind , int N , int min , int max ) {
first = 0; last = 1; waiting = 1;
/* Parallel bit. */
#pragma omp parallel default(shared) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_p)
#pragma omp parallel num_threads(8) default(shared) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_p)
{
/* Main loop. */
......@@ -565,11 +564,11 @@ void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cel
}
/* Call the recursive function on all higher-level cells. */
#pragma omp parallel shared(s,cid)
// #pragma omp parallel shared(s,cid)
{
int mycid;
while ( 1 ) {
#pragma omp critical
// #pragma omp critical
mycid = cid++;
if ( mycid < s->nr_cells )
rec_map( &s->cells[mycid] );
......@@ -602,11 +601,11 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell
}
/* Call the recursive function on all higher-level cells. */
#pragma omp parallel shared(s,cid)
// #pragma omp parallel shared(s,cid)
{
int mycid;
while ( 1 ) {
#pragma omp critical
// #pragma omp critical
mycid = cid++;
if ( mycid < s->nr_cells )
rec_map( &s->cells[mycid] );
......@@ -790,15 +789,22 @@ struct cell *space_getcell ( struct space *s ) {
s->cells_new = c->next;
s->tot_cells += 1;
/* Unlock the space. */
lock_unlock_blind( &s->lock );
/* Init some things in the cell. */
bzero( c , sizeof(struct cell) );
c->sorts = NULL;
c->nr_tasks = 0;
c->nr_density = 0;
c->dx_max = 0.0f;
c->sorted = 0;
c->count = 0;
c->kick1 = NULL;
c->kick2 = NULL;
if ( lock_init( &c->lock ) != 0 )
error( "Failed to initialize cell spinlock." );
c->owner = -1;
/* Unlock the space. */
lock_unlock_blind( &s->lock );
return c;
}
......
......@@ -26,8 +26,8 @@
#define space_splitratio 0.875f
#define space_splitsize_default 400
#define space_subsize_default 5000
#define space_stretch 1.05f
#define space_maxreldx 0.2f
#define space_stretch 1.10f
#define space_maxreldx 0.25f
#define space_qstack 1000
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment