-
Pedro Gonnet authored
Former-commit-id: 9afddac98ec7caa1d9087df5463f2ecd8546da35
Pedro Gonnet authoredFormer-commit-id: 9afddac98ec7caa1d9087df5463f2ecd8546da35
engine.c 28.64 KiB
/*******************************************************************************
* This file is part of SWIFT.
* Coypright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
******************************************************************************/
/* Config parameters. */
#include "../config.h"
/* Some standard headers. */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <omp.h>
#include <sched.h>
/* Local headers. */
#include "cycle.h"
#include "atomic.h"
#include "timers.h"
#include "const.h"
#include "vector.h"
#include "lock.h"
#include "task.h"
#include "part.h"
#include "debug.h"
#include "cell.h"
#include "space.h"
#include "queue.h"
#include "scheduler.h"
#include "engine.h"
#include "runner.h"
#include "runner_iact.h"
#include "error.h"
/* Convert cell location to ID. */
#define cell_getid( cdim , i , j , k ) ( (int)(k) + (cdim)[2]*( (int)(j) + (cdim)[1]*(int)(i) ) )
/**
* @brief Fill the #space's task list.
*
* @param s The #space we are working in.
*/
void engine_maketasks ( struct engine *e ) {
struct space *s = e->s;
struct scheduler *sched = &e->sched;
int i, j, k, ii, jj, kk, iii, jjj, kkk, cid, cjd, sid;
int *cdim = s->cdim;
struct task *t, *t2;
struct cell *ci, *cj;
/* Re-set the scheduler. */
scheduler_reset( sched , s->tot_cells * engine_maxtaskspercell );
/* Run through the highest level of cells and add pairs. */
for ( i = 0 ; i < cdim[0] ; i++ )
for ( j = 0 ; j < cdim[1] ; j++ )
for ( k = 0 ; k < cdim[2] ; k++ ) {
cid = cell_getid( cdim , i , j , k );
if ( s->cells[cid].count == 0 )
continue;
ci = &s->cells[cid];
if ( ci->count == 0 )
continue;
scheduler_addtask( sched , task_type_self , task_subtype_density , 0 , 0 , ci , NULL , 0 );
for ( ii = -1 ; ii < 2 ; ii++ ) {
iii = i + ii;
if ( !s->periodic && ( iii < 0 || iii >= cdim[0] ) )
continue;
iii = ( iii + cdim[0] ) % cdim[0];
for ( jj = -1 ; jj < 2 ; jj++ ) {
jjj = j + jj;
if ( !s->periodic && ( jjj < 0 || jjj >= cdim[1] ) )
continue;
jjj = ( jjj + cdim[1] ) % cdim[1];
for ( kk = -1 ; kk < 2 ; kk++ ) {
kkk = k + kk;
if ( !s->periodic && ( kkk < 0 || kkk >= cdim[2] ) )
continue;
kkk = ( kkk + cdim[2] ) % cdim[2];
cjd = cell_getid( cdim , iii , jjj , kkk );
cj = &s->cells[cjd];
if ( cid >= cjd || cj->count == 0 )
continue;
sid = sortlistID[ (kk+1) + 3*( (jj+1) + 3*(ii+1) ) ];
t = scheduler_addtask( sched , task_type_pair , task_subtype_density , sid , 0 , ci , cj , 1 );
}
}
}
}
/* Split the tasks. */
scheduler_splittasks( sched );
/* Count the number of tasks associated with each cell and
store the density tasks in each cell, and make each sort
depend on the sorts of its progeny. */
// #pragma omp parallel for private(t,j)
for ( k = 0 ; k < sched->nr_tasks ; k++ ) {
t = &sched->tasks[k];
if ( t->skip )
continue;
if ( t->type == task_type_sort && t->ci->split )
for ( j = 0 ; j < 8 ; j++ ) {
if ( t->ci->progeny[j] != NULL ) {
if ( t->ci->progeny[j]->sorts == NULL )
t->ci->progeny[j]->sorts = scheduler_addtask( sched , task_type_sort , task_subtype_none , t->flags , 0 , t->ci->progeny[j] , NULL , 0 );
t->ci->progeny[j]->sorts->skip = 0;
task_addunlock( t->ci->progeny[j]->sorts , t );
}
}
if ( t->type == task_type_self ) {
atomic_inc( &t->ci->nr_tasks );
if ( t->subtype == task_subtype_density ) {
t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
}
}
else if ( t->type == task_type_pair ) {
atomic_inc( &t->ci->nr_tasks );
atomic_inc( &t->cj->nr_tasks );
if ( t->subtype == task_subtype_density ) {
t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t;
}
}
else if ( t->type == task_type_sub ) {
atomic_inc( &t->ci->nr_tasks );
if ( t->cj != NULL )
atomic_inc( &t->cj->nr_tasks );
if ( t->subtype == task_subtype_density ) {
t->ci->density[ atomic_inc( &t->ci->nr_density ) ] = t;
if ( t->cj != NULL )
t->cj->density[ atomic_inc( &t->cj->nr_density ) ] = t;
}
}
}
/* Append a ghost task to each cell. */
space_map_cells_pre( s , 1 , &scheduler_map_mkghosts , sched );
/* Run through the tasks and make force tasks for each density task.
Each force task depends on the cell ghosts and unlocks the kick2 task
of its super-cell. */
kk = sched->nr_tasks;
// #pragma omp parallel for private(t,t2)
for ( k = 0 ; k < kk ; k++ ) {
/* Get a pointer to the task. */
t = &sched->tasks[k];
/* Skip? */
if ( t->skip )
continue;
/* Self-interaction? */
if ( t->type == task_type_self && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->super->ghost );
t2 = scheduler_addtask( sched , task_type_self , task_subtype_force , 0 , 0 , t->ci , NULL , 0 );
task_addunlock( t->ci->ghost , t2 );
task_addunlock( t2 , t->ci->super->kick2 );
}
/* Otherwise, pair interaction? */
else if ( t->type == task_type_pair && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->super->ghost );
if ( t->ci->super != t->cj->super )
task_addunlock( t , t->cj->super->ghost );
t2 = scheduler_addtask( sched , task_type_pair , task_subtype_force , 0 , 0 , t->ci , t->cj , 0 );
task_addunlock( t->ci->ghost , t2 );
task_addunlock( t->cj->ghost , t2 );
task_addunlock( t2 , t->ci->super->kick2 );
if ( t->ci->super != t->cj->super )
task_addunlock( t2 , t->cj->super->kick2 );
}
/* Otherwise, sub interaction? */
else if ( t->type == task_type_sub && t->subtype == task_subtype_density ) {
task_addunlock( t , t->ci->super->ghost );
if ( t->cj != NULL && t->ci->super != t->cj->super )
task_addunlock( t , t->cj->super->ghost );
t2 = scheduler_addtask( sched , task_type_sub , task_subtype_force , t->flags , 0 , t->ci , t->cj , 0 );
task_addunlock( t->ci->ghost , t2 );
if ( t->cj != NULL )
task_addunlock( t->cj->ghost , t2 );
task_addunlock( t2 , t->ci->super->kick2 );
if ( t->cj != NULL && t->ci->super != t->cj->super )
task_addunlock( t2 , t->cj->super->kick2 );
}
}
/* Rank the tasks. */
scheduler_ranktasks( sched );
/* Count the number of each task type. */
int counts[ task_type_count+1 ];
for ( k = 0 ; k <= task_type_count ; k++ )
counts[k] = 0;
for ( k = 0 ; k < sched->nr_tasks ; k++ )
if ( !sched->tasks[k].skip )
counts[ (int)sched->tasks[k].type ] += 1;
else
counts[ task_type_count ] += 1;
printf( "engine_maketasks: task counts are [ %s=%i" , taskID_names[0] , counts[0] );
for ( k = 1 ; k < task_type_count ; k++ )
printf( " %s=%i" , taskID_names[k] , counts[k] );
printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout);
}
/**
* @brief Mark tasks to be skipped and set the sort flags accordingly.
*
* @return 1 if the space has to be rebuilt, 0 otherwise.
*/
int engine_marktasks ( struct engine *e ) {
struct scheduler *s = &e->sched;
int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
struct task *t, *tasks = s->tasks;
float dt_step = e->dt_step;
struct cell *ci, *cj;
/* Run through the tasks and mark as skip or not. */
for ( k = 0 ; k < nr_tasks ; k++ ) {
/* Get a handle on the kth task. */
t = &tasks[ ind[k] ];
/* Sort-task? Note that due to the task ranking, the sorts
will all come before the pairs and/or subs. */
if ( t->type == task_type_sort ) {
/* Re-set the flags. */
t->flags = 0;
t->skip = 1;
}
/* Single-cell task? */
else if ( t->type == task_type_self ||
t->type == task_type_ghost ||
( t->type == task_type_sub && t->cj == NULL ) ) {
/* Set this task's skip. */
t->skip = ( t->ci->dt_min > dt_step );
}
/* Pair? */
else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) {
/* Local pointers. */
ci = t->ci;
cj = t->cj;
/* Set this task's skip. */
t->skip = ( ci->dt_min > dt_step && cj->dt_min > dt_step );
/* Too much particle movement? */
if ( t->tight &&
( fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max > cj->dmin ||
ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) )
return 1;
/* Set the sort flags. */
if ( !t->skip && t->type == task_type_pair ) {
ci->sorts->flags |= (1 << t->flags);
ci->sorts->skip = 0;
cj->sorts->flags |= (1 << t->flags);
cj->sorts->skip = 0;
}
}
/* Kick2? */
else if ( t->type == task_type_kick2 )
t->skip = 0;
/* None? */
else if ( t->type == task_type_none )
t->skip = 1;
}
/* All is well... */
return 0;
}
/**
* @brief Prepare the #engine by re-building the cells and tasks.
*
* @param e The #engine to prepare.
*/
void engine_prepare ( struct engine *e ) {
int rebuild;
TIMER_TIC
/* Run through the tasks and mark as skip or not. */
// tic = getticks();
rebuild = ( e->step == 0 || engine_marktasks( e ) );
// printf( "space_prepare: space_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Did this not go through? */
if ( rebuild ) {
/* Re-build the space. */
// tic = getticks();
space_rebuild( e->s , 0.0 );
// printf( "engine_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Re-build the tasks. */
// tic = getticks();
engine_maketasks( e );
// printf( "engine_prepare: engine_maketasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Run through the tasks and mark as skip or not. */
// tic = getticks();
if ( engine_marktasks( e ) )
error( "engine_marktasks failed after space_rebuild." );
// printf( "engine_prepare: engine_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
}
/* Start the scheduler. */
scheduler_start( &e->sched , (1 << task_type_sort) |
(1 << task_type_self) |
(1 << task_type_pair) |
(1 << task_type_sub) |
(1 << task_type_ghost) |
(1 << task_type_kick2) );
TIMER_TOC( timer_prepare );
}
/**
* @brief Implements a barrier for the #runner threads.
*
* @param e The #engine.
*/
void engine_barrier ( struct engine *e ) {
/* First, get the barrier mutex. */
if ( pthread_mutex_lock( &e->barrier_mutex ) != 0 )
error( "Failed to get barrier mutex." );
/* This thread is no longer running. */
e->barrier_running -= 1;
/* If all threads are in, send a signal... */
if ( e->barrier_running == 0 )
if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
error( "Failed to broadcast barrier full condition." );
/* Wait for the barrier to open. */
while ( e->barrier_launch == 0 )
if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
error( "Eror waiting for barrier to close." );
/* This thread has been launched. */
e->barrier_running += 1;
e->barrier_launch -= 1;
/* If I'm the last one out, signal the condition again. */
if ( e->barrier_launch == 0 )
if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
error( "Failed to broadcast empty barrier condition." );
/* Last but not least, release the mutex. */
if ( pthread_mutex_unlock( &e->barrier_mutex ) != 0 )
error( "Failed to get unlock the barrier mutex." );
}
/**
* @brief Mapping function to collect the data from the second kick.
*/
void engine_collect_kick2 ( struct cell *c ) {
int k, updated = 0;
float dt_min = FLT_MAX, dt_max = 0.0f;
double ekin = 0.0, epot = 0.0;
float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f };
struct cell *cp;
/* If I am a super-cell, return immediately. */
if ( c->kick2 != NULL || c->count == 0 )
return;
/* If this cell is not split, I'm in trouble. */
if ( !c->split )
error( "Cell has no super-cell." );
/* Collect the values from the progeny. */
for ( k = 0 ; k < 8 ; k++ )
if ( ( cp = c->progeny[k] ) != NULL ) {
engine_collect_kick2( cp );
dt_min = fminf( dt_min , cp->dt_min );
dt_max = fmaxf( dt_max , cp->dt_max );
updated += cp->updated;
ekin += cp->ekin;
epot += cp->epot;
mom[0] += cp->mom[0]; mom[1] += cp->mom[1]; mom[2] += cp->mom[2];
ang[0] += cp->ang[0]; ang[1] += cp->ang[1]; ang[2] += cp->ang[2];
}
/* Store the collected values in the cell. */
c->dt_min = dt_min;
c->dt_max = dt_max;
c->updated = updated;
c->ekin = ekin;
c->epot = epot;
c->mom[0] = mom[0]; c->mom[1] = mom[1]; c->mom[2] = mom[2];
c->ang[0] = ang[0]; c->ang[1] = ang[1]; c->ang[2] = ang[2];
}
/**
* @brief Compute the force on a single particle brute-force.
*/
void engine_single_density ( double *dim , long long int pid , struct part *__restrict__ parts , int N , int periodic ) {
int i, k;
double r2, dx[3];
float fdx[3], ih;
struct part p;
/* Find "our" part. */
for ( k = 0 ; k < N && parts[k].id != pid ; k++ );
if ( k == N )
error( "Part not found." );
p = parts[k];
/* Clear accumulators. */
ih = 1.0f / p.h;
p.rho = 0.0f; p.rho_dh = 0.0f;
p.density.wcount = 0.0f; p.density.wcount_dh = 0.0f;
p.density.div_v = 0.0;
for ( k=0 ; k < 3 ; k++)
p.density.curl_v[k] = 0.0;
/* Loop over all particle pairs (force). */
for ( k = 0 ; k < N ; k++ ) {
if ( parts[k].id == p.id )
continue;
for ( i = 0 ; i < 3 ; i++ ) {
dx[i] = p.x[i] - parts[k].x[i];
if ( periodic ) {
if ( dx[i] < -dim[i]/2 )
dx[i] += dim[i];
else if ( dx[i] > dim[i]/2 )
dx[i] -= dim[i];
}
fdx[i] = dx[i];
}
r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2];
if ( r2 < p.h*p.h*kernel_gamma2 ) {
runner_iact_nonsym_density( r2 , fdx , p.h , parts[k].h , &p , &parts[k] );
}
}
/* Dump the result. */
p.rho = ih * ih * ih * ( p.rho + p.mass*kernel_root );
p.rho_dh = p.rho_dh * ih * ih * ih * ih;
p.density.wcount = ( p.density.wcount + kernel_root ) * ( 4.0f / 3.0 * M_PI * kernel_gamma3 );
printf( "pairs_single_density: part %lli (h=%e) has wcount=%e, rho=%e, rho_dh=%e.\n" , p.id , p.h , p.density.wcount , p.rho , p.rho_dh );
fflush(stdout);
}
void engine_single_force ( double *dim , long long int pid , struct part *__restrict__ parts , int N , int periodic ) {
int i, k;
double r2, dx[3];
float fdx[3];
struct part p;
/* Find "our" part. */
for ( k = 0 ; k < N && parts[k].id != pid ; k++ );
if ( k == N )
error( "Part not found." );
p = parts[k];
/* Clear accumulators. */
p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f;
p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f;
/* Loop over all particle pairs (force). */
for ( k = 0 ; k < N ; k++ ) {
// for ( k = N-1 ; k >= 0 ; k-- ) {
if ( parts[k].id == p.id )
continue;
for ( i = 0 ; i < 3 ; i++ ) {
dx[i] = p.x[i] - parts[k].x[i];
if ( periodic ) {
if ( dx[i] < -dim[i]/2 )
dx[i] += dim[i];
else if ( dx[i] > dim[i]/2 )
dx[i] -= dim[i];
}
fdx[i] = dx[i];
}
r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2];
if ( r2 < p.h*p.h*kernel_gamma2 || r2 < parts[k].h*parts[k].h*kernel_gamma2 ) {
p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f;
p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f;
runner_iact_nonsym_force( r2 , fdx , p.h , parts[k].h , &p , &parts[k] );
double dvdr = ( (p.v[0]-parts[k].v[0])*fdx[0] + (p.v[1]-parts[k].v[1])*fdx[1] + (p.v[2]-parts[k].v[2])*fdx[2] ) / sqrt(r2);
printf( "pairs_single_force: part %lli and %lli interact (r=%.3e,dvdr=%.3e) with a=[%.3e,%.3e,%.3e], dudt=%.3e.\n" ,
p.id , parts[k].id , sqrt(r2) , dvdr , p.a[0] , p.a[1], p.a[2] , p.force.u_dt );
}
}
/* Dump the result. */
// printf( "pairs_single_force: part %lli (h=%e) has a=[%.3e,%.3e,%.3e], udt=%e.\n" , p.id , p.h , p.a[0] , p.a[1] , p.a[2] , p.force.u_dt );
fflush(stdout);
}
/**
* @breif Launch the runners.
*
* @param e The #engine.
* @param nr_runners The number of #runners to let loose.
*/
void engine_launch ( struct engine *e , int nr_runners ) {
/* Cry havoc and let loose the dogs of war. */
e->barrier_launch = nr_runners;
if ( pthread_cond_broadcast( &e->barrier_cond ) != 0 )
error( "Failed to broadcast barrier open condition." );
/* Sit back and wait for the runners to come home. */
while ( e->barrier_launch || e->barrier_running )
if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
error( "Error while waiting for barrier." );
}
/**
* @brief Let the #engine loose to compute the forces.
*
* @param e The #engine.
* @param sort_queues Flag to try to sort the queues topologically.
*/
void engine_step ( struct engine *e ) {
int k;
float dt = e->dt, dt_step, dt_max = 0.0f, dt_min = FLT_MAX;
double epot = 0.0, ekin = 0.0;
float mom[3] = { 0.0 , 0.0 , 0.0 };
float ang[3] = { 0.0 , 0.0 , 0.0 };
int count = 0;
struct cell *c;
struct space *s = e->s;
TIMER_TIC2
/* Get the maximum dt. */
if ( e->policy & engine_policy_multistep ) {
dt_step = 2.0f*dt;
for ( k = 0 ; k < 32 && (e->step & (1 << k)) == 0 ; k++ )
dt_step *= 2;
}
else
dt_step = FLT_MAX;
/* Set the maximum dt. */
e->dt_step = dt_step;
e->s->dt_step = dt_step;
// printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout);
// printParticle( parts , 432626 );
/* First kick. */
TIMER_TIC
scheduler_start( &e->sched , (1 << task_type_kick1) );
engine_launch( e , ( e->nr_threads > 16 ) ? 16 : e->nr_threads );
TIMER_TOC( timer_kick1 );
/* Check if all the kick1 threads have executed. */
for ( k = 0 ; k < e->sched.nr_tasks ; k++ )
if ( e->sched.tasks[k].type == task_type_kick1 &&
e->sched.tasks[k].tic == 0 )
error( "Not all kick1 tasks completed." );
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( e->s->parts , 3392063069037 , e->s->nr_parts );
/* Prepare the space. */
engine_prepare( e );
// engine_single_density( e->s->dim , 3392063069037 , e->s->parts , e->s->nr_parts , e->s->periodic );
/* Send off the runners. */
TIMER_TIC_ND
engine_launch( e , e->nr_threads );
TIMER_TOC(timer_runners);
// engine_single_force( e->s->dim , 8328423931905 , e->s->parts , e->s->nr_parts , e->s->periodic );
// for(k=0; k<10; ++k)
// printParticle(parts, k);
// printParticle( parts , 432626 );
// printParticle( e->s->parts , 3392063069037 , e->s->nr_parts );
// printParticle( e->s->parts , 8328423931905 , e->s->nr_parts );
/* Collect the cell data from the second kick. */
for ( k = 0 ; k < s->nr_cells ; k++ ) {
c = &s->cells[k];
engine_collect_kick2( c );
dt_min = fminf( dt_min , c->dt_min );
dt_max = fmaxf( dt_max , c->dt_max );
ekin += c->ekin;
epot += c->epot;
count += c->updated;
mom[0] += c->mom[0]; mom[1] += c->mom[1]; mom[2] += c->mom[2];
ang[0] += c->ang[0]; ang[1] += c->ang[1]; ang[2] += c->ang[2];
}
e->dt_min = dt_min;
e->dt_max = dt_max;
e->count_step = count;
e->ekin = ekin;
e->epot = epot;
// printParticle( e->s->parts , 382557 , e->s->nr_parts );
// printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout);
// printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout);
// printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout);
// printf( "engine_step: total angular momentum is [ %e , %e , %e ].\n" , ang[0] , ang[1] , ang[2] ); fflush(stdout);
// printf( "engine_step: total entropic function is %e .\n", ent ); fflush(stdout);
// printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout);
/* Increase the step. */
e->step += 1;
/* Does the time step need adjusting? */
if ( e->policy & engine_policy_fixdt ) {
e->dt = e->dt_orig;
}
else {
if ( e->dt == 0 ) {
e->nullstep += 1;
e->dt = e->dt_orig;
while ( dt_min < e->dt )
e->dt *= 0.5;
while ( dt_min > 2*e->dt )
e->dt *= 2.0;
// printf( "engine_step: dt_min=%.3e, adjusting time step to dt=%e.\n" , dt_min , e->dt );
}
else {
while ( dt_min < e->dt ) {
e->dt *= 0.5;
e->step *= 2;
e->nullstep *= 2;
// printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt );
}
while ( dt_min > 2*e->dt && (e->step & 1) == 0 ) {
e->dt *= 2.0;
e->step /= 2;
e->nullstep /= 2;
// printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt );
}
}
}
/* Set the system time. */
e->time = e->dt * (e->step - e->nullstep);
TIMER_TOC2(timer_step);
}
/**
* @brief init an engine with the given number of threads, queues, and
* the given policy.
*
* @param e The #engine.
* @param s The #space in which this #runner will run.
* @param dt The initial time step to use.
* @param nr_threads The number of threads to spawn.
* @param nr_queues The number of task queues to create.
* @param policy The queueing policy to use.
*/
void engine_init ( struct engine *e , struct space *s , float dt , int nr_threads , int nr_queues , int policy ) {
int k;
float dt_min = dt;
#if defined(HAVE_SETAFFINITY)
int nr_cores = sysconf( _SC_NPROCESSORS_ONLN );
int i, j, cpuid[ nr_cores ];
cpu_set_t cpuset;
if ( policy & engine_policy_cputight ) {
for ( k = 0 ; k < nr_cores ; k++ )
cpuid[k] = k;
}
else {
cpuid[0] = 0;
k = 1;
for ( i = 1 ; i < nr_cores ; i *= 2 )
for ( j = nr_cores / i / 2 ; j < nr_cores ; j += nr_cores / i )
cpuid[k++] = j;
printf( "engine_init: cpu map is [ " );
for ( i = 0 ; i < nr_cores ; i++ )
printf( "%i " , cpuid[i] );
printf( "].\n" );
}
#endif
/* Store the values. */
e->s = s;
e->nr_threads = nr_threads;
e->policy = policy;
e->step = 0;
e->nullstep = 0;
e->time = 0.0;
/* First of all, init the barrier and lock it. */
if ( pthread_mutex_init( &e->barrier_mutex , NULL ) != 0 )
error( "Failed to initialize barrier mutex." );
if ( pthread_cond_init( &e->barrier_cond , NULL ) != 0 )
error( "Failed to initialize barrier condition variable." );
if ( pthread_mutex_lock( &e->barrier_mutex ) != 0 )
error( "Failed to lock barrier mutex." );
e->barrier_running = 0;
e->barrier_launch = 0;
/* Run through the parts and get the minimum time step. */
e->dt_orig = dt;
for ( k = 0 ; k < s->nr_parts ; k++ )
if ( s->parts[k].dt < dt_min )
dt_min = s->parts[k].dt;
if ( dt_min == 0.0f )
dt = 0.0f;
else
while ( dt > dt_min )
dt *= 0.5f;
e->dt = dt;
/* Init the scheduler. */
scheduler_init( &e->sched , e->s , nr_queues , scheduler_flag_steal );
s->nr_queues = nr_queues;
/* Append a kick1 task to each cell. */
scheduler_reset( &e->sched , s->tot_cells );
space_map_cells_pre( e->s , 1 , &scheduler_map_mkkick1 , &e->sched );
/* Allocate and init the threads. */
if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL )
error( "Failed to allocate threads array." );
for ( k = 0 ; k < nr_threads ; k++ ) {
e->runners[k].id = k;
e->runners[k].e = e;
e->barrier_running += 1;
if ( pthread_create( &e->runners[k].thread , NULL , &runner_main , &e->runners[k] ) != 0 )
error( "Failed to create runner thread." );
#if defined(HAVE_SETAFFINITY)
/* Set a reasonable queue ID. */
e->runners[k].qid = cpuid[ k ] * nr_queues / nr_threads;
/* Set the cpu mask to zero | e->id. */
CPU_ZERO( &cpuset );
CPU_SET( cpuid[ k ] , &cpuset );
/* Apply this mask to the runner's pthread. */
if ( pthread_setaffinity_np( e->runners[k].thread , sizeof(cpu_set_t) , &cpuset ) != 0 )
error( "Failed to set thread affinity." );
#else
e->runners[k].qid = k % nr_queues;
#endif
}
/* Wait for the runner threads to be in place. */
while ( e->barrier_running || e->barrier_launch )
if ( pthread_cond_wait( &e->barrier_cond , &e->barrier_mutex ) != 0 )
error( "Error while waiting for runner threads to get in place." );
}