Commit 9e91071c authored by Pedro Gonnet's avatar Pedro Gonnet
Browse files

several fixes to the integrator, correct implementation of multiple time stepping.


Former-commit-id: 5ad40ec1080ded54127fcb43088f9453d7cf73dd
parent bf944c0e
......@@ -122,14 +122,11 @@ void engine_maketasks ( struct engine *e ) {
if ( t->skip )
continue;
if ( t->type == task_type_sort && t->ci->split )
for ( j = 0 ; j < 8 ; j++ ) {
if ( t->ci->progeny[j] != NULL ) {
if ( t->ci->progeny[j]->sorts == NULL )
t->ci->progeny[j]->sorts = scheduler_addtask( sched , task_type_sort , task_subtype_none , t->flags , 0 , t->ci->progeny[j] , NULL , 0 );
for ( j = 0 ; j < 8 ; j++ )
if ( t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL ) {
t->ci->progeny[j]->sorts->skip = 0;
task_addunlock( t->ci->progeny[j]->sorts , t );
}
}
if ( t->type == task_type_self ) {
atomic_inc( &t->ci->nr_tasks );
if ( t->subtype == task_subtype_density ) {
......@@ -219,20 +216,6 @@ void engine_maketasks ( struct engine *e ) {
/* Weight the tasks. */
scheduler_reweight( sched );
/* Count the number of each task type. */
int counts[ task_type_count+1 ];
for ( k = 0 ; k <= task_type_count ; k++ )
counts[k] = 0;
for ( k = 0 ; k < sched->nr_tasks ; k++ )
if ( !sched->tasks[k].skip )
counts[ (int)sched->tasks[k].type ] += 1;
else
counts[ task_type_count ] += 1;
printf( "engine_maketasks: task counts are [ %s=%i" , taskID_names[0] , counts[0] );
for ( k = 1 ; k < task_type_count ; k++ )
printf( " %s=%i" , taskID_names[k] , counts[k] );
printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout);
}
......@@ -375,7 +358,8 @@ int engine_marktasks ( struct engine *e ) {
void engine_prepare ( struct engine *e ) {
int rebuild;
int k, rebuild;
struct scheduler *sched = &e->sched;
TIMER_TIC
......@@ -403,16 +387,30 @@ void engine_prepare ( struct engine *e ) {
error( "engine_marktasks failed after space_rebuild." );
// printf( "engine_prepare: engine_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 );
/* Count the number of each task type. */
int counts[ task_type_count+1 ];
for ( k = 0 ; k <= task_type_count ; k++ )
counts[k] = 0;
for ( k = 0 ; k < sched->nr_tasks ; k++ )
if ( !sched->tasks[k].skip )
counts[ (int)sched->tasks[k].type ] += 1;
else
counts[ task_type_count ] += 1;
printf( "engine_prepare: task counts are [ %s=%i" , taskID_names[0] , counts[0] );
for ( k = 1 ; k < task_type_count ; k++ )
printf( " %s=%i" , taskID_names[k] , counts[k] );
printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout);
}
/* Start the scheduler. */
// ticks tic2 = getticks();
scheduler_start( &e->sched , (1 << task_type_sort) |
(1 << task_type_self) |
(1 << task_type_pair) |
(1 << task_type_sub) |
(1 << task_type_ghost) |
(1 << task_type_kick2) );
scheduler_start( sched , (1 << task_type_sort) |
(1 << task_type_self) |
(1 << task_type_pair) |
(1 << task_type_sub) |
(1 << task_type_ghost) |
(1 << task_type_kick2) );
// printf( "engine_prepare: scheduler_start took %.3f ms.\n" , (double)(getticks() - tic2)/CPU_TPS*1000 );
TIMER_TOC( timer_prepare );
......@@ -697,7 +695,7 @@ void engine_step ( struct engine *e ) {
TIMER_TIC
engine_launch( e , e->nr_threads );
TIMER_TOC(timer_runners);
// engine_single_force( e->s->dim , 8328423931905 , e->s->parts , e->s->nr_parts , e->s->periodic );
// for(k=0; k<10; ++k)
......@@ -729,7 +727,6 @@ void engine_step ( struct engine *e ) {
// printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout);
// printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout);
// printf( "engine_step: total angular momentum is [ %e , %e , %e ].\n" , ang[0] , ang[1] , ang[2] ); fflush(stdout);
// printf( "engine_step: total entropic function is %e .\n", ent ); fflush(stdout);
// printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout);
/* Increase the step. */
......@@ -737,36 +734,51 @@ void engine_step ( struct engine *e ) {
/* Does the time step need adjusting? */
if ( e->policy & engine_policy_fixdt ) {
e->dt = e->dt_orig;
dt = e->dt_orig;
}
else {
if ( e->dt == 0 ) {
if ( dt == 0 ) {
e->nullstep += 1;
e->dt = e->dt_orig;
while ( dt_min < e->dt )
e->dt *= 0.5;
while ( dt_min > 2*e->dt )
e->dt *= 2.0;
if ( e->dt_orig > 0.0 ) {
dt = e->dt_orig;
while ( dt_min < dt )
dt *= 0.5;
while ( dt_min > 2*dt )
dt *= 2.0;
}
else
dt = dt_min;
for ( k = 0 ; k < s->nr_parts ; k++ ) {
/* struct part *p = &s->parts[k];
struct xpart *xp = &s->xparts[k];
float dt_curr = dt;
for ( int j = (int)( p->dt / dt ) ; j > 1 ; j >>= 1 )
dt_curr *= 2.0f;
xp->dt_curr = dt_curr; */
s->parts[k].dt = dt;
s->xparts[k].dt_curr = dt;
}
// printf( "engine_step: dt_min=%.3e, adjusting time step to dt=%e.\n" , dt_min , e->dt );
}
else {
while ( dt_min < e->dt ) {
e->dt *= 0.5;
while ( dt_min < dt ) {
dt *= 0.5;
e->step *= 2;
e->nullstep *= 2;
// printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt );
}
while ( dt_min > 2*e->dt && (e->step & 1) == 0 ) {
e->dt *= 2.0;
while ( dt_min > 2*dt && (e->step & 1) == 0 ) {
dt *= 2.0;
e->step /= 2;
e->nullstep /= 2;
// printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt );
}
}
}
e->dt = dt;
/* Set the system time. */
e->time = e->dt * (e->step - e->nullstep);
e->time = dt * (e->step - e->nullstep);
TIMER_TOC2(timer_step);
......@@ -848,6 +860,7 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread
/* Append a kick1 task to each cell. */
scheduler_reset( &e->sched , s->tot_cells );
space_map_cells_pre( e->s , 1 , &scheduler_map_mkkick1 , &e->sched );
scheduler_ranktasks( &e->sched );
/* Allocate and init the threads. */
if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL )
......
......@@ -31,11 +31,11 @@ struct xpart {
/* Old position, at last tree rebuild. */
double x_old[3];
/* Old velocity. */
float v_old[3];
/* Velocity at the half-step. */
float v_hdt[3];
/* Old entropy. */
float u_old;
/* Entropy at the half-step. */
float u_hdt;
/* Old density. */
float omega;
......
......@@ -59,34 +59,45 @@ int queue_counter[ queue_counter_count ];
void queue_insert ( struct queue *q , struct task *t ) {
int k, *tid;
struct task *tasks;
/* Lock the queue. */
if ( lock_lock( &q->lock ) != 0 )
error( "Failed to get queue lock." );
tid = q->tid;
tasks = q->tasks;
/* Does the queue need to be grown? */
if ( q->count == q->size ) {
int *temp;
q->size *= queue_sizegrow;
if ( ( temp = (int *)malloc( sizeof(int) * q->size ) ) == NULL )
error( "Failed to allocate new indices." );
memcpy( temp , q->tid , sizeof(int) * q->count );
free( q->tid );
q->tid = temp;
memcpy( temp , tid , sizeof(int) * q->count );
free( tid );
q->tid = tid = temp;
}
/* Drop the task at the end of the queue. */
q->tid[ q->count ] = ( t - q->tasks );
tid[ q->count ] = ( t - tasks );
q->count += 1;
/* Shuffle up. */
for ( int k = q->count - 1 ; k > 0 ; k = (k-1)/2 )
if ( q->tasks[ q->tid[k] ].weight > q->tasks[ q->tid[(k-1)/2] ].weight ) {
int temp = q->tid[k];
q->tid[k] = q->tid[(k-1)/2];
q->tid[(k-1)/2] = temp;
for ( k = q->count - 1 ; k > 0 ; k = (k-1)/2 )
if ( tasks[ tid[k] ].weight > tasks[ tid[(k-1)/2] ].weight ) {
int temp = tid[k];
tid[k] = tid[(k-1)/2];
tid[(k-1)/2] = temp;
}
else
break;
/* Check the queue's consistency. */
/* for ( k = 1 ; k < q->count ; k++ )
if ( tasks[ tid[(k-1)/2] ].weight < tasks[ tid[k] ].weight )
error( "Queue heap is disordered." ); */
/* Unlock the queue. */
if ( lock_unlock( &q->lock ) != 0 )
......@@ -221,6 +232,11 @@ struct task *queue_gettask ( struct queue *q , struct cell *super , int blocking
else
res = NULL;
/* Check the queue's consistency. */
/* for ( k = 1 ; k < q->count ; k++ )
if ( qtasks[ qtid[(k-1)/2] ].weight < qtasks[ qtid[k] ].weight )
error( "Queue heap is disordered." ); */
/* Release the task lock. */
if ( lock_unlock( qlock ) != 0 )
error( "Unlocking the qlock failed.\n" );
......
......@@ -518,12 +518,12 @@ void runner_doghost ( struct runner *r , struct cell *c ) {
void runner_dokick2 ( struct runner *r , struct cell *c ) {
int k, count = 0, nr_parts = c->count;
float dt_min = FLT_MAX, dt_max = 0.0f;
int j, k, count = 0, nr_parts = c->count;
float dt_curr, hdt_curr, dt_min = FLT_MAX, dt_max = 0.0f;
double ekin = 0.0, epot = 0.0;
float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f };
float x[3], v[3], u, h, pdt, m;
float dt_step = r->e->dt_step, dt = r->e->dt, hdt = 0.5f*dt;
float x[3], v_hdt[3], u_hdt, h, pdt, m;
float dt_step = r->e->dt_step, dt = r->e->dt, idt;
float dt_cfl, dt_h_change, dt_u_change, dt_new;
float h_dt, u_dt;
struct part *restrict p, *restrict parts = c->parts;
......@@ -531,6 +531,9 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) {
TIMER_TIC
/* Init idt to avoid compiler stupidity. */
idt = ( dt > 0 ) ? 1.0f / dt : 0.0f;
/* Loop over the particles and kick them. */
__builtin_prefetch( &parts[0] , 0 , 1 );
__builtin_prefetch( &parts[0].rho_dh , 0 , 1 );
......@@ -552,55 +555,77 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) {
/* Get local copies of particle data. */
pdt = p->dt;
u_dt = p->force.u_dt;
h = p->h;
m = p->mass;
x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2];
v_hdt[0] = xp->v_hdt[0]; v_hdt[1] = xp->v_hdt[1]; v_hdt[2] = xp->v_hdt[2];
u_hdt = xp->u_hdt;
/* Scale the derivatives if they're freshly computed. */
/* Update the particle's data (if active). */
if ( pdt <= dt_step ) {
h_dt = p->force.h_dt *= h * 0.333333333f;
/* Increase the number of particles updated. */
count += 1;
/* Scale the derivatives as they're freshly computed. */
h = p->h;
h_dt = p->force.h_dt *= h * 0.333333333f;
xp->omega = 1.0f + h * p->rho_dh / p->rho * 0.3333333333f;
/* Compute the new time step. */
u_dt = p->force.u_dt;
dt_cfl = const_cfl * h / p->force.v_sig;
dt_h_change = ( h_dt != 0.0f ) ? fabsf( const_ln_max_h_change * h / h_dt ) : FLT_MAX;
dt_u_change = ( u_dt != 0.0f ) ? fabsf( const_max_u_change * p->u / u_dt ) : FLT_MAX;
dt_new = fminf( dt_cfl , fminf( dt_h_change , dt_u_change ) );
if ( pdt == 0.0f )
p->dt = pdt = dt_new;
else
p->dt = pdt = fminf( dt_new , 2.0f*pdt );
/* Get the particle-specific time step. */
dt_curr = xp->dt_curr;
hdt_curr = 0.5f * dt_curr;
/* Update positions and energies at the full step. */
p->v[0] = v_hdt[0] + hdt_curr * p->a[0];
p->v[1] = v_hdt[1] + hdt_curr * p->a[1];
p->v[2] = v_hdt[2] + hdt_curr * p->a[2];
p->u = u_hdt + hdt_curr * u_dt;
xp->v_hdt[0] = ( v_hdt[0] += dt_curr * p->a[0] );
xp->v_hdt[1] = ( v_hdt[1] += dt_curr * p->a[1] );
xp->v_hdt[2] = ( v_hdt[2] += dt_curr * p->a[2] );
xp->u_hdt = ( u_hdt += dt_curr * u_dt );
/* Set the new particle-specific time step. */
if ( dt > 0.0f ) {
dt_curr = dt;
j = (int)( pdt * idt );
while ( j > 1 ) {
dt_curr *= 2.0f;
j >>= 1;
}
xp->dt_curr = dt_curr;
}
}
else
h_dt = p->force.h_dt;
/* Update the particle's time step. */
dt_cfl = const_cfl * h / p->force.v_sig;
dt_h_change = ( h_dt != 0.0f ) ? fabsf( const_ln_max_h_change * h / h_dt ) : FLT_MAX;
dt_u_change = ( u_dt != 0.0f ) ? fabsf( const_max_u_change * p->u / u_dt ) : FLT_MAX;
dt_new = fminf( dt_cfl , fminf( dt_h_change , dt_u_change ) );
if ( pdt == 0.0f )
p->dt = pdt = dt_new;
else if ( pdt <= dt_step )
p->dt = pdt = fminf( dt_new , 2.0f*pdt );
else
p->dt = pdt = fminf( dt_new , pdt );
/* Update positions and energies at the half-step. */
p->v[0] = ( v[0] = xp->v_old[0] + hdt * p->a[0] );
p->v[1] = ( v[1] = xp->v_old[1] + hdt * p->a[1] );
p->v[2] = ( v[2] = xp->v_old[2] + hdt * p->a[2] );
p->u = ( u = xp->u_old + hdt * u_dt );
/* Get the smallest/largest dt. */
dt_min = fminf( dt_min , pdt );
dt_max = fmaxf( dt_max , pdt );
/* Collect total energy. */
ekin += 0.5 * m * ( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] );
epot += m * u;
ekin += 0.5 * m * ( v_hdt[0]*v_hdt[0] + v_hdt[1]*v_hdt[1] + v_hdt[2]*v_hdt[2] );
epot += m * u_hdt;
/* Collect momentum */
mom[0] += m * v[0];
mom[1] += m * v[1];
mom[2] += m * v[2];
mom[0] += m * v_hdt[0];
mom[1] += m * v_hdt[1];
mom[2] += m * v_hdt[2];
/* Collect angular momentum */
ang[0] += m * ( x[1]*v[2] - x[2]*v[1] );
ang[1] += m * ( x[2]*v[0] - x[0]*v[2] );
ang[2] += m * ( x[0]*v[1] - x[1]*v[0] );
ang[0] += m * ( x[1]*v_hdt[2] - x[2]*v_hdt[1] );
ang[1] += m * ( x[2]*v_hdt[0] - x[0]*v_hdt[2] );
ang[2] += m * ( x[0]*v_hdt[1] - x[1]*v_hdt[0] );
/* Collect entropic function */
// lent += u * pow( p->rho, 1.f-const_gamma );
......@@ -636,9 +661,9 @@ void runner_dokick1 ( struct runner *r , struct cell *c ) {
int j, k;
struct engine *e = r->e;
float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt;
float pdt, dt_step = e->dt_step, dt = e->dt;
float dt_min, dt_max, h_max, dx, dx_max;
float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho;
float a[3], v[3], u, u_dt, h, h_dt, w, rho;
double x[3], x_old[3];
struct part *restrict p, *restrict parts = c->parts;
struct xpart *restrict xp, *restrict xparts = c->xparts;
......@@ -686,16 +711,10 @@ void runner_dokick1 ( struct runner *r , struct cell *c ) {
dt_min = fminf( dt_min , pdt );
dt_max = fmaxf( dt_max , pdt );
/* Step and store the velocity and internal energy. */
xp->v_old[0] = v_old[0] = v[0] + hdt * a[0];
xp->v_old[1] = v_old[1] = v[1] + hdt * a[1];
xp->v_old[2] = v_old[2] = v[2] + hdt * a[2];
xp->u_old = p->u + hdt * p->force.u_dt;
/* Move the particles with the velocitie at the half-step. */
p->x[0] = x[0] += dt * v_old[0];
p->x[1] = x[1] += dt * v_old[1];
p->x[2] = x[2] += dt * v_old[2];
/* Move the particles with the velocities at the half-step. */
p->x[0] = x[0] += dt * xp->v_hdt[0];
p->x[1] = x[1] += dt * xp->v_hdt[1];
p->x[2] = x[2] += dt * xp->v_hdt[2];
dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
(x[1] - x_old[1])*(x[1] - x_old[1]) +
(x[2] - x_old[2])*(x[2] - x_old[2]) );
......@@ -787,7 +806,7 @@ void runner_dokick ( struct runner *r , struct cell *c , int timer ) {
float h_max, dx, dx_max;
double ekin = 0.0, epot = 0.0;
float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f };
float x[3], x_old[3], v[3], v_old[3], a[3], u, u_old, h, pdt, m, w;
float x[3], x_old[3], v_hdt[3], a[3], u, u_hdt, h, pdt, m, w;
float dt = r->e->dt, hdt = 0.5f*dt;
float dt_cfl, dt_h_change, dt_u_change, dt_new;
float h_dt, u_dt;
......@@ -832,8 +851,8 @@ void runner_dokick ( struct runner *r , struct cell *c , int timer ) {
x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2];
a[0] = p->a[0]; a[1] = p->a[1]; a[2] = p->a[2];
x_old[0] = xp->x_old[0]; x_old[1] = xp->x_old[1]; x_old[2] = xp->x_old[2];
v_old[0] = xp->v_old[0]; v_old[1] = xp->v_old[1]; v_old[2] = xp->v_old[2];
u_old = xp->u_old;
v_hdt[0] = xp->v_hdt[0]; v_hdt[1] = xp->v_hdt[1]; v_hdt[2] = xp->v_hdt[2];
u_hdt = xp->u_hdt;
/* Scale the derivatives if they're freshly computed. */
h_dt = p->force.h_dt *= h * 0.333333333f;
......@@ -854,57 +873,51 @@ void runner_dokick ( struct runner *r , struct cell *c , int timer ) {
dt_min = fminf( dt_min , pdt );
dt_max = fmaxf( dt_max , pdt );
/* Get the instentaneous velocity and internal energy. */
v[0] = v_old[0] + hdt * a[0];
v[1] = v_old[1] + hdt * a[1];
v[2] = v_old[2] + hdt * a[2];
u = u_old + hdt * u_dt;
/* Collect momentum */
mom[0] += m * v[0];
mom[1] += m * v[1];
mom[2] += m * v[2];
/* Collect angular momentum */
ang[0] += m * ( x[1]*v[2] - x[2]*v[1] );
ang[1] += m * ( x[2]*v[0] - x[0]*v[2] );
ang[2] += m * ( x[0]*v[1] - x[1]*v[0] );
/* Collect total energy. */
ekin += 0.5 * m * ( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] );
epot += m * u;
/* Step and store the velocity and internal energy. */
xp->v_old[0] = ( v_old[0] += dt * a[0] );
xp->v_old[1] = ( v_old[1] += dt * a[1] );
xp->v_old[2] = ( v_old[2] += dt * a[2] );
xp->u_old = u_old + hdt * u_dt;
xp->v_hdt[0] = ( v_hdt[0] += dt * a[0] );
xp->v_hdt[1] = ( v_hdt[1] += dt * a[1] );
xp->v_hdt[2] = ( v_hdt[2] += dt * a[2] );
xp->u_hdt = ( u_hdt += dt * u_dt );
/* Move the particles with the velocitie at the half-step. */
p->x[0] = x[0] += dt * v_old[0];
p->x[1] = x[1] += dt * v_old[1];
p->x[2] = x[2] += dt * v_old[2];
p->x[0] = x[0] += dt * v_hdt[0];
p->x[1] = x[1] += dt * v_hdt[1];
p->x[2] = x[2] += dt * v_hdt[2];
dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) +
(x[1] - x_old[1])*(x[1] - x_old[1]) +
(x[2] - x_old[2])*(x[2] - x_old[2]) );
dx_max = fmaxf( dx_max , dx );
/* Update positions and energies at the half-step. */
p->v[0] = v[0] + dt * a[0];
p->v[1] = v[1] + dt * a[1];
p->v[2] = v[2] + dt * a[2];
w = u_dt / u * dt;
/* Update positions and energies at the next full step. */
p->v[0] = v_hdt[0] + hdt * a[0];
p->v[1] = v_hdt[1] + hdt * a[1];
p->v[2] = v_hdt[2] + hdt * a[2];
w = u_dt / u_hdt * hdt;
if ( fabsf( w ) < 0.01f )
p->u = u * ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) );
p->u = u = u_hdt * ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) );
else
p->u = u * expf( w );
p->u = u = u_hdt * expf( w );
w = h_dt / h * dt;
if ( fabsf( w ) < 0.01f )
p->h = h * ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) );
p->h = h *= ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) );
else
p->h = h * expf( w );
p->h = h *= expf( w );
h_max = fmaxf( h_max , h );
/* Collect momentum */
mom[0] += m * v_hdt[0];
mom[1] += m * v_hdt[1];
mom[2] += m * v_hdt[2];
/* Collect angular momentum */
ang[0] += m * ( x[1]*v_hdt[2] - x[2]*v_hdt[1] );
ang[1] += m * ( x[2]*v_hdt[0] - x[0]*v_hdt[2] );
ang[2] += m * ( x[0]*v_hdt[1] - x[1]*v_hdt[0] );
/* Collect total energy. */
ekin += 0.5 * m * ( v_hdt[0]*v_hdt[0] + v_hdt[1]*v_hdt[1] + v_hdt[2]*v_hdt[2] );
epot += m * u_hdt;
/* Init fields for density calculation. */
p->density.wcount = 0.0f;
p->density.wcount_dh = 0.0f;
......
......@@ -541,6 +541,11 @@ void scheduler_ranktasks ( struct scheduler *s ) {
}
/* Verify that the tasks were ranked correctly. */
/* for ( k = 1 ; k < s->nr_tasks ; k++ )
if ( tasks[ tid[k-1] ].rank > tasks[ tid[k-1] ].rank )
error( "Task ranking failed." ); */
}
......@@ -656,11 +661,12 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) {
struct task *t, *tasks = s->tasks;
// ticks tic;
/* Run throught the tasks backwards and set their waits. */
/* Run throught the tasks and set their waits. */
// tic = getticks();
// #pragma omp parallel for schedule(static) private(t,j)
for ( k = nr_tasks-1 ; k >= 0 ; k-- ) {
for ( k = nr_tasks - 1 ; k >= 0 ; k-- ) {
t = &tasks[ tid[k] ];
t->wait = 0;
if ( !( (1 << t->type) & mask ) || t->skip )
continue;
for ( j = 0 ; j < t->nr_unlock_tasks ; j++ )
......@@ -673,10 +679,13 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) {
// #pragma omp parallel for schedule(static) private(t)
for ( k = 0 ; k < nr_tasks ; k++ ) {
t = &tasks[ tid[k] ];
if ( t->rank > 0 )
break;
if ( ( (1 << t->type) & mask ) && !t->skip && t->wait == 0 )
scheduler_enqueue( s , t );
if ( ( (1 << t->type) & mask ) && !t->skip && t->wait == 0 ) {
if ( t->implicit )
for ( j = 0 ; j < t->nr_unlock_tasks ; j++ )
atomic_dec( &t->unlock_tasks[j]->wait );
else
scheduler_enqueue( s , t );
}
}
// printf( "scheduler_start: enqueueing tasks took %.3f ms.\n" , (double)( getticks() - tic ) / CPU_TPS * 1000 );
......@@ -744,55 +753,6 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
}
/**
* @brief Take care of a tasks dependencies.
*
* @param s The #scheduler.
* @param t The finished #task.
*/