diff --git a/src/engine.c b/src/engine.c index bb0d886fb65d026fa4c4f277b569516eed76a96a..10aa28cba15df61232373b4c1c12ea5a4633fa4a 100644 --- a/src/engine.c +++ b/src/engine.c @@ -122,14 +122,11 @@ void engine_maketasks ( struct engine *e ) { if ( t->skip ) continue; if ( t->type == task_type_sort && t->ci->split ) - for ( j = 0 ; j < 8 ; j++ ) { - if ( t->ci->progeny[j] != NULL ) { - if ( t->ci->progeny[j]->sorts == NULL ) - t->ci->progeny[j]->sorts = scheduler_addtask( sched , task_type_sort , task_subtype_none , t->flags , 0 , t->ci->progeny[j] , NULL , 0 ); + for ( j = 0 ; j < 8 ; j++ ) + if ( t->ci->progeny[j] != NULL && t->ci->progeny[j]->sorts != NULL ) { t->ci->progeny[j]->sorts->skip = 0; task_addunlock( t->ci->progeny[j]->sorts , t ); } - } if ( t->type == task_type_self ) { atomic_inc( &t->ci->nr_tasks ); if ( t->subtype == task_subtype_density ) { @@ -219,20 +216,6 @@ void engine_maketasks ( struct engine *e ) { /* Weight the tasks. */ scheduler_reweight( sched ); - /* Count the number of each task type. */ - int counts[ task_type_count+1 ]; - for ( k = 0 ; k <= task_type_count ; k++ ) - counts[k] = 0; - for ( k = 0 ; k < sched->nr_tasks ; k++ ) - if ( !sched->tasks[k].skip ) - counts[ (int)sched->tasks[k].type ] += 1; - else - counts[ task_type_count ] += 1; - printf( "engine_maketasks: task counts are [ %s=%i" , taskID_names[0] , counts[0] ); - for ( k = 1 ; k < task_type_count ; k++ ) - printf( " %s=%i" , taskID_names[k] , counts[k] ); - printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); - } @@ -375,7 +358,8 @@ int engine_marktasks ( struct engine *e ) { void engine_prepare ( struct engine *e ) { - int rebuild; + int k, rebuild; + struct scheduler *sched = &e->sched; TIMER_TIC @@ -403,16 +387,30 @@ void engine_prepare ( struct engine *e ) { error( "engine_marktasks failed after space_rebuild." ); // printf( "engine_prepare: engine_marktasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); + /* Count the number of each task type. */ + int counts[ task_type_count+1 ]; + for ( k = 0 ; k <= task_type_count ; k++ ) + counts[k] = 0; + for ( k = 0 ; k < sched->nr_tasks ; k++ ) + if ( !sched->tasks[k].skip ) + counts[ (int)sched->tasks[k].type ] += 1; + else + counts[ task_type_count ] += 1; + printf( "engine_prepare: task counts are [ %s=%i" , taskID_names[0] , counts[0] ); + for ( k = 1 ; k < task_type_count ; k++ ) + printf( " %s=%i" , taskID_names[k] , counts[k] ); + printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); + } /* Start the scheduler. */ // ticks tic2 = getticks(); - scheduler_start( &e->sched , (1 << task_type_sort) | - (1 << task_type_self) | - (1 << task_type_pair) | - (1 << task_type_sub) | - (1 << task_type_ghost) | - (1 << task_type_kick2) ); + scheduler_start( sched , (1 << task_type_sort) | + (1 << task_type_self) | + (1 << task_type_pair) | + (1 << task_type_sub) | + (1 << task_type_ghost) | + (1 << task_type_kick2) ); // printf( "engine_prepare: scheduler_start took %.3f ms.\n" , (double)(getticks() - tic2)/CPU_TPS*1000 ); TIMER_TOC( timer_prepare ); @@ -697,7 +695,7 @@ void engine_step ( struct engine *e ) { TIMER_TIC engine_launch( e , e->nr_threads ); TIMER_TOC(timer_runners); - + // engine_single_force( e->s->dim , 8328423931905 , e->s->parts , e->s->nr_parts , e->s->periodic ); // for(k=0; k<10; ++k) @@ -729,7 +727,6 @@ void engine_step ( struct engine *e ) { // printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout); // printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout); // printf( "engine_step: total angular momentum is [ %e , %e , %e ].\n" , ang[0] , ang[1] , ang[2] ); fflush(stdout); - // printf( "engine_step: total entropic function is %e .\n", ent ); fflush(stdout); // printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout); /* Increase the step. */ @@ -737,36 +734,51 @@ void engine_step ( struct engine *e ) { /* Does the time step need adjusting? */ if ( e->policy & engine_policy_fixdt ) { - e->dt = e->dt_orig; + dt = e->dt_orig; } else { - if ( e->dt == 0 ) { + if ( dt == 0 ) { e->nullstep += 1; - e->dt = e->dt_orig; - while ( dt_min < e->dt ) - e->dt *= 0.5; - while ( dt_min > 2*e->dt ) - e->dt *= 2.0; + if ( e->dt_orig > 0.0 ) { + dt = e->dt_orig; + while ( dt_min < dt ) + dt *= 0.5; + while ( dt_min > 2*dt ) + dt *= 2.0; + } + else + dt = dt_min; + for ( k = 0 ; k < s->nr_parts ; k++ ) { + /* struct part *p = &s->parts[k]; + struct xpart *xp = &s->xparts[k]; + float dt_curr = dt; + for ( int j = (int)( p->dt / dt ) ; j > 1 ; j >>= 1 ) + dt_curr *= 2.0f; + xp->dt_curr = dt_curr; */ + s->parts[k].dt = dt; + s->xparts[k].dt_curr = dt; + } // printf( "engine_step: dt_min=%.3e, adjusting time step to dt=%e.\n" , dt_min , e->dt ); } else { - while ( dt_min < e->dt ) { - e->dt *= 0.5; + while ( dt_min < dt ) { + dt *= 0.5; e->step *= 2; e->nullstep *= 2; // printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt ); } - while ( dt_min > 2*e->dt && (e->step & 1) == 0 ) { - e->dt *= 2.0; + while ( dt_min > 2*dt && (e->step & 1) == 0 ) { + dt *= 2.0; e->step /= 2; e->nullstep /= 2; // printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt ); } } } + e->dt = dt; /* Set the system time. */ - e->time = e->dt * (e->step - e->nullstep); + e->time = dt * (e->step - e->nullstep); TIMER_TOC2(timer_step); @@ -848,6 +860,7 @@ void engine_init ( struct engine *e , struct space *s , float dt , int nr_thread /* Append a kick1 task to each cell. */ scheduler_reset( &e->sched , s->tot_cells ); space_map_cells_pre( e->s , 1 , &scheduler_map_mkkick1 , &e->sched ); + scheduler_ranktasks( &e->sched ); /* Allocate and init the threads. */ if ( ( e->runners = (struct runner *)malloc( sizeof(struct runner) * nr_threads ) ) == NULL ) diff --git a/src/part.h b/src/part.h index d5e5bb54e22e52ff44e161dce34fcffa08f9a0c2..7c503486c1069ff723d218dd705a7cc82a42943a 100644 --- a/src/part.h +++ b/src/part.h @@ -31,11 +31,11 @@ struct xpart { /* Old position, at last tree rebuild. */ double x_old[3]; - /* Old velocity. */ - float v_old[3]; + /* Velocity at the half-step. */ + float v_hdt[3]; - /* Old entropy. */ - float u_old; + /* Entropy at the half-step. */ + float u_hdt; /* Old density. */ float omega; diff --git a/src/queue.c b/src/queue.c index 918b4c108252a7a9954277e9f46a592581a423f7..7b66ccbeeb67b2f75e63ab4cd5bcf3bdb2fc5b1f 100644 --- a/src/queue.c +++ b/src/queue.c @@ -59,34 +59,45 @@ int queue_counter[ queue_counter_count ]; void queue_insert ( struct queue *q , struct task *t ) { + int k, *tid; + struct task *tasks; + /* Lock the queue. */ if ( lock_lock( &q->lock ) != 0 ) error( "Failed to get queue lock." ); + tid = q->tid; + tasks = q->tasks; + /* Does the queue need to be grown? */ if ( q->count == q->size ) { int *temp; q->size *= queue_sizegrow; if ( ( temp = (int *)malloc( sizeof(int) * q->size ) ) == NULL ) error( "Failed to allocate new indices." ); - memcpy( temp , q->tid , sizeof(int) * q->count ); - free( q->tid ); - q->tid = temp; + memcpy( temp , tid , sizeof(int) * q->count ); + free( tid ); + q->tid = tid = temp; } /* Drop the task at the end of the queue. */ - q->tid[ q->count ] = ( t - q->tasks ); + tid[ q->count ] = ( t - tasks ); q->count += 1; /* Shuffle up. */ - for ( int k = q->count - 1 ; k > 0 ; k = (k-1)/2 ) - if ( q->tasks[ q->tid[k] ].weight > q->tasks[ q->tid[(k-1)/2] ].weight ) { - int temp = q->tid[k]; - q->tid[k] = q->tid[(k-1)/2]; - q->tid[(k-1)/2] = temp; + for ( k = q->count - 1 ; k > 0 ; k = (k-1)/2 ) + if ( tasks[ tid[k] ].weight > tasks[ tid[(k-1)/2] ].weight ) { + int temp = tid[k]; + tid[k] = tid[(k-1)/2]; + tid[(k-1)/2] = temp; } else break; + + /* Check the queue's consistency. */ + /* for ( k = 1 ; k < q->count ; k++ ) + if ( tasks[ tid[(k-1)/2] ].weight < tasks[ tid[k] ].weight ) + error( "Queue heap is disordered." ); */ /* Unlock the queue. */ if ( lock_unlock( &q->lock ) != 0 ) @@ -221,6 +232,11 @@ struct task *queue_gettask ( struct queue *q , struct cell *super , int blocking else res = NULL; + /* Check the queue's consistency. */ + /* for ( k = 1 ; k < q->count ; k++ ) + if ( qtasks[ qtid[(k-1)/2] ].weight < qtasks[ qtid[k] ].weight ) + error( "Queue heap is disordered." ); */ + /* Release the task lock. */ if ( lock_unlock( qlock ) != 0 ) error( "Unlocking the qlock failed.\n" ); diff --git a/src/runner.c b/src/runner.c index b9c82b19b539d7fed3fbbbbad0c26b4e41cf040e..665ca5262769c4ac5adae5851c72eba6878e7743 100644 --- a/src/runner.c +++ b/src/runner.c @@ -518,12 +518,12 @@ void runner_doghost ( struct runner *r , struct cell *c ) { void runner_dokick2 ( struct runner *r , struct cell *c ) { - int k, count = 0, nr_parts = c->count; - float dt_min = FLT_MAX, dt_max = 0.0f; + int j, k, count = 0, nr_parts = c->count; + float dt_curr, hdt_curr, dt_min = FLT_MAX, dt_max = 0.0f; double ekin = 0.0, epot = 0.0; float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f }; - float x[3], v[3], u, h, pdt, m; - float dt_step = r->e->dt_step, dt = r->e->dt, hdt = 0.5f*dt; + float x[3], v_hdt[3], u_hdt, h, pdt, m; + float dt_step = r->e->dt_step, dt = r->e->dt, idt; float dt_cfl, dt_h_change, dt_u_change, dt_new; float h_dt, u_dt; struct part *restrict p, *restrict parts = c->parts; @@ -531,6 +531,9 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { TIMER_TIC + /* Init idt to avoid compiler stupidity. */ + idt = ( dt > 0 ) ? 1.0f / dt : 0.0f; + /* Loop over the particles and kick them. */ __builtin_prefetch( &parts[0] , 0 , 1 ); __builtin_prefetch( &parts[0].rho_dh , 0 , 1 ); @@ -552,55 +555,77 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { /* Get local copies of particle data. */ pdt = p->dt; - u_dt = p->force.u_dt; - h = p->h; m = p->mass; x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2]; + v_hdt[0] = xp->v_hdt[0]; v_hdt[1] = xp->v_hdt[1]; v_hdt[2] = xp->v_hdt[2]; + u_hdt = xp->u_hdt; - /* Scale the derivatives if they're freshly computed. */ + /* Update the particle's data (if active). */ if ( pdt <= dt_step ) { - h_dt = p->force.h_dt *= h * 0.333333333f; + + /* Increase the number of particles updated. */ count += 1; + + /* Scale the derivatives as they're freshly computed. */ + h = p->h; + h_dt = p->force.h_dt *= h * 0.333333333f; xp->omega = 1.0f + h * p->rho_dh / p->rho * 0.3333333333f; + + /* Compute the new time step. */ + u_dt = p->force.u_dt; + dt_cfl = const_cfl * h / p->force.v_sig; + dt_h_change = ( h_dt != 0.0f ) ? fabsf( const_ln_max_h_change * h / h_dt ) : FLT_MAX; + dt_u_change = ( u_dt != 0.0f ) ? fabsf( const_max_u_change * p->u / u_dt ) : FLT_MAX; + dt_new = fminf( dt_cfl , fminf( dt_h_change , dt_u_change ) ); + if ( pdt == 0.0f ) + p->dt = pdt = dt_new; + else + p->dt = pdt = fminf( dt_new , 2.0f*pdt ); + + /* Get the particle-specific time step. */ + dt_curr = xp->dt_curr; + hdt_curr = 0.5f * dt_curr; + + /* Update positions and energies at the full step. */ + p->v[0] = v_hdt[0] + hdt_curr * p->a[0]; + p->v[1] = v_hdt[1] + hdt_curr * p->a[1]; + p->v[2] = v_hdt[2] + hdt_curr * p->a[2]; + p->u = u_hdt + hdt_curr * u_dt; + xp->v_hdt[0] = ( v_hdt[0] += dt_curr * p->a[0] ); + xp->v_hdt[1] = ( v_hdt[1] += dt_curr * p->a[1] ); + xp->v_hdt[2] = ( v_hdt[2] += dt_curr * p->a[2] ); + xp->u_hdt = ( u_hdt += dt_curr * u_dt ); + + /* Set the new particle-specific time step. */ + if ( dt > 0.0f ) { + dt_curr = dt; + j = (int)( pdt * idt ); + while ( j > 1 ) { + dt_curr *= 2.0f; + j >>= 1; + } + xp->dt_curr = dt_curr; + } + } - else - h_dt = p->force.h_dt; - - /* Update the particle's time step. */ - dt_cfl = const_cfl * h / p->force.v_sig; - dt_h_change = ( h_dt != 0.0f ) ? fabsf( const_ln_max_h_change * h / h_dt ) : FLT_MAX; - dt_u_change = ( u_dt != 0.0f ) ? fabsf( const_max_u_change * p->u / u_dt ) : FLT_MAX; - dt_new = fminf( dt_cfl , fminf( dt_h_change , dt_u_change ) ); - if ( pdt == 0.0f ) - p->dt = pdt = dt_new; - else if ( pdt <= dt_step ) - p->dt = pdt = fminf( dt_new , 2.0f*pdt ); - else - p->dt = pdt = fminf( dt_new , pdt ); - - /* Update positions and energies at the half-step. */ - p->v[0] = ( v[0] = xp->v_old[0] + hdt * p->a[0] ); - p->v[1] = ( v[1] = xp->v_old[1] + hdt * p->a[1] ); - p->v[2] = ( v[2] = xp->v_old[2] + hdt * p->a[2] ); - p->u = ( u = xp->u_old + hdt * u_dt ); /* Get the smallest/largest dt. */ dt_min = fminf( dt_min , pdt ); dt_max = fmaxf( dt_max , pdt ); /* Collect total energy. */ - ekin += 0.5 * m * ( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] ); - epot += m * u; + ekin += 0.5 * m * ( v_hdt[0]*v_hdt[0] + v_hdt[1]*v_hdt[1] + v_hdt[2]*v_hdt[2] ); + epot += m * u_hdt; /* Collect momentum */ - mom[0] += m * v[0]; - mom[1] += m * v[1]; - mom[2] += m * v[2]; + mom[0] += m * v_hdt[0]; + mom[1] += m * v_hdt[1]; + mom[2] += m * v_hdt[2]; /* Collect angular momentum */ - ang[0] += m * ( x[1]*v[2] - x[2]*v[1] ); - ang[1] += m * ( x[2]*v[0] - x[0]*v[2] ); - ang[2] += m * ( x[0]*v[1] - x[1]*v[0] ); + ang[0] += m * ( x[1]*v_hdt[2] - x[2]*v_hdt[1] ); + ang[1] += m * ( x[2]*v_hdt[0] - x[0]*v_hdt[2] ); + ang[2] += m * ( x[0]*v_hdt[1] - x[1]*v_hdt[0] ); /* Collect entropic function */ // lent += u * pow( p->rho, 1.f-const_gamma ); @@ -636,9 +661,9 @@ void runner_dokick1 ( struct runner *r , struct cell *c ) { int j, k; struct engine *e = r->e; - float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt; + float pdt, dt_step = e->dt_step, dt = e->dt; float dt_min, dt_max, h_max, dx, dx_max; - float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho; + float a[3], v[3], u, u_dt, h, h_dt, w, rho; double x[3], x_old[3]; struct part *restrict p, *restrict parts = c->parts; struct xpart *restrict xp, *restrict xparts = c->xparts; @@ -686,16 +711,10 @@ void runner_dokick1 ( struct runner *r , struct cell *c ) { dt_min = fminf( dt_min , pdt ); dt_max = fmaxf( dt_max , pdt ); - /* Step and store the velocity and internal energy. */ - xp->v_old[0] = v_old[0] = v[0] + hdt * a[0]; - xp->v_old[1] = v_old[1] = v[1] + hdt * a[1]; - xp->v_old[2] = v_old[2] = v[2] + hdt * a[2]; - xp->u_old = p->u + hdt * p->force.u_dt; - - /* Move the particles with the velocitie at the half-step. */ - p->x[0] = x[0] += dt * v_old[0]; - p->x[1] = x[1] += dt * v_old[1]; - p->x[2] = x[2] += dt * v_old[2]; + /* Move the particles with the velocities at the half-step. */ + p->x[0] = x[0] += dt * xp->v_hdt[0]; + p->x[1] = x[1] += dt * xp->v_hdt[1]; + p->x[2] = x[2] += dt * xp->v_hdt[2]; dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) + (x[1] - x_old[1])*(x[1] - x_old[1]) + (x[2] - x_old[2])*(x[2] - x_old[2]) ); @@ -787,7 +806,7 @@ void runner_dokick ( struct runner *r , struct cell *c , int timer ) { float h_max, dx, dx_max; double ekin = 0.0, epot = 0.0; float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f }; - float x[3], x_old[3], v[3], v_old[3], a[3], u, u_old, h, pdt, m, w; + float x[3], x_old[3], v_hdt[3], a[3], u, u_hdt, h, pdt, m, w; float dt = r->e->dt, hdt = 0.5f*dt; float dt_cfl, dt_h_change, dt_u_change, dt_new; float h_dt, u_dt; @@ -832,8 +851,8 @@ void runner_dokick ( struct runner *r , struct cell *c , int timer ) { x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2]; a[0] = p->a[0]; a[1] = p->a[1]; a[2] = p->a[2]; x_old[0] = xp->x_old[0]; x_old[1] = xp->x_old[1]; x_old[2] = xp->x_old[2]; - v_old[0] = xp->v_old[0]; v_old[1] = xp->v_old[1]; v_old[2] = xp->v_old[2]; - u_old = xp->u_old; + v_hdt[0] = xp->v_hdt[0]; v_hdt[1] = xp->v_hdt[1]; v_hdt[2] = xp->v_hdt[2]; + u_hdt = xp->u_hdt; /* Scale the derivatives if they're freshly computed. */ h_dt = p->force.h_dt *= h * 0.333333333f; @@ -854,57 +873,51 @@ void runner_dokick ( struct runner *r , struct cell *c , int timer ) { dt_min = fminf( dt_min , pdt ); dt_max = fmaxf( dt_max , pdt ); - /* Get the instentaneous velocity and internal energy. */ - v[0] = v_old[0] + hdt * a[0]; - v[1] = v_old[1] + hdt * a[1]; - v[2] = v_old[2] + hdt * a[2]; - u = u_old + hdt * u_dt; - - /* Collect momentum */ - mom[0] += m * v[0]; - mom[1] += m * v[1]; - mom[2] += m * v[2]; - - /* Collect angular momentum */ - ang[0] += m * ( x[1]*v[2] - x[2]*v[1] ); - ang[1] += m * ( x[2]*v[0] - x[0]*v[2] ); - ang[2] += m * ( x[0]*v[1] - x[1]*v[0] ); - - /* Collect total energy. */ - ekin += 0.5 * m * ( v[0]*v[0] + v[1]*v[1] + v[2]*v[2] ); - epot += m * u; - /* Step and store the velocity and internal energy. */ - xp->v_old[0] = ( v_old[0] += dt * a[0] ); - xp->v_old[1] = ( v_old[1] += dt * a[1] ); - xp->v_old[2] = ( v_old[2] += dt * a[2] ); - xp->u_old = u_old + hdt * u_dt; + xp->v_hdt[0] = ( v_hdt[0] += dt * a[0] ); + xp->v_hdt[1] = ( v_hdt[1] += dt * a[1] ); + xp->v_hdt[2] = ( v_hdt[2] += dt * a[2] ); + xp->u_hdt = ( u_hdt += dt * u_dt ); /* Move the particles with the velocitie at the half-step. */ - p->x[0] = x[0] += dt * v_old[0]; - p->x[1] = x[1] += dt * v_old[1]; - p->x[2] = x[2] += dt * v_old[2]; + p->x[0] = x[0] += dt * v_hdt[0]; + p->x[1] = x[1] += dt * v_hdt[1]; + p->x[2] = x[2] += dt * v_hdt[2]; dx = sqrtf( (x[0] - x_old[0])*(x[0] - x_old[0]) + (x[1] - x_old[1])*(x[1] - x_old[1]) + (x[2] - x_old[2])*(x[2] - x_old[2]) ); dx_max = fmaxf( dx_max , dx ); - /* Update positions and energies at the half-step. */ - p->v[0] = v[0] + dt * a[0]; - p->v[1] = v[1] + dt * a[1]; - p->v[2] = v[2] + dt * a[2]; - w = u_dt / u * dt; + /* Update positions and energies at the next full step. */ + p->v[0] = v_hdt[0] + hdt * a[0]; + p->v[1] = v_hdt[1] + hdt * a[1]; + p->v[2] = v_hdt[2] + hdt * a[2]; + w = u_dt / u_hdt * hdt; if ( fabsf( w ) < 0.01f ) - p->u = u * ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) ); + p->u = u = u_hdt * ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) ); else - p->u = u * expf( w ); + p->u = u = u_hdt * expf( w ); w = h_dt / h * dt; if ( fabsf( w ) < 0.01f ) - p->h = h * ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) ); + p->h = h *= ( 1.0f + w*( 1.0f + w*( 0.5f + w*( 1.0f/6.0f + 1.0f/24.0f*w ) ) ) ); else - p->h = h * expf( w ); + p->h = h *= expf( w ); h_max = fmaxf( h_max , h ); + /* Collect momentum */ + mom[0] += m * v_hdt[0]; + mom[1] += m * v_hdt[1]; + mom[2] += m * v_hdt[2]; + + /* Collect angular momentum */ + ang[0] += m * ( x[1]*v_hdt[2] - x[2]*v_hdt[1] ); + ang[1] += m * ( x[2]*v_hdt[0] - x[0]*v_hdt[2] ); + ang[2] += m * ( x[0]*v_hdt[1] - x[1]*v_hdt[0] ); + + /* Collect total energy. */ + ekin += 0.5 * m * ( v_hdt[0]*v_hdt[0] + v_hdt[1]*v_hdt[1] + v_hdt[2]*v_hdt[2] ); + epot += m * u_hdt; + /* Init fields for density calculation. */ p->density.wcount = 0.0f; p->density.wcount_dh = 0.0f; diff --git a/src/scheduler.c b/src/scheduler.c index b281f1db00747cd42ab1da5247deb6fb87ddcd25..a3f711fedf9948183671e80fa8173da9f5106b97 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -541,6 +541,11 @@ void scheduler_ranktasks ( struct scheduler *s ) { } + /* Verify that the tasks were ranked correctly. */ + /* for ( k = 1 ; k < s->nr_tasks ; k++ ) + if ( tasks[ tid[k-1] ].rank > tasks[ tid[k-1] ].rank ) + error( "Task ranking failed." ); */ + } @@ -656,11 +661,12 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) { struct task *t, *tasks = s->tasks; // ticks tic; - /* Run throught the tasks backwards and set their waits. */ + /* Run throught the tasks and set their waits. */ // tic = getticks(); // #pragma omp parallel for schedule(static) private(t,j) - for ( k = nr_tasks-1 ; k >= 0 ; k-- ) { + for ( k = nr_tasks - 1 ; k >= 0 ; k-- ) { t = &tasks[ tid[k] ]; + t->wait = 0; if ( !( (1 << t->type) & mask ) || t->skip ) continue; for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) @@ -673,10 +679,13 @@ void scheduler_start ( struct scheduler *s , unsigned int mask ) { // #pragma omp parallel for schedule(static) private(t) for ( k = 0 ; k < nr_tasks ; k++ ) { t = &tasks[ tid[k] ]; - if ( t->rank > 0 ) - break; - if ( ( (1 << t->type) & mask ) && !t->skip && t->wait == 0 ) - scheduler_enqueue( s , t ); + if ( ( (1 << t->type) & mask ) && !t->skip && t->wait == 0 ) { + if ( t->implicit ) + for ( j = 0 ; j < t->nr_unlock_tasks ; j++ ) + atomic_dec( &t->unlock_tasks[j]->wait ); + else + scheduler_enqueue( s , t ); + } } // printf( "scheduler_start: enqueueing tasks took %.3f ms.\n" , (double)( getticks() - tic ) / CPU_TPS * 1000 ); @@ -744,55 +753,6 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) { } -/** - * @brief Take care of a tasks dependencies. - * - * @param s The #scheduler. - * @param t The finished #task. - */ - -void scheduler_done_old ( struct scheduler *s , struct task *t ) { - - int k, res; - struct task *t2; - - /* Release whatever locks this task held. */ - if ( !t->implicit ) - switch ( t->type ) { - case task_type_self: - case task_type_sort: - cell_unlocktree( t->ci ); - break; - case task_type_pair: - case task_type_sub: - cell_unlocktree( t->ci ); - if ( t->cj != NULL ) - cell_unlocktree( t->cj ); - break; - } - - /* Loop through the dependencies and add them to a queue if - they are ready. */ - for ( k = 0 ; k < t->nr_unlock_tasks ; k++ ) { - t2 = t->unlock_tasks[k]; - if ( ( res = atomic_dec( &t2->wait ) ) < 1 ) - error( "Negative wait!" ); - if ( res == 1 && !t2->skip ) - scheduler_enqueue( s , t2 ); - } - - /* Task definitely done. */ - if ( !t->implicit ) { - t->toc = getticks(); - pthread_mutex_lock( &s->sleep_mutex ); - atomic_dec( &s->waiting ); - pthread_cond_broadcast( &s->sleep_cond ); - pthread_mutex_unlock( &s->sleep_mutex ); - } - - } - - /** * @brief Take care of a tasks dependencies. * @@ -819,7 +779,7 @@ struct task *scheduler_done ( struct scheduler *s , struct task *t ) { t2 = t->unlock_tasks[k]; if ( ( res = atomic_dec( &t2->wait ) ) < 1 ) error( "Negative wait!" ); - if (res == 1 && !t2->skip ) { + if ( res == 1 && !t2->skip ) { if ( 0 && !t2->implicit && t2->ci->super == super && ( next == NULL || t2->weight > next->weight ) && diff --git a/src/space.c b/src/space.c index dcf8a0422fab886b4fc891ffbba540092ba26754..a1a933fe21852704652c4b3370a047623b5cea2e 100644 --- a/src/space.c +++ b/src/space.c @@ -856,6 +856,17 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N , /* Allocate the xtra parts array. */ if ( posix_memalign( (void *)&s->xparts , 32 , N * sizeof(struct xpart) ) != 0 ) error( "Failed to allocate xparts." ); + bzero( s->xparts , N * sizeof(struct xpart) ); + + /* Initialize the velocities and internal energies. */ + for ( int k = 0 ; k < N ; k++ ) { + struct part *p = &parts[k]; + struct xpart *xp = &s->xparts[k]; + xp->v_hdt[0] = p->v[0]; + xp->v_hdt[1] = p->v[1]; + xp->v_hdt[2] = p->v[2]; + xp->u_hdt = p->u; + } /* Init the space lock. */ if ( lock_init( &s->lock ) != 0 ) diff --git a/src/space.h b/src/space.h index 7ae8fefce07e09c5bc27faa8b2a0d23bf60f4e57..12f0ad97c96a00142b8254841dfd7335b678b343 100644 --- a/src/space.h +++ b/src/space.h @@ -28,7 +28,7 @@ #define space_subsize_default 5000 #define space_stretch 1.10f #define space_maxreldx 0.25f -#define space_qstack 1000 +#define space_qstack 1024 /* Convert cell location to ID. */