diff --git a/src/cell.h b/src/cell.h index f7f3246800bcdbc3d55f99120fde96df4606ced9..cdc452f6e7d6a310fdeaa0cd4cb03d0e038515ef 100644 --- a/src/cell.h +++ b/src/cell.h @@ -20,6 +20,7 @@ /* Some constants. */ #define cell_sid_dt 13 + /* The queue timers. */ enum { cell_timer_none = 0, diff --git a/src/const.h b/src/const.h index 0f68c9be92bb222c1dbaa134a71e3cded9d12511..09e0eb87a814f637cd4cc39f4f042e5615fc8ade 100644 --- a/src/const.h +++ b/src/const.h @@ -26,6 +26,7 @@ /* Time integration constants. */ #define const_cfl 0.3f #define const_ln_max_h_change 0.231111721f /* Particle can't change volume by more than a factor of 2=1.26^3 over one time step */ +#define const_max_u_change 0.1f /* Neighbour search constants. */ #define const_eta_kernel 1.2348f /* Corresponds to 48 ngbs with the cubic spline kernel */ diff --git a/src/engine.c b/src/engine.c index ac906099f53857e1dbff7597b11dd476f5ddc9b7..653255a3bca2ad3f4a1155232a732688832082fd 100644 --- a/src/engine.c +++ b/src/engine.c @@ -176,7 +176,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { struct engine *e = (struct engine *)data; float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt; float dt_min, dt_max, h_max, dx, h2dx_max, dx_max; - float a[3], v[3], u, u_dt, h, h_dt, rho, v_old[3], w; + float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho; double x[3], x_old[3]; struct part *restrict p; struct xpart *restrict xp; @@ -236,14 +236,14 @@ void engine_map_kick_first ( struct cell *c , void *data ) { p->v[1] = v[1] += dt * a[1]; p->v[2] = v[2] += dt * a[2]; w = u_dt / u * dt; - if ( fabsf( w ) < 0.05f ) + /* if ( fabsf( w ) < 0.05f ) p->u = u *= 1.0f + w*( 1.0f + w*( -0.5f + w*1.0f/6.0f ) ); - else + else */ p->u = u *= expf( w ); w = h_dt / h * dt; - if ( fabsf( w ) < 0.05f ) + /* if ( fabsf( w ) < 0.05f ) p->h = h *= 1.0f + w*( 1.0f + w*( -0.5f + w*1.0f/6.0f ) ); - else + else */ p->h = h *= expf( w ); h_max = fmaxf( h_max , h ); @@ -259,11 +259,11 @@ void engine_map_kick_first ( struct cell *c , void *data ) { /* Init fields for density calculation. */ if ( pdt > dt_step ) { float w = -3.0f * h_dt / h * dt; - if ( fabsf( w ) < 0.1f ) + /* if ( fabsf( w ) < 0.1f ) rho = p->rho *= 1.0f + w*( 1.0f + w*( -0.5f + w*(1.0f/6.0f - 1.0f/24.0*w ) ) ); - else + else */ rho = p->rho *= expf( w ); - p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho + h * p->rho_dh / 3.0f ); + p->force.POrho2 = u * ( const_gamma - 1.0f ) / ( rho * rho ); } else { p->density.wcount = 0.0f; @@ -407,7 +407,7 @@ void engine_single_density ( double *dim , long long int pid , struct part *__re p.rho = ih * ih * ih * ( p.rho + p.mass*kernel_root ); p.rho_dh = p.rho_dh * ih * ih * ih * ih; p.density.wcount = ( p.density.wcount + kernel_root ) * ( 4.0f / 3.0 * M_PI * kernel_gamma3 ); - printf( "pairs_single: part %lli (h=%e) has wcount=%e, rho=%e, rho_dh=%e.\n" , p.id , p.h , p.density.wcount , p.rho , p.rho_dh ); + printf( "pairs_single_density: part %lli (h=%e) has wcount=%e, rho=%e, rho_dh=%e.\n" , p.id , p.h , p.density.wcount , p.rho , p.rho_dh ); fflush(stdout); } @@ -447,12 +447,17 @@ void engine_single_force ( double *dim , long long int pid , struct part *__rest } r2 = fdx[0]*fdx[0] + fdx[1]*fdx[1] + fdx[2]*fdx[2]; if ( r2 < p.h*p.h*kernel_gamma2 || r2 < parts[k].h*parts[k].h*kernel_gamma2 ) { + p.a[0] = 0.0f; p.a[1] = 0.0f; p.a[2] = 0.0f; + p.force.u_dt = 0.0f; p.force.h_dt = 0.0f; p.force.v_sig = 0.0f; runner_iact_nonsym_force( r2 , fdx , p.h , parts[k].h , &p , &parts[k] ); + double dvdr = ( (p.v[0]-parts[k].v[0])*fdx[0] + (p.v[1]-parts[k].v[1])*fdx[1] + (p.v[2]-parts[k].v[2])*fdx[2] ) / sqrt(r2); + printf( "pairs_single_force: part %lli and %lli interact (r=%.3e,dvdr=%.3e) with a=[%.3e,%.3e,%.3e], dudt=%.3e.\n" , + p.id , parts[k].id , sqrt(r2) , dvdr , p.a[0] , p.a[1], p.a[2] , p.force.u_dt ); } } /* Dump the result. */ - printf( "pairs_single: part %lli (h=%e) has a=[%.3e,%.3e,%.3e], udt=%e.\n" , p.id , p.h , p.a[0] , p.a[1] , p.a[2] , p.force.u_dt ); + // printf( "pairs_single_force: part %lli (h=%e) has a=[%.3e,%.3e,%.3e], udt=%e.\n" , p.id , p.h , p.a[0] , p.a[1] , p.a[2] , p.force.u_dt ); fflush(stdout); } @@ -478,18 +483,18 @@ void engine_step ( struct engine *e , int sort_queues ) { TIMER_TIC2 /* Get the maximum dt. */ - if ( e->policy & engine_policy_fixdt ) - dt_step = FLT_MAX; - else { + if ( e->policy & engine_policy_multistep ) { dt_step = 2.0f*dt; for ( k = 0 ; k < 32 && (e->step & (1 << k)) == 0 ; k++ ) dt_step *= 2; } + else + dt_step = FLT_MAX; /* Set the maximum dt. */ e->dt_step = dt_step; e->s->dt_step = dt_step; - printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout); + // printf( "engine_step: dt_step set to %.3e (dt=%.3e).\n" , dt_step , e->dt ); fflush(stdout); // printParticle( parts , 432626 ); @@ -500,7 +505,7 @@ void engine_step ( struct engine *e , int sort_queues ) { // for(k=0; k<10; ++k) // printParticle(parts, k); - // printParticle( e->s->parts , 382557 , e->s->nr_parts ); + // printParticle( e->s->parts , 3392063069037 , e->s->nr_parts ); /* Prepare the space. */ engine_prepare( e ); @@ -514,7 +519,7 @@ void engine_step ( struct engine *e , int sort_queues ) { } } - // engine_single_density( e->s->dim , 7503966371841 , e->s->parts , e->s->nr_parts , e->s->periodic ); + // engine_single_density( e->s->dim , 3392063069037 , e->s->parts , e->s->nr_parts , e->s->periodic ); /* Start the clock. */ TIMER_TIC_ND @@ -532,12 +537,13 @@ void engine_step ( struct engine *e , int sort_queues ) { /* Stop the clock. */ TIMER_TOC(timer_runners); - // engine_single_force( e->s->dim , 7503966371841 , e->s->parts , e->s->nr_parts , e->s->periodic ); + // engine_single_force( e->s->dim , 5497479945069 , e->s->parts , e->s->nr_parts , e->s->periodic ); // for(k=0; k<10; ++k) // printParticle(parts, k); // printParticle( parts , 432626 ); - // printParticle( e->s->parts , 7503966371841 , e->s->nr_parts ); + // printParticle( e->s->parts , 3392063069037 , e->s->nr_parts ); + // printParticle( e->s->parts , 5497479945069 , e->s->nr_parts ); /* Collect the cell data from the second kick. */ for ( k = 0 ; k < e->s->nr_cells ; k++ ) { @@ -553,13 +559,17 @@ void engine_step ( struct engine *e , int sort_queues ) { } e->dt_min = dt_min; + e->dt_max = dt_max; + e->count_step = count; + e->ekin = ekin; + e->epot = epot; // printParticle( e->s->parts , 382557 , e->s->nr_parts ); - printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout); - printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout); - printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout); - printf( "engine_step: total angular momentum is [ %e , %e , %e ].\n" , ang[0] , ang[1] , ang[2] ); fflush(stdout); + // printf( "engine_step: dt_min/dt_max is %e/%e.\n" , dt_min , dt_max ); fflush(stdout); + // printf( "engine_step: etot is %e (ekin=%e, epot=%e).\n" , ekin+epot , ekin , epot ); fflush(stdout); + // printf( "engine_step: total momentum is [ %e , %e , %e ].\n" , mom[0] , mom[1] , mom[2] ); fflush(stdout); + // printf( "engine_step: total angular momentum is [ %e , %e , %e ].\n" , ang[0] , ang[1] , ang[2] ); fflush(stdout); // printf( "engine_step: total entropic function is %e .\n", ent ); fflush(stdout); - printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout); + // printf( "engine_step: updated %i parts (dt_step=%.3e).\n" , count , dt_step ); fflush(stdout); /* Increase the step. */ e->step += 1; @@ -576,18 +586,20 @@ void engine_step ( struct engine *e , int sort_queues ) { e->dt *= 0.5; while ( dt_min > 2*e->dt ) e->dt *= 2.0; - printf( "engine_step: dt_min=%.3e, adjusting time step to dt=%e.\n" , dt_min , e->dt ); + // printf( "engine_step: dt_min=%.3e, adjusting time step to dt=%e.\n" , dt_min , e->dt ); } else { while ( dt_min < e->dt ) { e->dt *= 0.5; e->step *= 2; - printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt ); + e->nullstep *= 2; + // printf( "engine_step: dt_min dropped below time step, adjusting to dt=%e.\n" , e->dt ); } while ( dt_min > 2*e->dt && (e->step & 1) == 0 ) { e->dt *= 2.0; e->step /= 2; - printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt ); + e->nullstep /= 2; + // printf( "engine_step: dt_min is larger than twice the time step, adjusting to dt=%e.\n" , e->dt ); } } } diff --git a/src/engine.h b/src/engine.h index 4e29a7b670790d094fb7616777c39ea9ef7da464..2d3c6a3575ad8d53fdbdf0e8f5768ad0e0128b76 100644 --- a/src/engine.h +++ b/src/engine.h @@ -26,6 +26,7 @@ #define engine_policy_keep 4 #define engine_policy_block 8 #define engine_policy_fixdt 16 +#define engine_policy_multistep 32 #define engine_queue_scale 1.2 @@ -55,14 +56,20 @@ struct engine { float dt_step; /* The minimum dt over all particles in the system. */ - float dt_min; + float dt_min, dt_max; /* The system time step. */ float dt, dt_orig; + /* The system energies from the previous step. */ + double ekin, epot; + /* The current step number. */ int step, nullstep; + /* The number of particles updated in the previous step. */ + int count_step; + /* The current system time. */ float time; diff --git a/src/runner.c b/src/runner.c index bc9345c05a79f99f27ecd9bb2ea92c4c8feffb6f..d8c032b3917a30f5a528bd9c959f6cf72b84c175 100644 --- a/src/runner.c +++ b/src/runner.c @@ -519,7 +519,8 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { float mom[3] = { 0.0f , 0.0f , 0.0f }, ang[3] = { 0.0f , 0.0f , 0.0f }; float x[3], v[3], u, h, pdt, m; float dt_step = r->e->dt_step, dt = r->e->dt, hdt = 0.5f*dt; - float dt_cfl, dt_h_change; + float dt_cfl, dt_h_change, dt_u_change, dt_new; + float h_dt, u_dt; struct part *p, *parts = c->parts; struct xpart *xp; @@ -534,31 +535,36 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { /* Get local copies of particle data. */ pdt = p->dt; + u_dt = p->force.u_dt; h = p->h; m = p->mass; x[0] = p->x[0]; x[1] = p->x[1]; x[2] = p->x[2]; /* Scale the derivatives if they're freshly computed. */ if ( pdt <= dt_step ) { - p->force.h_dt *= h * 0.333333333f; + h_dt = p->force.h_dt *= h * 0.333333333f; count += 1; } + else + h_dt = p->force.h_dt; /* Update the particle's time step. */ dt_cfl = const_cfl * h / p->force.v_sig; - dt_h_change = fabsf( const_ln_max_h_change * h / p->force.h_dt ); + dt_h_change = ( h_dt != 0.0f ) ? fabsf( const_ln_max_h_change * h / h_dt ) : FLT_MAX; + dt_u_change = ( u_dt != 0.0f ) ? fabsf( const_max_u_change * p->u / u_dt ) : FLT_MAX; + dt_new = fminf( dt_cfl , fminf( dt_h_change , dt_u_change ) ); if ( pdt == 0.0f ) - p->dt = pdt = fminf( dt_cfl , dt_h_change ); + p->dt = pdt = dt_new; else if ( pdt <= dt_step ) - p->dt = pdt = fminf( fminf( dt_cfl , dt_h_change ) , 2.0f*pdt ); + p->dt = pdt = fminf( dt_new , 2.0f*pdt ); else - p->dt = pdt = fminf( fminf( dt_cfl , dt_h_change ) , pdt ); + p->dt = pdt = fminf( dt_new , pdt ); /* Update positions and energies at the half-step. */ p->v[0] = v[0] = xp->v_old[0] + hdt * p->a[0]; p->v[1] = v[1] = xp->v_old[1] + hdt * p->a[1]; p->v[2] = v[2] = xp->v_old[2] + hdt * p->a[2]; - p->u = u = xp->u_old + hdt * p->force.u_dt; + p->u = u = xp->u_old + hdt * u_dt; /* Get the smallest/largest dt. */ dt_min = fminf( dt_min , pdt ); diff --git a/src/runner_iact.h b/src/runner_iact.h index 76f253048f4387df1eb1a3b70c391e404ca2ac0c..05c73c047b7b627ad582ca22a34e296c9e7a072c 100644 --- a/src/runner_iact.h +++ b/src/runner_iact.h @@ -366,7 +366,7 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2 float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; float mi, mj, POrho2i, POrho2j, rhoi, rhoj; float v_sig, omega_ij, Pi_ij; - float dt_max; + // float dt_max; float f; int k; @@ -407,9 +407,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_force ( float r2 Pi_ij *= ( pi->force.balsara + pj->force.balsara ); /* Volker's modified viscosity */ - dt_max = fmaxf(pi->dt, pj->dt); - if(dt_max > 0 && (wi_dr + wj_dr) < 0.) - Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) ); + /* dt_max = fmaxf(pi->dt, pj->dt); + if( dt_max > 0 && (wi_dr + wj_dr) < 0. ) + Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) ); */ /* Get the common factor out. */ w = ri * ( ( POrho2i * wi_dr + POrho2j * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) ); @@ -600,14 +600,15 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl float hi_inv, hi2_inv; float hj_inv, hj2_inv; float wi, wj, wi_dx, wj_dx, wi_dr, wj_dr, w, dvdr; - float mi, mj, POrho2i, POrho2j, rhoi, rhoj; + float /*mi,*/ mj, POrho2i, POrho2j, rhoi, rhoj; float v_sig, omega_ij, Pi_ij; - float dt_max; + // float dt_max; float f; int k; /* Get some values in local variables. */ - mi = pi->mass; mj = pj->mass; + // mi = pi->mass; + mj = pj->mass; rhoi = pi->rho; rhoj = pj->rho; POrho2i = pi->force.POrho2; POrho2j = pj->force.POrho2; @@ -643,9 +644,9 @@ __attribute__ ((always_inline)) INLINE static void runner_iact_nonsym_force ( fl Pi_ij *= ( pi->force.balsara + pj->force.balsara ); /* Volker's modified viscosity */ - dt_max = fmaxf(pi->dt, pj->dt); + /* dt_max = fmaxf(pi->dt, pj->dt); if(dt_max > 0 && (wi_dr + wj_dr) < 0.) - Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) ); + Pi_ij = fminf( Pi_ij, 2.f * omega_ij / ( ( mi + mj ) * ( wi_dr + wj_dr ) * dt_max ) ); */ /* Get the common factor out. */ w = ri * ( ( POrho2i * wi_dr + POrho2j * wj_dr ) + 0.25f * Pi_ij * ( wi_dr + wj_dr ) ); diff --git a/src/space.c b/src/space.c index a5f9c211210ce218cb4253d705d30c50749116e7..5e7214b9a91359466634e5a25fba8b2972a8cc45 100644 --- a/src/space.c +++ b/src/space.c @@ -88,6 +88,7 @@ int space_marktasks ( struct space *s ) { int k, nr_tasks = s->nr_tasks, *ind = s->tasks_ind; struct task *t, *tasks = s->tasks; float dt_step = s->dt_step; + struct cell *ci, *cj; /* Run through the tasks and mark as skip or not. */ for ( k = 0 ; k < nr_tasks ; k++ ) { @@ -121,17 +122,22 @@ int space_marktasks ( struct space *s ) { /* Set this task's skip. */ t->skip = ( t->ci->dt_min > dt_step && t->cj->dt_min > dt_step ); + /* Local pointers. */ + ci = t->ci; + cj = t->cj; + /* Too much particle movement? */ if ( t->tight && - ( t->ci->h2dx_max > t->ci->dmin || t->cj->h2dx_max > t->cj->dmin ) ) + ( ci->h2dx_max > ci->dmin || cj->h2dx_max > cj->dmin || + ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) ) return 1; /* Set the sort flags. */ if ( !t->skip && t->type == task_type_pair ) { - t->ci->sorts->flags |= (1 << t->flags); - t->ci->sorts->skip = 0; - t->cj->sorts->flags |= (1 << t->flags); - t->cj->sorts->skip = 0; + ci->sorts->flags |= (1 << t->flags); + ci->sorts->skip = 0; + cj->sorts->flags |= (1 << t->flags); + cj->sorts->skip = 0; } } diff --git a/src/space.h b/src/space.h index d5cb42f14dff889d9c3f885461099bb6014f4ff8..fdd1ab8c5a0443418c98ee8ed08152d584ec6c0f 100644 --- a/src/space.h +++ b/src/space.h @@ -29,6 +29,7 @@ #define space_dosub 1 #define space_stretch 1.05 #define space_maxtaskspercell 31 +#define space_maxreldx 0.2f /* Convert cell location to ID. */