diff --git a/src/cell.c b/src/cell.c index b3612d6909e66984c6454c7af1c873434cad13ed..ee00db80d2eb2086de0b43799bc8b800eeba246c 100644 --- a/src/cell.c +++ b/src/cell.c @@ -253,6 +253,15 @@ void cell_split ( struct cell *c ) { c->progeny[k]->cparts = &c->cparts[ left[k] ]; } + /* Verify that _all_ the parts have been assigned to a cell. */ + /* for ( k = 1 ; k < 8 ; k++ ) + if ( &c->progeny[k-1]->parts[ c->progeny[k-1]->count ] != c->progeny[k]->parts ) + error( "Particle sorting failed (internal consistency)." ); + if ( c->progeny[0]->parts != c->parts ) + error( "Particle sorting failed (left edge)." ); + if ( &c->progeny[7]->parts[ c->progeny[7]->count ] != &c->parts[ c->count ] ) + error( "Particle sorting failed (right edge)." ); */ + /* Verify a few sub-cells. */ /* for ( k = 0 ; k < c->progeny[0]->count ; k++ ) if ( c->progeny[0]->parts[k].x[0] > pivot[0] || diff --git a/src/cell.h b/src/cell.h index cdc452f6e7d6a310fdeaa0cd4cb03d0e038515ef..dcd016ea90d5eab66a5d49d668db2ad9a746ec13 100644 --- a/src/cell.h +++ b/src/cell.h @@ -52,7 +52,7 @@ struct cell { float slack; /* Maximum particle movement in this cell. */ - float dx_max, h2dx_max; + float dx_max; /* The depth of this cell in the tree. */ int depth, split; @@ -117,7 +117,7 @@ struct cell { /* Linking pointer for "memory management". */ struct cell *next; - + } __attribute__((aligned (64))); diff --git a/src/engine.c b/src/engine.c index 447c9c0709b9d9dedd6130bfb752a4e6894efd25..1b6b188cb8f2dadd1c3e1acffd9f0086d3d64568 100644 --- a/src/engine.c +++ b/src/engine.c @@ -175,7 +175,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { int j, k; struct engine *e = (struct engine *)data; float pdt, dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt; - float dt_min, dt_max, h_max, dx, h2dx_max, dx_max; + float dt_min, dt_max, h_max, dx, dx_max; float a[3], v[3], u, u_dt, h, h_dt, v_old[3], w, rho; double x[3], x_old[3]; struct part *restrict p; @@ -190,7 +190,6 @@ void engine_map_kick_first ( struct cell *c , void *data ) { dt_max = 0.0f; h_max = 0.0f; dx_max = 0.0f; - h2dx_max = 0.0f; /* Loop over parts. */ for ( k = 0 ; k < c->count ; k++ ) { @@ -229,7 +228,6 @@ void engine_map_kick_first ( struct cell *c , void *data ) { (x[1] - x_old[1])*(x[1] - x_old[1]) + (x[2] - x_old[2])*(x[2] - x_old[2]) ); dx_max = fmaxf( dx_max , dx ); - h2dx_max = fmaxf( h2dx_max , dx*2 + h*kernel_gamma ); /* Update positions and energies at the half-step. */ p->v[0] = v[0] += dt * a[0]; @@ -263,7 +261,7 @@ void engine_map_kick_first ( struct cell *c , void *data ) { rho = p->rho *= 1.0f + w*( 1.0f + w*( -0.5f + w*(1.0f/6.0f - 1.0f/24.0*w ) ) ); else */ rho = p->rho *= expf( w ); - p->force.POrho2 = u * ( const_hydro_gamma - 1.0f ) / ( rho * rho ); + p->force.POrho2 = u * ( const_hydro_gamma - 1.0f ) / ( rho * xp->omega ); } else { p->density.wcount = 0.0f; @@ -288,7 +286,6 @@ void engine_map_kick_first ( struct cell *c , void *data ) { dt_max = c->progeny[k]->dt_max; h_max = c->progeny[k]->h_max; dx_max = c->progeny[k]->dx_max; - h2dx_max = c->progeny[k]->h2dx_max; /* Loop over the remaining progeny. */ for ( k += 1 ; k < 8 ; k++ ) @@ -297,7 +294,6 @@ void engine_map_kick_first ( struct cell *c , void *data ) { dt_max = fmaxf( dt_max , c->progeny[k]->dt_max ); h_max = fmaxf( h_max , c->progeny[k]->h_max ); dx_max = fmaxf( dx_max , c->progeny[k]->dx_max ); - h2dx_max = fmaxf( h2dx_max , c->progeny[k]->h2dx_max ); } } @@ -307,7 +303,6 @@ void engine_map_kick_first ( struct cell *c , void *data ) { c->dt_max = dt_max; c->h_max = h_max; c->dx_max = dx_max; - c->h2dx_max = h2dx_max; } @@ -334,8 +329,7 @@ void engine_collect_kick2 ( struct cell *c ) { /* Collect the values from the progeny. */ for ( k = 0 ; k < 8 ; k++ ) - if ( c->progeny[k] != NULL ) { - cp = c->progeny[k]; + if ( ( cp = c->progeny[k] ) != NULL ) { engine_collect_kick2( cp ); dt_min = fminf( dt_min , cp->dt_min ); dt_max = fmaxf( dt_max , cp->dt_max ); @@ -479,6 +473,7 @@ void engine_step ( struct engine *e , int sort_queues ) { float ang[3] = { 0.0 , 0.0 , 0.0 }; int count = 0; struct cell *c; + struct space *s = e->s; TIMER_TIC2 @@ -537,17 +532,17 @@ void engine_step ( struct engine *e , int sort_queues ) { /* Stop the clock. */ TIMER_TOC(timer_runners); - // engine_single_force( e->s->dim , 5497479945069 , e->s->parts , e->s->nr_parts , e->s->periodic ); + // engine_single_force( e->s->dim , 8328423931905 , e->s->parts , e->s->nr_parts , e->s->periodic ); // for(k=0; k<10; ++k) // printParticle(parts, k); // printParticle( parts , 432626 ); // printParticle( e->s->parts , 3392063069037 , e->s->nr_parts ); - // printParticle( e->s->parts , 5497479945069 , e->s->nr_parts ); + // printParticle( e->s->parts , 8328423931905 , e->s->nr_parts ); /* Collect the cell data from the second kick. */ - for ( k = 0 ; k < e->s->nr_cells ; k++ ) { - c = &e->s->cells[k]; + for ( k = 0 ; k < s->nr_cells ; k++ ) { + c = &s->cells[k]; engine_collect_kick2( c ); dt_min = fminf( dt_min , c->dt_min ); dt_max = fmaxf( dt_max , c->dt_max ); diff --git a/src/part.h b/src/part.h index cd358779e0cf91f751c44587d0aaf72068b36136..c7fb3e86b741e6bf0a27bbbbf0ba44d6c78e4a24 100644 --- a/src/part.h +++ b/src/part.h @@ -51,6 +51,9 @@ struct xpart { /* Old entropy. */ float u_old; + /* Old density. */ + float omega; + } __attribute__((aligned (32))); diff --git a/src/runner.c b/src/runner.c index 785bb72e2248105f9811ba2928d983a287048085..ed9b4cd0709d98e46e00c9d6908cdd9b51475fc2 100644 --- a/src/runner.c +++ b/src/runner.c @@ -544,6 +544,7 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { if ( pdt <= dt_step ) { h_dt = p->force.h_dt *= h * 0.333333333f; count += 1; + xp->omega = 1.0f + h * p->rho_dh / p->rho * 0.3333333333f; } else h_dt = p->force.h_dt; @@ -561,10 +562,10 @@ void runner_dokick2 ( struct runner *r , struct cell *c ) { p->dt = pdt = fminf( dt_new , pdt ); /* Update positions and energies at the half-step. */ - p->v[0] = v[0] = xp->v_old[0] + hdt * p->a[0]; - p->v[1] = v[1] = xp->v_old[1] + hdt * p->a[1]; - p->v[2] = v[2] = xp->v_old[2] + hdt * p->a[2]; - p->u = u = xp->u_old + hdt * u_dt; + p->v[0] = ( v[0] = xp->v_old[0] + hdt * p->a[0] ); + p->v[1] = ( v[1] = xp->v_old[1] + hdt * p->a[1] ); + p->v[2] = ( v[2] = xp->v_old[2] + hdt * p->a[2] ); + p->u = ( u = xp->u_old + hdt * u_dt ); /* Get the smallest/largest dt. */ dt_min = fminf( dt_min , pdt ); diff --git a/src/runner_doiact.h b/src/runner_doiact.h index 9efb74f364288cb07edc200b309346ca27cab99e..eeed316f0fe89ae5b419fbfed9bd2c5694ff7331 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1613,7 +1613,7 @@ void DOSUB1 ( struct runner *r , struct cell *ci , struct cell *cj , int sid ) { /* Recurse? */ if ( ci->split && cj->split && - ci->h2dx_max*2 < h && cj->h2dx_max*2 < h ) { + fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max < h ) { /* Different types of flags. */ switch ( sid ) { @@ -1893,7 +1893,7 @@ void DOSUB2 ( struct runner *r , struct cell *ci , struct cell *cj , int sid ) { /* Recurse? */ if ( ci->split && cj->split && - ci->h2dx_max*2 < h && cj->h2dx_max*2 < h ) { + fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max < h ) { /* Different types of flags. */ switch ( sid ) { @@ -2175,7 +2175,7 @@ void DOSUB_SUBSET ( struct runner *r , struct cell *ci , struct part *parts , in /* Recurse? */ if ( ci->split && cj->split && - ci->h2dx_max*2 < h && cj->h2dx_max*2 < h ) { + fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max < h ) { /* Get the type of pair if not specified explicitly. */ sid = space_getsid( s , &ci , &cj , shift ); diff --git a/src/space.c b/src/space.c index 5e7214b9a91359466634e5a25fba8b2972a8cc45..65c6a6711730c14b9b0d01ca474c9a104dc1b1b7 100644 --- a/src/space.c +++ b/src/space.c @@ -119,16 +119,16 @@ int space_marktasks ( struct space *s ) { /* Pair? */ else if ( t->type == task_type_pair || ( t->type == task_type_sub && t->cj != NULL ) ) { - /* Set this task's skip. */ - t->skip = ( t->ci->dt_min > dt_step && t->cj->dt_min > dt_step ); - /* Local pointers. */ ci = t->ci; cj = t->cj; + /* Set this task's skip. */ + t->skip = ( ci->dt_min > dt_step && cj->dt_min > dt_step ); + /* Too much particle movement? */ if ( t->tight && - ( ci->h2dx_max > ci->dmin || cj->h2dx_max > cj->dmin || + ( fmaxf( ci->h_max , cj->h_max ) + ci->dx_max + cj->dx_max > cj->dmin || ci->dx_max > space_maxreldx*ci->h_max || cj->dx_max > space_maxreldx*cj->h_max ) ) return 1; @@ -142,6 +142,10 @@ int space_marktasks ( struct space *s ) { } + /* Kick2? */ + else if ( t->type == task_type_kick2 ) + t->skip = 0; + /* None? */ else if ( t->type == task_type_none ) t->skip = 1; @@ -343,6 +347,7 @@ void space_rebuild ( struct space *s , double cell_max ) { struct part *restrict finger, *restrict p, *parts = s->parts; struct cpart *restrict cfinger; int *ind; + double ih[3], dim[3]; // ticks tic; /* Be verbose about this. */ @@ -412,6 +417,7 @@ void space_rebuild ( struct space *s , double cell_max ) { c->h[0] = s->h[0]; c->h[1] = s->h[1]; c->h[2] = s->h[2]; c->dmin = dmin; c->depth = 0; + c->count = 0; lock_init( &c->lock ); } @@ -431,8 +437,9 @@ void space_rebuild ( struct space *s , double cell_max ) { s->cells[k].nr_tasks = 0; s->cells[k].nr_density = 0; s->cells[k].dx_max = 0.0f; - s->cells[k].h2dx_max = 0.0f; s->cells[k].sorted = 0; + s->cells[k].count = 0; + s->cells[k].kick2 = NULL; } s->maxdepth = 0; @@ -442,26 +449,35 @@ void space_rebuild ( struct space *s , double cell_max ) { // tic = getticks(); if ( ( ind = (int *)malloc( sizeof(int) * s->nr_parts ) ) == NULL ) error( "Failed to allocate temporary particle indices." ); - for ( k = 0 ; k < s->nr_cells ; k++ ) - s->cells[ k ].count = 0; + ih[0] = s->ih[0]; ih[1] = s->ih[1]; ih[2] = s->ih[2]; + dim[0] = s->dim[0]; dim[1] = s->dim[1]; dim[2] = s->dim[2]; #pragma omp parallel for private(p,j) for ( k = 0 ; k < nr_parts ; k++ ) { p = &parts[k]; for ( j = 0 ; j < 3 ; j++ ) if ( p->x[j] < 0.0 ) - p->x[j] += s->dim[j]; - else if ( p->x[j] >= s->dim[j] ) - p->x[j] -= s->dim[j]; - ind[k] = cell_getid( s->cdim , p->x[0]*s->ih[0] , p->x[1]*s->ih[1] , p->x[2]*s->ih[2] ); + p->x[j] += dim[j]; + else if ( p->x[j] >= dim[j] ) + p->x[j] -= dim[j]; + ind[k] = cell_getid( cdim , p->x[0]*ih[0] , p->x[1]*ih[1] , p->x[2]*ih[2] ); atomic_inc( &s->cells[ ind[k] ].count ); } // printf( "space_rebuild: getting particle indices took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); /* Sort the parts according to their cells. */ // tic = getticks(); - parts_sort( parts , ind , s->nr_parts , 0 , s->nr_cells ); + parts_sort( parts , ind , s->nr_parts , 0 , s->nr_cells-1 ); // printf( "space_rebuild: parts_sort took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); + /* Verify sort. */ + /* for ( k = 1 ; k < nr_parts ; k++ ) { + if ( ind[k-1] > ind[k] ) { + error( "Sort failed!" ); + } + else if ( ind[k] != cell_getid( cdim , parts[k].x[0]*ih[0] , parts[k].x[1]*ih[1] , parts[k].x[2]*ih[2] ) ) + error( "Incorrect indices!" ); + } */ + /* We no longer need the indices as of here. */ free( ind ); @@ -527,6 +543,11 @@ void parts_sort_rec ( struct part *parts , int *ind , int N , int min , int max ind[j] = temp_i; parts[j] = temp_p; } + + /* Verify sort. */ + /* for ( i = 1 ; i < N ; i++ ) + if ( ind[i-1] > ind[i] ) + error( "Insert-sort failed!" ); */ } @@ -1312,7 +1333,6 @@ void space_split ( struct space *s , struct cell *c ) { temp->split = 0; temp->h_max = 0.0; temp->dx_max = 0.0; - temp->h2dx_max = 0.0; temp->parent = c; c->progeny[k] = temp; } diff --git a/src/space.h b/src/space.h index fdd1ab8c5a0443418c98ee8ed08152d584ec6c0f..9b9308c8b2a7e1dae49e5d22f8a1f94b9d51ac6a 100644 --- a/src/space.h +++ b/src/space.h @@ -23,11 +23,11 @@ /* Some constants. */ #define space_maxdepth 10 #define space_cellallocchunk 1000 -#define space_splitratio 0.875 +#define space_splitratio 0.875f #define space_splitsize_default 400 #define space_subsize_default 5000 -#define space_dosub 1 -#define space_stretch 1.05 +#define space_dosub 0 +#define space_stretch 1.05f #define space_maxtaskspercell 31 #define space_maxreldx 0.2f