diff --git a/src/engine.c b/src/engine.c index 34208b2153e51dc61e81ee7aef76563c55fb8cc6..a5f5f0c59e5df3acff69797e997b866dc75f8701 100644 --- a/src/engine.c +++ b/src/engine.c @@ -72,7 +72,7 @@ void engine_prepare ( struct engine *e ) { /* Rebuild the space. */ // tic = getticks(); space_prepare( e->s ); - // printf( "engine_prepare: space_prepare with %i changes took %.3f ms.\n" , changes , (double)(getticks() - tic) / CPU_TPS * 1000 ); + // printf( "engine_prepare: space_prepare took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); // tic = getticks(); /* Init the queues (round-robin). */ @@ -92,7 +92,7 @@ void engine_prepare ( struct engine *e ) { /* Re-set the particle data. */ // tic = getticks(); -#pragma omp parallel for schedule(static) private(j) + #pragma omp parallel for schedule(static) private(j) for ( k = 0 ; k < s->nr_parts ; k++ ) if ( s->parts[k].dt <= dt_step ) { s->parts[k].wcount = 0.0f; @@ -170,6 +170,127 @@ void engine_barrier( struct engine *e ) { } +/** + * @brief Mapping function to set dt_min and dt_max, do the first + * kick. + */ + +void engine_map_kick_first ( struct cell *c , void *data ) { + + int j, k; + struct engine *e = (struct engine *)data; + float dt_step = e->dt_step, dt = e->dt, hdt = 0.5f*dt; + float dt_min, dt_max, h_max, dx_max; + struct part *restrict p; + struct xpart *restrict xp; + struct cpart *restrict cp; + + /* No children? */ + if ( !c->split ) { + + /* Init the min/max counters. */ + dt_min = FLT_MAX; + dt_max = 0.0f; + h_max = 0.0f; + dx_max = 0.0f; + + /* Loop over parts. */ + for ( k = 0 ; k < c->count ; k++ ) { + + /* Get a handle on the kth particle. */ + p = &c->parts[k]; + xp = p->xtras; + cp = &c->cparts[k]; + + /* Store the min/max dt. */ + dt_min = fminf( dt_min , p->dt ); + dt_max = fmaxf( dt_max , p->dt ); + + /* Step and store the velocity and internal energy. */ + xp->v_old[0] = p->v[0] + hdt * p->a[0]; + xp->v_old[1] = p->v[1] + hdt * p->a[1]; + xp->v_old[2] = p->v[2] + hdt * p->a[2]; + xp->u_old = p->u + hdt * p->force.u_dt; + + /* Move the particles with the velocitie at the half-step. */ + p->x[0] += dt * xp->v_old[0]; + p->x[1] += dt * xp->v_old[1]; + p->x[2] += dt * xp->v_old[2]; + dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) + + (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) + + (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h ); + + /* Update positions and energies at the half-step. */ + p->v[0] += dt * p->a[0]; + p->v[1] += dt * p->a[1]; + p->v[2] += dt * p->a[2]; + p->u *= expf( p->force.u_dt / p->u * dt ); + p->h *= expf( p->force.h_dt / p->h * dt ); + h_max = fmaxf( h_max , p->h ); + + /* Integrate other values if this particle will not be updated. */ + if ( p->dt > dt_step ) { + p->rho *= expf( -3.0f * p->force.h_dt / p->h * dt ); + p->force.POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f ); + } + + /* Fill the cpart. */ + cp->x[0] = p->x[0]; + cp->x[1] = p->x[1]; + cp->x[2] = p->x[2]; + cp->h = p->h; + cp->dt = p->dt; + + /* Init fields for density calculation. */ + if ( p->dt <= dt_step ) { + p->wcount = 0.0f; + p->density.wcount_dh = 0.0f; + p->rho = 0.0f; + p->rho_dh = 0.0f; + p->density.div_v = 0.0f; + for ( j = 0 ; j < 3 ; ++j) + p->density.curl_v[j] = 0.0f; + } + + } + + } + + /* Otherwise, agregate data from children. */ + else { + + /* Init with the first non-null child. */ + for ( k = 0 ; c->progeny[k] == 0 ; k++ ); + dt_min = c->progeny[k]->dt_min; + dt_max = c->progeny[k]->dt_max; + h_max = c->progeny[k]->h_max; + dx_max = c->progeny[k]->dx_max; + + /* Loop over the remaining progeny. */ + for ( k += 1 ; k < 8 ; k++ ) + if ( c->progeny[k] != NULL ) { + dt_min = fminf( dt_min , c->progeny[k]->dt_min ); + dt_max = fmaxf( dt_max , c->progeny[k]->dt_max ); + h_max = fmaxf( h_max , c->progeny[k]->h_max ); + dx_max = fmaxf( dx_max , c->progeny[k]->dx_max ); + } + + } + + /* Store the values. */ + c->dt_min = dt_min; + c->dt_max = dt_max; + c->h_max = h_max; + c->dx_max = dx_max; + + /* Clean out the task pointers. */ + c->sorts[0] = NULL; + c->nr_tasks = 0; + c->nr_density = 0; + + } + + /** * @brief Let the #engine loose to compute the forces. * @@ -201,38 +322,7 @@ void engine_step ( struct engine *e , int sort_queues ) { /* First kick. */ TIMER_TIC - #pragma omp parallel for schedule(static) private(p,xp) - for ( k = 0 ; k < nr_parts ; k++ ) { - - /* Get a handle on the part. */ - p = &parts[k]; - xp = p->xtras; - - /* Step and store the velocity and internal energy. */ - xp->v_old[0] = p->v[0] + hdt * p->a[0]; - xp->v_old[1] = p->v[1] + hdt * p->a[1]; - xp->v_old[2] = p->v[2] + hdt * p->a[2]; - xp->u_old = p->u + hdt * p->force.u_dt; - - /* Move the particles with the velocitie at the half-step. */ - p->x[0] += dt * xp->v_old[0]; - p->x[1] += dt * xp->v_old[1]; - p->x[2] += dt * xp->v_old[2]; - - /* Update positions and energies at the half-step. */ - p->v[0] += dt * p->a[0]; - p->v[1] += dt * p->a[1]; - p->v[2] += dt * p->a[2]; - p->u *= expf( p->force.u_dt / p->u * dt ); - p->h *= expf( p->force.h_dt / p->h * dt ); - - /* Integrate other values if this particle will not be updated. */ - if ( p->dt > dt_step ) { - p->rho *= expf( -3.0f * p->force.h_dt / p->h * dt ); - p->force.POrho2 = p->u * ( const_gamma - 1.0f ) / ( p->rho + p->h * p->rho_dh / 3.0f ); - } - - } + space_map_cells_post( e->s , 1 , &engine_map_kick_first , e ); TIMER_TOC( timer_kick1 ); // for(k=0; k<10; ++k) diff --git a/src/space.c b/src/space.c index 934663223e312c02f56b37afada247f955b0a5d8..3754575cca0dd033f6d45724965ecf3be70bcf5b 100644 --- a/src/space.c +++ b/src/space.c @@ -85,8 +85,9 @@ void space_map_prepare ( struct cell *c , void *data ) { int k; float dt_min, dt_max, h_max, dx_max; - struct part *p; - struct xpart *xp; + struct part *restrict p; + struct xpart *restrict xp; + struct cpart *restrict cp; /* No children? */ if ( !c->split ) { @@ -94,23 +95,36 @@ void space_map_prepare ( struct cell *c , void *data ) { /* Init with first part. */ p = &c->parts[0]; xp = p->xtras; + cp = &c->cparts[0]; + dt_min = p->dt; dt_max = p->dt; h_max = p->h; dx_max = sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) + (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) + (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h; + cp->x[0] = p->x[0]; + cp->x[1] = p->x[1]; + cp->x[2] = p->x[2]; + cp->h = p->h; + cp->dt = p->dt; /* Loop over parts. */ for ( k = 1 ; k < c->count ; k++ ) { p = &c->parts[k]; xp = p->xtras; + cp = &c->cparts[0]; dt_min = fminf( dt_min , p->dt ); dt_max = fmaxf( dt_max , p->dt ); h_max = fmaxf( h_max , p->h ); dx_max = fmaxf( dx_max , sqrtf( (p->x[0] - xp->x_old[0])*(p->x[0] - xp->x_old[0]) + (p->x[1] - xp->x_old[1])*(p->x[1] - xp->x_old[1]) + (p->x[2] - xp->x_old[2])*(p->x[2] - xp->x_old[2]) )*2 + p->h ); + cp->x[0] = p->x[0]; + cp->x[1] = p->x[1]; + cp->x[2] = p->x[2]; + cp->h = p->h; + cp->dt = p->dt; } } @@ -166,9 +180,12 @@ void space_prepare ( struct space *s ) { struct task *t; float dt_step = s->dt_step, dx_max = 0.0f; int counts[ task_type_count + 1 ]; + ticks tic; /* Traverse the cells and set their dt_min and dx_max. */ - space_map_cells_post( s , 1 , &space_map_prepare , NULL ); + // tic = getticks(); + // space_map_cells_post( s , 1 , &space_map_prepare , NULL ); + // printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); /* Get the maximum displacement in the whole system. */ for ( k = 0 ; k < s->nr_cells ; k++ ) @@ -176,6 +193,7 @@ void space_prepare ( struct space *s ) { printf( "space_prepare: dx_max is %e.\n" , dx_max ); /* Run through the tasks and mark as skip or not. */ + tic = getticks(); for ( k = 0 ; k < s->nr_tasks ; k++ ) { t = &s->tasks[k]; if ( t->type == task_type_sort || @@ -190,34 +208,30 @@ void space_prepare ( struct space *s ) { break; } } + printf( "space_prepare: checking tasks took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); /* Did this not go through? */ if ( k < s->nr_tasks ) { /* Re-build the space. */ + tic = getticks(); space_rebuild( s , 0.0 ); + printf( "space_prepare: space_rebuild took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); /* Traverse the cells and set their dt_min and dx_max. */ + tic = getticks(); space_map_cells_post( s , 1 , &space_map_prepare , NULL ); + printf( "space_prepare: space_map_prepare took %.3f ms.\n" , (double)(getticks() - tic)/CPU_TPS*1000 ); } - /* Store the condensed particle data. */ - #pragma omp parallel for schedule(static) - for ( k = 0 ; k < s->nr_parts ; k++ ) { - s->cparts[k].x[0] = s->parts[k].x[0]; - s->cparts[k].x[1] = s->parts[k].x[1]; - s->cparts[k].x[2] = s->parts[k].x[2]; - s->cparts[k].h = s->parts[k].h; - s->cparts[k].dt = s->parts[k].dt; - } - /* Now that we have the cell structre, re-build the tasks. */ - // tic = getticks(); + tic = getticks(); space_maketasks( s , 1 ); - // printf( "space_prepare: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); + printf( "space_prepare: maketasks took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); /* Count the number of each task type. */ + tic = getticks(); for ( k = 0 ; k <= task_type_count ; k++ ) counts[k] = 0; for ( k = 0 ; k < s->nr_tasks ; k++ ) @@ -229,6 +243,7 @@ void space_prepare ( struct space *s ) { for ( k = 1 ; k < task_type_count ; k++ ) printf( " %s=%i" , taskID_names[k] , counts[k] ); printf( " skipped=%i ]\n" , counts[ task_type_count ] ); fflush(stdout); + printf( "space_prepare: task counting took %.3f ms.\n" , (double)(getticks() - tic) / CPU_TPS * 1000 ); } @@ -762,6 +777,7 @@ void space_map_parts ( struct space *s , void (*fun)( struct part *p , struct ce } /* Call the recursive function on all higher-level cells. */ + #pragma omp parallel for schedule(dynamic,1) for ( i = 0 ; i < s->nr_cells ; i++ ) rec_map( &s->cells[i] ); @@ -798,6 +814,7 @@ void space_map_cells_post ( struct space *s , int full , void (*fun)( struct cel } /* Call the recursive function on all higher-level cells. */ + #pragma omp parallel for schedule(dynamic,1) for ( i = 0 ; i < s->nr_cells ; i++ ) rec_map( &s->cells[i] ); @@ -825,6 +842,7 @@ void space_map_cells_pre ( struct space *s , int full , void (*fun)( struct cell } /* Call the recursive function on all higher-level cells. */ + #pragma omp parallel for schedule(dynamic,1) for ( i = 0 ; i < s->nr_cells ; i++ ) rec_map( &s->cells[i] );