diff --git a/src/engine.c b/src/engine.c
index 546510788e8a3b27c192a46d5e76f40b3422c48b..b7de6cd685fb78a29a28637b28c9e96dee4c92e4 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -229,12 +229,12 @@ void engine_redistribute ( struct engine *e ) {
     int res;
     if ( ( res = MPI_Waitall( 4*nr_nodes , reqs , stats ) ) != MPI_SUCCESS ) {
         for ( k = 0 ; k < 4*nr_nodes ; k++ ) {
-            char buff[ MPI_MAX_ERROR_STRING ];
-            int res;
-            MPI_Error_string( stats[k].MPI_ERROR , buff , &res );
-            message( "request %i has error '%s'." , k , buff );
-            }
-        message( "counts is [ %i %i %i %i ]." , counts[0] , counts[1] , counts[2] , counts[3] );
+            char buff[ MPI_MAX_ERROR_STRING ];
+            int res;
+            MPI_Error_string( stats[k].MPI_ERROR , buff , &res );
+            message( "request %i has error '%s'." , k , buff );
+            }
+        message( "counts is [ %i %i %i %i ]." , counts[0] , counts[1] , counts[2] , counts[3] );
         error( "Failed during waitall for part data." );
         }

@@ -282,7 +282,7 @@ void engine_repartition ( struct engine *e ) {

 #if defined(WITH_MPI) && defined(HAVE_METIS)
     int i, j, k, l, cid, cjd, ii, jj, kk, res, w;
-    idx_t *inds;
+    idx_t *inds, *nodeIDs;
     idx_t *weights_v, *weights_e;
     struct space *s = e->s;
     int nr_cells = s->nr_cells, my_cells = 0;
@@ -290,7 +290,7 @@ void engine_repartition ( struct engine *e ) {
     int ind[3], *cdim = s->cdim;
     struct task *t, *tasks = e->sched.tasks;
     struct cell *ci, *cj;
-    int nr_nodes = e->nr_nodes, nodeID = e->nodeID, *nodeIDs;
+    int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
     float wscale = 0.0001, vscale = 0.001;

     /* Clear the repartition flag. */
@@ -424,13 +424,21 @@ void engine_repartition ( struct engine *e ) {
         }

     /* Merge the weights arrays accross all nodes. */
+#if IDXTYPEWIDTH==32
     if ( ( res = MPI_Reduce( ( nodeID == 0 ) ? MPI_IN_PLACE : weights_v , weights_v , nr_cells , MPI_INT , MPI_SUM , 0 , MPI_COMM_WORLD ) ) != MPI_SUCCESS ) {
+#else
+    if ( ( res = MPI_Reduce( ( nodeID == 0 ) ? MPI_IN_PLACE : weights_v , weights_v , nr_cells , MPI_LONG_LONG_INT , MPI_SUM , 0 , MPI_COMM_WORLD ) ) != MPI_SUCCESS ) {
+#endif
         char buff[ MPI_MAX_ERROR_STRING ];
         MPI_Error_string( res , buff , &i );
         error( "Failed to allreduce vertex weights (%s)." , buff );
         }
+#if IDXTYPEWIDTH==32
     if ( MPI_Reduce( ( nodeID == 0 ) ? MPI_IN_PLACE : weights_e , weights_e , 26*nr_cells , MPI_INT , MPI_SUM , 0 , MPI_COMM_WORLD ) != MPI_SUCCESS )
-        error( "Failed to allreduce edge weights." );
+#else
+    if ( MPI_Reduce( ( nodeID == 0 ) ? MPI_IN_PLACE : weights_e , weights_e , 26*nr_cells , MPI_LONG_LONG_INT , MPI_SUM , 0 , MPI_COMM_WORLD ) != MPI_SUCCESS )
+#endif
+        error( "Failed to allreduce edge weights." );

     /* As of here, only one node needs to compute the partition. */
     if ( nodeID == 0 ) {
@@ -466,9 +474,9 @@ void engine_repartition ( struct engine *e ) {
         for ( k = 0 ; k < 26*nr_cells ; k++ )
             if ( weights_e[k] == 0 )
                 weights_e[k] = 1;
-        for ( k = 0 ; k < nr_cells ; k++ )
+        for ( k = 0 ; k < nr_cells ; k++ )
             if ( ( weights_v[k] *= vscale ) == 0 )
-                weights_v[k] = 1;
+                weights_v[k] = 1;

         /* Allocate and fill the connection array. */
         idx_t *offsets;
@@ -484,27 +492,25 @@ void engine_repartition ( struct engine *e ) {
         options[ METIS_OPTION_OBJTYPE ] = METIS_OBJTYPE_CUT;
         options[ METIS_OPTION_NUMBERING ] = 0;
         options[ METIS_OPTION_CONTIG ] = 1;
-        options[ METIS_OPTION_NCUTS ] = 10;
-        options[ METIS_OPTION_NITER ] = 20;
-        // options[ METIS_OPTION_UFACTOR ] = 1;
+        options[ METIS_OPTION_NCUTS ] = 10;
+        options[ METIS_OPTION_NITER ] = 20;
+        // options[ METIS_OPTION_UFACTOR ] = 1;

         /* Set the initial partition, although this is probably ignored. */
         for ( k = 0 ; k < nr_cells ; k++ )
             nodeIDs[k] = cells[k].nodeID;

         /* Call METIS. */
-        int one = 1;
+        idx_t one = 1, idx_nr_cells = nr_cells, idx_nr_nodes = nr_nodes;
         idx_t objval;
-        if ( METIS_PartGraphRecursive( &nr_cells , &one , offsets , inds , weights_v , NULL , weights_e , &nr_nodes , NULL , NULL , options , &objval , nodeIDs ) != METIS_OK )
+        if ( METIS_PartGraphRecursive( &idx_nr_cells , &one , offsets , inds , weights_v , NULL , weights_e , &idx_nr_nodes , NULL , NULL , options , &objval , nodeIDs ) != METIS_OK )
             error( "Call to METIS_PartGraphKway failed." );

-        /* Dump the 3d array of cell IDs. */
-        printf( "engine_repartition: nodeIDs = [" );
-        for ( i = 0 ; i < cdim[0] ; i++ )
-            for ( j = 0 ; j < cdim[1] ; j++ )
-                for ( k = 0 ; k < cdim[2] ; k++ )
-                    printf( "%i " , nodeIDs[ cell_getid( cdim , i , j , k ) ] );
-        printf("]; nodeIDs = reshape(nodeIDs,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]);
+        /* Dump the 3d array of cell IDs. */
+        printf( "engine_repartition: nodeIDs = reshape( [" );
+        for ( i = 0 ; i < cdim[0]*cdim[1]*cdim[2] ; i++ )
+            printf( "%i " , nodeIDs[ i ] );
+        printf("] ,%i,%i,%i);\n",cdim[0],cdim[1],cdim[2]);

         }

@@ -832,13 +838,13 @@ int engine_exchange_strays ( struct engine *e , struct part *parts , struct xpar
     for ( k = 0 ; k < 2*(nr_in + nr_out) ; k++ ) {
         int err;
         if ( ( err = MPI_Waitany( 2*e->nr_proxies , reqs_in , &pid , &status ) ) != MPI_SUCCESS ) {
-            char buff[ MPI_MAX_ERROR_STRING ];
-            int res;
-            MPI_Error_string( err , buff , &res );
+            char buff[ MPI_MAX_ERROR_STRING ];
+            int res;
+            MPI_Error_string( err , buff , &res );
             error( "MPI_Waitany failed (%s)." , buff );
-            }
-        if ( pid == MPI_UNDEFINED )
-            break;
+            }
+        if ( pid == MPI_UNDEFINED )
+            break;
         if ( pid == MPI_UNDEFINED )
             break;
         // message( "request from proxy %i has arrived." , pid );
@@ -958,7 +964,7 @@ void engine_maketasks ( struct engine *e ) {
             t->ci->density = engine_addlink( e , t->ci->density , t );
             atomic_inc( &t->ci->nr_density );
             if ( t->ci->nr_density > 27*8 )
-                error( "Density overflow." );
+                error( "Density overflow." );
             }
         }
     else if ( t->type == task_type_pair ) {
@@ -969,8 +975,8 @@ void engine_maketasks ( struct engine *e ) {
             atomic_inc( &t->ci->nr_density );
             t->cj->density = engine_addlink( e , t->cj->density , t );
             atomic_inc( &t->cj->nr_density );
-            if ( t->ci->nr_density > 8*27 || t->cj->nr_density > 8*27 )
-                error( "Density overflow." );
+            if ( t->ci->nr_density > 8*27 || t->cj->nr_density > 8*27 )
+                error( "Density overflow." );
             }
         }
     else if ( t->type == task_type_sub ) {
@@ -983,9 +989,9 @@ void engine_maketasks ( struct engine *e ) {
         if ( t->cj != NULL ) {
             t->cj->density = engine_addlink( e , t->cj->density , t );
             atomic_inc( &t->cj->nr_density );
-            if ( t->cj->nr_density > 8*27 )
-                error( "Density overflow." );
-            }
+            if ( t->cj->nr_density > 8*27 )
+                error( "Density overflow." );
+            }
             }
         }
     }
@@ -1313,7 +1319,7 @@ void engine_prepare ( struct engine *e ) {
         if ( MPI_Allreduce( &rebuild , &buff , 1 , MPI_INT , MPI_MAX , MPI_COMM_WORLD ) != MPI_SUCCESS )
             error( "Failed to aggreggate the rebuild flag accross nodes." );
         rebuild = buff;
-        // message( "rebuild allreduce took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 );
+        // message( "rebuild allreduce took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 );
     #endif

     e->tic_step = getticks();
@@ -1321,15 +1327,15 @@ void engine_prepare ( struct engine *e ) {
     if ( rebuild ) {
         // tic = getticks();
         engine_rebuild( e );
-        // message( "engine_rebuild took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 );
-        }
+        // message( "engine_rebuild took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 );
+        }

     /* Re-rank the tasks every now and then. */
     if ( e->tasks_age % engine_tasksreweight == 1 ) {
-        // tic = getticks();
+        // tic = getticks();
         scheduler_reweight( &e->sched );
-        // message( "scheduler_reweight took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 );
-        }
+        // message( "scheduler_reweight took %.3f ms." , (double)(getticks() - tic)/CPU_TPS*1000 );
+        }
     e->tasks_age += 1;

     TIMER_TOC( timer_prepare );
@@ -1444,9 +1450,9 @@ void engine_collect_kick2 ( struct cell *c ) {
 //         ih = 1.0f / p.h;
 //         p.rho = 0.0f; p.rho_dh = 0.0f;
 //         p.density.wcount = 0.0f; p.density.wcount_dh = 0.0f;
-//         p.density.div_v = 0.0;
-//         for ( k=0 ; k < 3 ; k++)
-//             p.density.curl_v[k] = 0.0;
+//         p.density.div_v = 0.0;
+//         for ( k=0 ; k < 3 ; k++)
+//             p.density.curl_v[k] = 0.0;
 //
 //         /* Loop over all particle pairs (force). */
 //         for ( k = 0 ; k < N ; k++ ) {
@@ -1877,7 +1883,6 @@ void engine_split ( struct engine *e , int *grid ) {
     int ind[3];
     struct space *s = e->s;
     struct cell *c;
-    struct part *p;

     /* If we've got the wrong number of nodes, fail. */
     if ( e->nr_nodes != grid[0]*grid[1]*grid[2] )
@@ -1898,17 +1903,6 @@ void engine_split ( struct engine *e , int *grid ) {
     /* Make the proxies. */
     engine_makeproxies( e );

-    /* For now, just kill any particle outside of our grid. */
-    for ( k = 0 ; k < s->nr_parts ; k++ ) {
-        p = &s->parts[k];
-        if ( s->cells[ cell_getid( s->cdim , p->x[0]*s->ih[0] , p->x[1]*s->ih[1] , p->x[2]*s->ih[2] ) ].nodeID != e->nodeID ) {
-            s->nr_parts -= 1;
-            s->parts[k] = s->parts[ s->nr_parts ];
-            s->xparts[k] = s->xparts[ s->nr_parts ];
-            k -= 1;
-            }
-        }
-
     /* Re-allocate the local parts. */
     s->size_parts = s->nr_parts * 1.2;
     struct part *parts_new;
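
The repeated IDXTYPEWIDTH guards added in engine_repartition could be collapsed into a single compile-time datatype choice. The following is a minimal sketch, not part of the patch: MPI_IDX_T and reduce_idx_weights are hypothetical names. It assumes METIS 5's metis.h, which defines IDXTYPEWIDTH as 32 or 64, and that a 64-bit idx_t has the same width as MPI_LONG_LONG_INT (true wherever idx_t is int64_t and long long is 64 bits):

#include <mpi.h>
#include <metis.h>

/* Pick the MPI datatype matching METIS's idx_t once, instead of
   guarding every MPI call with its own #if. */
#if IDXTYPEWIDTH == 32
    #define MPI_IDX_T MPI_INT
#else
    #define MPI_IDX_T MPI_LONG_LONG_INT
#endif

/* Sum an array of idx_t weights onto rank 0, in place on the root,
   mirroring the MPI_IN_PLACE pattern used in engine_repartition. */
static int reduce_idx_weights ( int rank , idx_t *weights , int count ) {
    return MPI_Reduce( ( rank == 0 ) ? MPI_IN_PLACE : weights , weights ,
                       count , MPI_IDX_T , MPI_SUM , 0 , MPI_COMM_WORLD );
    }

With such a macro, the two #if/#else blocks around the vertex- and edge-weight reductions shrink to one call each.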
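
The idx_nr_cells and idx_nr_nodes temporaries introduced by the patch exist because every count argument of METIS_PartGraphRecursive is read through an idx_t pointer; passing &nr_cells (an int *) only happens to work when IDXTYPEWIDTH is 32. Below is a small self-contained example of the same calling discipline, partitioning a ring graph in CSR form into two parts. All names are illustrative, options are left NULL so METIS uses its defaults, and it links with -lmetis:

#include <stdio.h>
#include <metis.h>

int main ( void ) {
    idx_t nvtxs = 8, ncon = 1, nparts = 2, objval;
    idx_t xadj[9], adjncy[16], part[8];

    /* CSR adjacency of a ring: vertex i connects to i-1 and i+1. */
    for ( idx_t i = 0 ; i < nvtxs ; i++ ) {
        xadj[i] = 2*i;
        adjncy[2*i] = ( i + nvtxs - 1 ) % nvtxs;
        adjncy[2*i+1] = ( i + 1 ) % nvtxs;
        }
    xadj[nvtxs] = 2*nvtxs;

    /* All counts and arrays are idx_t, never plain int. */
    if ( METIS_PartGraphRecursive( &nvtxs , &ncon , xadj , adjncy ,
            NULL , NULL , NULL , &nparts , NULL , NULL , NULL ,
            &objval , part ) != METIS_OK )
        return 1;

    printf( "edgecut=%i\n" , (int)objval );
    for ( idx_t i = 0 ; i < nvtxs ; i++ )
        printf( "vertex %i -> part %i\n" , (int)i , (int)part[i] );
    return 0;
    }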
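
Two hunks above inline the same MPI_Error_string boilerplate (a buffer, a length, a message). If the pattern keeps recurring, it could be factored into a helper along these lines; mpi_perror is a hypothetical name, not an existing function in this codebase:

#include <stdio.h>
#include <mpi.h>

/* Turn an MPI error code into a readable message on stderr. */
static void mpi_perror ( int code , const char *where ) {
    char buff[ MPI_MAX_ERROR_STRING ];
    int len;
    MPI_Error_string( code , buff , &len );
    fprintf( stderr , "%s: %s\n" , where , buff );
    }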