Commit 228931f3 authored by Pedro Gonnet
Browse files

Added node repartitioning based on graph partitioning with METIS.


Former-commit-id: fdc4f8b6f719be0cd2e5e9f6bbbf4dfdbcbeae29
parent 7c9a3897
......@@ -80,12 +80,11 @@ int cell_getsize ( struct cell *c ) {
* @param pc An array of packed #pcell.
* @param c The #cell in which to unpack the #pcell.
* @param s The #space in which the cells are created.
* @param parts The #part array holding the particle data.
*
* @return The number of cells created.
*/
int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s , struct part *parts ) {
int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s ) {
int k, count = 1;
struct cell *temp;
......@@ -95,7 +94,6 @@ int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s , struct p
c->dt_min = pc->dt_min;
c->dt_max = pc->dt_max;
c->count = pc->count;
c->parts = parts;
/* Fill the progeny recursively, depth-first. */
for ( k = 0 ; k < 8 ; k++ )
......@@ -122,8 +120,7 @@ int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s , struct p
temp->parent = c;
c->progeny[k] = temp;
c->split = 1;
count += cell_unpack( &pc[ pc->progeny[k] ] , temp , s , parts );
parts = &parts[ temp->count ];
count += cell_unpack( &pc[ pc->progeny[k] ] , temp , s );
}
/* Return the total number of unpacked cells. */
......@@ -132,6 +129,33 @@ int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s , struct p
}
/**
 * @brief Recursively attach a cell and its progeny to a part array.
 *
 * The cell itself is pointed at @c parts. If the cell is split, each
 * non-NULL progeny is linked in turn, starting at an offset equal to the
 * sum of the values returned for the progeny linked before it.
 *
 * @param c The #cell.
 * @param parts The #part array.
 *
 * @return The number of particles linked, i.e. the @c count of @c c.
 */
int cell_link ( struct cell *c , struct part *parts ) {
    int child;
    int offset = 0;

    /* This cell's particles start at the head of the given array. */
    c->parts = parts;

    /* Walk the progeny depth-first, advancing the offset past each one. */
    if ( c->split ) {
        for ( child = 0 ; child < 8 ; child++ ) {
            struct cell *sub = c->progeny[child];
            if ( sub == NULL )
                continue;
            offset += cell_link( sub , parts + offset );
            }
        }

    /* Report how many particles this cell spans. */
    return c->count;
    }
/**
* @brief Pack the data of the given cell and all its sub-cells.
*
......
......@@ -147,5 +147,6 @@ void cell_split ( struct cell *c );
int cell_locktree( struct cell *c );
void cell_unlocktree( struct cell *c );
int cell_pack ( struct cell *c , struct pcell *pc );
int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s , struct part *parts );
int cell_unpack ( struct pcell *pc , struct cell *c , struct space *s );
int cell_getsize ( struct cell *c );
int cell_link ( struct cell *c , struct part *parts );
This diff is collapsed.
......@@ -33,6 +33,7 @@
#define engine_queue_scale 1.2
#define engine_maxtaskspercell 32
#define engine_maxproxies 36
/* The rank of the engine as a global variable (for messages). */
......@@ -90,6 +91,9 @@ struct engine {
struct proxy *proxies;
int nr_proxies, *proxy_ind;
/* Force the engine to rebuild? */
int forcerebuild, forcerepart;
};
......@@ -101,3 +105,6 @@ void engine_step ( struct engine *e );
void engine_maketasks ( struct engine *e );
void engine_split ( struct engine *e , int *grid );
int engine_exchange_strays ( struct engine *e , struct part *parts , struct xpart *xparts , int *ind , int N );
void engine_rebuild ( struct engine *e );
void engine_repartition ( struct engine *e );
void engine_makeproxies ( struct engine *e );
......@@ -39,7 +39,7 @@
*/
#ifdef WITH_MPI
extern int engine_rank;
#define message(s, ...) printf( "%s[%03i]: " s "\n" , __FUNCTION__ , engine_rank , ##__VA_ARGS__ )
#define message(s, ...) printf( "[%03i] %s: " s "\n" , engine_rank , __FUNCTION__ , ##__VA_ARGS__ )
#else
#define message(s, ...) printf( "%s: " s "\n" , __FUNCTION__ , ##__VA_ARGS__ )
#endif
......@@ -584,7 +584,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, enu
h_err = H5Dwrite(h_data, hdf5Type(type), h_space, H5S_ALL, H5P_DEFAULT, temp);
if(h_err < 0)
{
error( "Error while reading data array '%s'." , name );
error( "Error while writing data array '%s'." , name );
}
/* Write XMF description for this data set */
......
......@@ -90,7 +90,6 @@ void proxy_cells_exch1 ( struct proxy *p ) {
/* Send the pcell buffer. */
if ( MPI_Isend( p->pcells_out , sizeof(struct pcell)*p->size_pcells_out , MPI_BYTE , p->nodeID , p->mynodeID*proxy_tag_shift + proxy_tag_cells , MPI_COMM_WORLD , &p->req_cells_out ) != MPI_SUCCESS )
error( "Failed to pcell_out buffer." );
MPI_Request_free( &p->req_cells_out );
// message( "isent pcells (%i) from node %i to node %i." , p->size_pcells_out , p->mynodeID , p->nodeID ); fflush(stdout);
/* Receive the number of pcells. */
......@@ -216,7 +215,6 @@ void proxy_parts_exch1 ( struct proxy *p ) {
MPI_Isend( p->xparts_out , sizeof(struct xpart)*p->nr_parts_out , MPI_BYTE , p->nodeID , p->mynodeID*proxy_tag_shift + proxy_tag_xparts , MPI_COMM_WORLD , &p->req_xparts_out ) != MPI_SUCCESS )
error( "Failed to isend part data." );
MPI_Request_free( &p->req_parts_out );
MPI_Request_free( &p->req_xparts_out );
// message( "isent particle data (%i) to node %i." , p->nr_parts_out , p->nodeID ); fflush(stdout);
/* for ( int k = 0 ; k < p->nr_parts_out ; k++ )
message( "sending particle %lli, x=[%.3e %.3e %.3e], h=%.3e, to node %i." ,
......
......@@ -764,7 +764,7 @@ void scheduler_enqueue ( struct scheduler *s , struct task *t ) {
case task_type_pair:
case task_type_sub:
qid = t->ci->super->owner;
if ( t->cj != NULL &&
if ( t->cj != NULL &&
( qid < 0 || s->queues[qid].count > s->queues[t->cj->super->owner].count ) )
qid = t->cj->super->owner;
break;
......
......@@ -163,7 +163,7 @@ void space_rebuild_recycle ( struct space *s , struct cell *c ) {
void space_regrid ( struct space *s , double cell_max ) {
float h_max = s->cell_min / kernel_gamma, dmin;
float h_max = s->cell_min / kernel_gamma / space_stretch, dmin;
int i, j, k, cdim[3], nr_parts = s->nr_parts;
struct cell *restrict c;
// ticks tic;
......@@ -281,7 +281,7 @@ void space_regrid ( struct space *s , double cell_max ) {
void space_rebuild ( struct space *s , double cell_max ) {
float h_max = s->cell_min / kernel_gamma, dmin;
float h_max = s->cell_min / kernel_gamma / space_stretch, dmin;
int i, j, k, cdim[3], nr_parts = s->nr_parts;
struct cell *restrict c, *restrict cells = s->cells;
struct part *restrict finger, *restrict p, *parts = s->parts;
......@@ -437,8 +437,8 @@ void space_rebuild ( struct space *s , double cell_max ) {
p = &parts[k];
ind[k] = cell_getid( cdim , p->x[0]*ih[0] , p->x[1]*ih[1] , p->x[2]*ih[2] );
cells[ ind[k] ].count += 1;
if ( cells[ ind[k] ].nodeID != nodeID )
error( "Received part that does not belong to me (nodeID=%i)." , cells[ ind[k] ].nodeID );
/* if ( cells[ ind[k] ].nodeID != nodeID )
error( "Received part that does not belong to me (nodeID=%i)." , cells[ ind[k] ].nodeID ); */
}
nr_parts = s->nr_parts;
#endif
......@@ -506,10 +506,12 @@ void space_rebuild ( struct space *s , double cell_max ) {
void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N , int min , int max ) {
struct {
struct qstack {
int i, j, min, max;
volatile int ready;
} qstack[space_qstack];
};
struct qstack *qstack;
int qstack_size = (max-min)/2 + 1;
volatile unsigned int first, last, waiting;
int pivot;
......@@ -517,18 +519,22 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N ,
struct part temp_p;
struct xpart temp_xp;
/* Allocate the stack. */
if ( ( qstack = malloc( sizeof(struct qstack) * qstack_size ) ) == NULL )
error( "Failed to allocate qstack." );
/* Init the interval stack. */
qstack[0].i = 0;
qstack[0].j = N-1;
qstack[0].min = min;
qstack[0].max = max;
qstack[0].ready = 1;
for ( i = 1 ; i < space_qstack ; i++ )
for ( i = 1 ; i < qstack_size ; i++ )
qstack[i].ready = 0;
first = 0; last = 1; waiting = 1;
/* Parallel bit. */
#pragma omp parallel default(none) shared(first,last,waiting,qstack,parts,xparts,ind) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_xp,temp_p)
#pragma omp parallel default(none) shared(first,last,waiting,qstack,parts,xparts,ind,qstack_size,stderr,engine_rank) private(pivot,i,ii,j,jj,min,max,temp_i,qid,temp_xp,temp_p)
{
/* Main loop. */
......@@ -536,7 +542,7 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N ,
while ( waiting > 0 ) {
/* Grab an interval off the queue. */
qid = atomic_inc( &first ) % space_qstack;
qid = atomic_inc( &first ) % qstack_size;
/* Wait for the interval to be ready. */
while ( waiting > 0 && atomic_cas( &qstack[qid].ready , 1 , 1 ) != 1 );
......@@ -590,14 +596,15 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N ,
/* Recurse on the left? */
if ( jj > i && pivot > min ) {
qid = atomic_inc( &last ) % space_qstack;
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = i;
qstack[qid].j = jj;
qstack[qid].min = min;
qstack[qid].max = pivot;
qstack[qid].ready = 1;
atomic_inc( &waiting );
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the right? */
......@@ -614,14 +621,15 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N ,
/* Recurse on the right? */
if ( jj+1 < j && pivot+1 < max ) {
qid = atomic_inc( &last ) % space_qstack;
qid = atomic_inc( &last ) % qstack_size;
while ( atomic_cas( &qstack[qid].ready , 0 , 0 ) != 0 );
qstack[qid].i = jj+1;
qstack[qid].j = j;
qstack[qid].min = pivot+1;
qstack[qid].max = max;
qstack[qid].ready = 1;
atomic_inc( &waiting );
if ( atomic_inc( &waiting ) >= qstack_size )
error( "Qstack overflow." );
}
/* Recurse on the left? */
......@@ -646,6 +654,9 @@ void parts_sort ( struct part *parts , struct xpart *xparts , int *ind , int N ,
/* for ( i = 1 ; i < N ; i++ )
if ( ind[i-1] > ind[i] )
error( "Sorting failed!" ); */
/* Clean up. */
free( qstack );
}
......@@ -976,7 +987,6 @@ struct cell *space_getcell ( struct space *s ) {
/* Init some things in the cell. */
bzero( c , sizeof(struct cell) );
c->nodeID = -1;
c->owner = -1;
if ( lock_init( &c->lock ) != 0 )
error( "Failed to initialize cell spinlock." );
......@@ -1012,6 +1022,7 @@ void space_init ( struct space *s , double dim[3] , struct part *parts , int N ,
s->parts = parts;
s->cell_min = h_max;
s->nr_queues = 1;
s->size_parts_foreign = 0;
/* Allocate the xtra parts array. */
if ( posix_memalign( (void *)&s->xparts , 32 , N * sizeof(struct xpart) ) != 0 )
......
......@@ -99,6 +99,10 @@ struct space {
/* The associated engine. */
struct engine *e;
/* Buffers for parts that we will receive from foreign cells. */
struct part *parts_foreign;
int nr_parts_foreign, size_parts_foreign;
};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment