Commit 7ee52298 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'mpi_fixes' into 'master'

Mpi fixes



See merge request !35

Former-commit-id: 897ba7af238c5ab40d8106539a1f72f8b2239e77
parents d1254f74 9ed33cf3
...@@ -137,7 +137,6 @@ void engine_redistribute(struct engine *e) { ...@@ -137,7 +137,6 @@ void engine_redistribute(struct engine *e) {
#ifdef WITH_MPI #ifdef WITH_MPI
int i, j, k, cid;
int nr_nodes = e->nr_nodes, nodeID = e->nodeID; int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
struct space *s = e->s; struct space *s = e->s;
int my_cells = 0; int my_cells = 0;
...@@ -146,9 +145,9 @@ void engine_redistribute(struct engine *e) { ...@@ -146,9 +145,9 @@ void engine_redistribute(struct engine *e) {
int nr_cells = s->nr_cells; int nr_cells = s->nr_cells;
/* Start by sorting the particles according to their nodes and /* Start by sorting the particles according to their nodes and
getting the counts. */ getting the counts. The counts array is indexed as
count[from * nr_nodes + to]. */
int *counts, *dest; int *counts, *dest;
struct part *parts = s->parts;
double ih[3], dim[3]; double ih[3], dim[3];
ih[0] = s->ih[0]; ih[0] = s->ih[0];
ih[1] = s->ih[1]; ih[1] = s->ih[1];
...@@ -160,15 +159,16 @@ void engine_redistribute(struct engine *e) { ...@@ -160,15 +159,16 @@ void engine_redistribute(struct engine *e) {
(dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) (dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
error("Failed to allocate count and dest buffers."); error("Failed to allocate count and dest buffers.");
bzero(counts, sizeof(int) * nr_nodes * nr_nodes); bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
for (k = 0; k < s->nr_parts; k++) { struct part *parts = s->parts;
for (j = 0; j < 3; j++) { for (int k = 0; k < s->nr_parts; k++) {
for (int j = 0; j < 3; j++) {
if (parts[k].x[j] < 0.0) if (parts[k].x[j] < 0.0)
parts[k].x[j] += dim[j]; parts[k].x[j] += dim[j];
else if (parts[k].x[j] >= dim[j]) else if (parts[k].x[j] >= dim[j])
parts[k].x[j] -= dim[j]; parts[k].x[j] -= dim[j];
} }
cid = cell_getid(cdim, parts[k].x[0] * ih[0], parts[k].x[1] * ih[1], const int cid = cell_getid(cdim, parts[k].x[0] * ih[0],
parts[k].x[2] * ih[2]); parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]);
dest[k] = cells[cid].nodeID; dest[k] = cells[cid].nodeID;
counts[nodeID * nr_nodes + dest[k]] += 1; counts[nodeID * nr_nodes + dest[k]] += 1;
} }
...@@ -181,7 +181,7 @@ void engine_redistribute(struct engine *e) { ...@@ -181,7 +181,7 @@ void engine_redistribute(struct engine *e) {
/* Get the new number of parts for this node, be generous in allocating. */ /* Get the new number of parts for this node, be generous in allocating. */
int nr_parts = 0; int nr_parts = 0;
for (k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID]; for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
struct part *parts_new; struct part *parts_new;
struct xpart *xparts_new, *xparts = s->xparts; struct xpart *xparts_new, *xparts = s->xparts;
if (posix_memalign((void **)&parts_new, part_align, if (posix_memalign((void **)&parts_new, part_align,
...@@ -195,41 +195,44 @@ void engine_redistribute(struct engine *e) { ...@@ -195,41 +195,44 @@ void engine_redistribute(struct engine *e) {
if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) == if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) ==
NULL) NULL)
error("Failed to allocate MPI request list."); error("Failed to allocate MPI request list.");
for (k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
for (i = 0, j = 0, k = 0; k < nr_nodes; k++) { for (int offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) {
if (k == nodeID && counts[nodeID * nr_nodes + k] > 0) { int ind_send = nodeID * nr_nodes + k;
memcpy(&parts_new[j], &parts[i], int ind_recv = k * nr_nodes + nodeID;
sizeof(struct part) * counts[k * nr_nodes + nodeID]); if (counts[ind_send] > 0) {
memcpy(&xparts_new[j], &xparts[i], if (k == nodeID) {
sizeof(struct xpart) * counts[k * nr_nodes + nodeID]); memcpy(&parts_new[offset_recv], &s->parts[offset_send],
i += counts[nodeID * nr_nodes + k]; sizeof(struct part) * counts[ind_recv]);
j += counts[k * nr_nodes + nodeID]; memcpy(&xparts_new[offset_recv], &s->xparts[offset_send],
} sizeof(struct xpart) * counts[ind_recv]);
if (k != nodeID && counts[nodeID * nr_nodes + k] > 0) { offset_send += counts[ind_send];
if (MPI_Isend(&parts[i], offset_recv += counts[ind_recv];
sizeof(struct part) * counts[nodeID * nr_nodes + k], } else {
MPI_BYTE, k, 2 * (nodeID * nr_nodes + k) + 0, if (MPI_Isend(&s->parts[offset_send],
MPI_COMM_WORLD, &reqs[4 * k]) != MPI_SUCCESS) sizeof(struct part) * counts[ind_send], MPI_BYTE, k,
error("Failed to isend parts to node %i.", k); 2 * ind_send + 0, MPI_COMM_WORLD,
if (MPI_Isend(&xparts[i], &reqs[4 * k]) != MPI_SUCCESS)
sizeof(struct xpart) * counts[nodeID * nr_nodes + k], error("Failed to isend parts to node %i.", k);
MPI_BYTE, k, 2 * (nodeID * nr_nodes + k) + 1, if (MPI_Isend(&s->xparts[offset_send],
MPI_COMM_WORLD, &reqs[4 * k + 1]) != MPI_SUCCESS) sizeof(struct xpart) * counts[ind_send], MPI_BYTE, k,
error("Failed to isend xparts to node %i.", k); 2 * ind_send + 1, MPI_COMM_WORLD,
i += counts[nodeID * nr_nodes + k]; &reqs[4 * k + 1]) != MPI_SUCCESS)
error("Failed to isend xparts to node %i.", k);
offset_send += counts[ind_send];
}
} }
if (k != nodeID && counts[k * nr_nodes + nodeID] > 0) { if (k != nodeID && counts[ind_recv] > 0) {
if (MPI_Irecv(&parts_new[j], if (MPI_Irecv(&parts_new[offset_recv],
sizeof(struct part) * counts[k * nr_nodes + nodeID], sizeof(struct part) * counts[ind_recv], MPI_BYTE, k,
MPI_BYTE, k, 2 * (k * nr_nodes + nodeID) + 0, 2 * ind_recv + 0, MPI_COMM_WORLD,
MPI_COMM_WORLD, &reqs[4 * k + 2]) != MPI_SUCCESS) &reqs[4 * k + 2]) != MPI_SUCCESS)
error("Failed to emit irecv of parts from node %i.", k); error("Failed to emit irecv of parts from node %i.", k);
if (MPI_Irecv(&xparts_new[j], if (MPI_Irecv(&xparts_new[offset_recv],
sizeof(struct xpart) * counts[k * nr_nodes + nodeID], sizeof(struct xpart) * counts[ind_recv], MPI_BYTE, k,
MPI_BYTE, k, 2 * (k * nr_nodes + nodeID) + 1, 2 * ind_recv + 1, MPI_COMM_WORLD,
MPI_COMM_WORLD, &reqs[4 * k + 3]) != MPI_SUCCESS) &reqs[4 * k + 3]) != MPI_SUCCESS)
error("Failed to emit irecv of parts from node %i.", k); error("Failed to emit irecv of parts from node %i.", k);
j += counts[k * nr_nodes + nodeID]; offset_recv += counts[ind_recv];
} }
} }
...@@ -237,7 +240,7 @@ void engine_redistribute(struct engine *e) { ...@@ -237,7 +240,7 @@ void engine_redistribute(struct engine *e) {
MPI_Status stats[4 * nr_nodes]; MPI_Status stats[4 * nr_nodes];
int res; int res;
if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
for (k = 0; k < 4 * nr_nodes; k++) { for (int k = 0; k < 4 * nr_nodes; k++) {
char buff[MPI_MAX_ERROR_STRING]; char buff[MPI_MAX_ERROR_STRING];
int res; int res;
MPI_Error_string(stats[k].MPI_ERROR, buff, &res); MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
...@@ -264,7 +267,7 @@ void engine_redistribute(struct engine *e) { ...@@ -264,7 +267,7 @@ void engine_redistribute(struct engine *e) {
s->size_parts = 1.2 * nr_parts; s->size_parts = 1.2 * nr_parts;
/* Be verbose about what just happened. */ /* Be verbose about what just happened. */
for (k = 0; k < nr_cells; k++) for (int k = 0; k < nr_cells; k++)
if (cells[k].nodeID == nodeID) my_cells += 1; if (cells[k].nodeID == nodeID) my_cells += 1;
message("node %i now has %i parts in %i cells.", nodeID, nr_parts, my_cells); message("node %i now has %i parts in %i cells.", nodeID, nr_parts, my_cells);
...@@ -725,7 +728,6 @@ void engine_exchange_cells(struct engine *e) { ...@@ -725,7 +728,6 @@ void engine_exchange_cells(struct engine *e) {
MPI_Request reqs_in[engine_maxproxies]; MPI_Request reqs_in[engine_maxproxies];
MPI_Request reqs_out[engine_maxproxies]; MPI_Request reqs_out[engine_maxproxies];
MPI_Status status; MPI_Status status;
struct part *parts = &s->parts[s->nr_parts];
/* Run through the cells and get the size of the ones that will be sent off. /* Run through the cells and get the size of the ones that will be sent off.
*/ */
...@@ -802,7 +804,7 @@ void engine_exchange_cells(struct engine *e) { ...@@ -802,7 +804,7 @@ void engine_exchange_cells(struct engine *e) {
} }
/* Unpack the cells and link to the particle data. */ /* Unpack the cells and link to the particle data. */
parts = s->parts_foreign; struct part *parts = s->parts_foreign;
for (k = 0; k < nr_proxies; k++) { for (k = 0; k < nr_proxies; k++) {
for (count = 0, j = 0; j < e->proxies[k].nr_cells_in; j++) { for (count = 0, j = 0; j < e->proxies[k].nr_cells_in; j++) {
count += cell_link(e->proxies[k].cells_in[j], parts); count += cell_link(e->proxies[k].cells_in[j], parts);
......
...@@ -296,9 +296,7 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -296,9 +296,7 @@ void space_rebuild(struct space *s, double cell_max) {
int j, k, cdim[3], nr_parts = s->nr_parts, nr_gparts = s->nr_gparts; int j, k, cdim[3], nr_parts = s->nr_parts, nr_gparts = s->nr_gparts;
struct cell *restrict c, *restrict cells; struct cell *restrict c, *restrict cells;
struct part *restrict finger, *restrict p, *parts = s->parts; struct part *restrict p;
struct xpart *xfinger, *xparts = s->xparts;
struct gpart *gp, *gparts = s->gparts, *gfinger;
int *ind; int *ind;
double ih[3], dim[3]; double ih[3], dim[3];
// ticks tic; // ticks tic;
...@@ -325,7 +323,7 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -325,7 +323,7 @@ void space_rebuild(struct space *s, double cell_max) {
cdim[1] = s->cdim[1]; cdim[1] = s->cdim[1];
cdim[2] = s->cdim[2]; cdim[2] = s->cdim[2];
for (k = 0; k < nr_parts; k++) { for (k = 0; k < nr_parts; k++) {
p = &parts[k]; p = &s->parts[k];
for (j = 0; j < 3; j++) for (j = 0; j < 3; j++)
if (p->x[j] < 0.0) if (p->x[j] < 0.0)
p->x[j] += dim[j]; p->x[j] += dim[j];
...@@ -345,12 +343,12 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -345,12 +343,12 @@ void space_rebuild(struct space *s, double cell_max) {
if (cells[ind[k]].nodeID != nodeID) { if (cells[ind[k]].nodeID != nodeID) {
cells[ind[k]].count -= 1; cells[ind[k]].count -= 1;
nr_parts -= 1; nr_parts -= 1;
struct part tp = parts[k]; struct part tp = s->parts[k];
parts[k] = parts[nr_parts]; s->parts[k] = s->parts[nr_parts];
parts[nr_parts] = tp; s->parts[nr_parts] = tp;
struct xpart txp = xparts[k]; struct xpart txp = s->xparts[k];
xparts[k] = xparts[nr_parts]; s->xparts[k] = s->xparts[nr_parts];
xparts[nr_parts] = txp; s->xparts[nr_parts] = txp;
int t = ind[k]; int t = ind[k];
ind[k] = ind[nr_parts]; ind[k] = ind[nr_parts];
ind[nr_parts] = t; ind[nr_parts] = t;
...@@ -361,8 +359,6 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -361,8 +359,6 @@ void space_rebuild(struct space *s, double cell_max) {
s->nr_parts = s->nr_parts =
nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts],
s->nr_parts - nr_parts); s->nr_parts - nr_parts);
parts = s->parts;
xparts = s->xparts;
/* Re-allocate the index array if needed.. */ /* Re-allocate the index array if needed.. */
if (s->nr_parts > ind_size) { if (s->nr_parts > ind_size) {
...@@ -376,7 +372,7 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -376,7 +372,7 @@ void space_rebuild(struct space *s, double cell_max) {
/* Assign each particle to its cell. */ /* Assign each particle to its cell. */
for (k = nr_parts; k < s->nr_parts; k++) { for (k = nr_parts; k < s->nr_parts; k++) {
p = &parts[k]; p = &s->parts[k];
ind[k] = ind[k] =
cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
cells[ind[k]].count += 1; cells[ind[k]].count += 1;
...@@ -389,13 +385,13 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -389,13 +385,13 @@ void space_rebuild(struct space *s, double cell_max) {
/* Sort the parts according to their cells. */ /* Sort the parts according to their cells. */
// tic = getticks(); // tic = getticks();
parts_sort(parts, xparts, ind, nr_parts, 0, s->nr_cells - 1); parts_sort(s->parts, s->xparts, ind, nr_parts, 0, s->nr_cells - 1);
// message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS // message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 ); // * 1000 );
/* Re-link the gparts. */ /* Re-link the gparts. */
for (k = 0; k < nr_parts; k++) for (k = 0; k < nr_parts; k++)
if (parts[k].gpart != NULL) parts[k].gpart->part = &parts[k]; if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
/* Verify sort. */ /* Verify sort. */
/* for ( k = 1 ; k < nr_parts ; k++ ) { /* for ( k = 1 ; k < nr_parts ; k++ ) {
...@@ -415,7 +411,7 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -415,7 +411,7 @@ void space_rebuild(struct space *s, double cell_max) {
if ((ind = (int *)malloc(sizeof(int) * s->size_gparts)) == NULL) if ((ind = (int *)malloc(sizeof(int) * s->size_gparts)) == NULL)
error("Failed to allocate temporary particle indices."); error("Failed to allocate temporary particle indices.");
for (k = 0; k < nr_gparts; k++) { for (k = 0; k < nr_gparts; k++) {
gp = &gparts[k]; struct gpart *gp = &s->gparts[k];
for (j = 0; j < 3; j++) for (j = 0; j < 3; j++)
if (gp->x[j] < 0.0) if (gp->x[j] < 0.0)
gp->x[j] += dim[j]; gp->x[j] += dim[j];
...@@ -432,22 +428,22 @@ void space_rebuild(struct space *s, double cell_max) { ...@@ -432,22 +428,22 @@ void space_rebuild(struct space *s, double cell_max) {
/* Sort the parts according to their cells. */ /* Sort the parts according to their cells. */
// tic = getticks(); // tic = getticks();
gparts_sort(gparts, ind, nr_gparts, 0, s->nr_cells - 1); gparts_sort(s->gparts, ind, nr_gparts, 0, s->nr_cells - 1);
// message( "gparts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS // message( "gparts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 ); // * 1000 );
/* Re-link the parts. */ /* Re-link the parts. */
for (k = 0; k < nr_gparts; k++) for (k = 0; k < nr_gparts; k++)
if (gparts[k].id > 0) gparts[k].part->gpart = &gparts[k]; if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k];
/* We no longer need the indices as of here. */ /* We no longer need the indices as of here. */
free(ind); free(ind);
/* Hook the cells up to the parts. */ /* Hook the cells up to the parts. */
// tic = getticks(); // tic = getticks();
finger = parts; struct part *finger = s->parts;
xfinger = xparts; struct xpart *xfinger = s->xparts;
gfinger = gparts; struct gpart *gfinger = s->gparts;
for (k = 0; k < s->nr_cells; k++) { for (k = 0; k < s->nr_cells; k++) {
c = &cells[k]; c = &cells[k];
c->parts = finger; c->parts = finger;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment