Commit 7ee52298 authored by Peter W. Draper's avatar Peter W. Draper
Browse files

Merge branch 'mpi_fixes' into 'master'

Mpi fixes



See merge request !35

Former-commit-id: 897ba7af238c5ab40d8106539a1f72f8b2239e77
parents d1254f74 9ed33cf3
......@@ -137,7 +137,6 @@ void engine_redistribute(struct engine *e) {
#ifdef WITH_MPI
int i, j, k, cid;
int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
struct space *s = e->s;
int my_cells = 0;
......@@ -146,9 +145,9 @@ void engine_redistribute(struct engine *e) {
int nr_cells = s->nr_cells;
/* Start by sorting the particles according to their nodes and
getting the counts. */
getting the counts. The counts array is indexed as
count[from * nr_nodes + to]. */
int *counts, *dest;
struct part *parts = s->parts;
double ih[3], dim[3];
ih[0] = s->ih[0];
ih[1] = s->ih[1];
......@@ -160,15 +159,16 @@ void engine_redistribute(struct engine *e) {
(dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
error("Failed to allocate count and dest buffers.");
bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
for (k = 0; k < s->nr_parts; k++) {
for (j = 0; j < 3; j++) {
struct part *parts = s->parts;
for (int k = 0; k < s->nr_parts; k++) {
for (int j = 0; j < 3; j++) {
if (parts[k].x[j] < 0.0)
parts[k].x[j] += dim[j];
else if (parts[k].x[j] >= dim[j])
parts[k].x[j] -= dim[j];
}
cid = cell_getid(cdim, parts[k].x[0] * ih[0], parts[k].x[1] * ih[1],
parts[k].x[2] * ih[2]);
const int cid = cell_getid(cdim, parts[k].x[0] * ih[0],
parts[k].x[1] * ih[1], parts[k].x[2] * ih[2]);
dest[k] = cells[cid].nodeID;
counts[nodeID * nr_nodes + dest[k]] += 1;
}
......@@ -181,7 +181,7 @@ void engine_redistribute(struct engine *e) {
/* Get the new number of parts for this node, be generous in allocating. */
int nr_parts = 0;
for (k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID];
struct part *parts_new;
struct xpart *xparts_new, *xparts = s->xparts;
if (posix_memalign((void **)&parts_new, part_align,
......@@ -195,41 +195,44 @@ void engine_redistribute(struct engine *e) {
if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) ==
NULL)
error("Failed to allocate MPI request list.");
for (k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
for (i = 0, j = 0, k = 0; k < nr_nodes; k++) {
if (k == nodeID && counts[nodeID * nr_nodes + k] > 0) {
memcpy(&parts_new[j], &parts[i],
sizeof(struct part) * counts[k * nr_nodes + nodeID]);
memcpy(&xparts_new[j], &xparts[i],
sizeof(struct xpart) * counts[k * nr_nodes + nodeID]);
i += counts[nodeID * nr_nodes + k];
j += counts[k * nr_nodes + nodeID];
}
if (k != nodeID && counts[nodeID * nr_nodes + k] > 0) {
if (MPI_Isend(&parts[i],
sizeof(struct part) * counts[nodeID * nr_nodes + k],
MPI_BYTE, k, 2 * (nodeID * nr_nodes + k) + 0,
MPI_COMM_WORLD, &reqs[4 * k]) != MPI_SUCCESS)
for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
for (int offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) {
int ind_send = nodeID * nr_nodes + k;
int ind_recv = k * nr_nodes + nodeID;
if (counts[ind_send] > 0) {
if (k == nodeID) {
memcpy(&parts_new[offset_recv], &s->parts[offset_send],
sizeof(struct part) * counts[ind_recv]);
memcpy(&xparts_new[offset_recv], &s->xparts[offset_send],
sizeof(struct xpart) * counts[ind_recv]);
offset_send += counts[ind_send];
offset_recv += counts[ind_recv];
} else {
if (MPI_Isend(&s->parts[offset_send],
sizeof(struct part) * counts[ind_send], MPI_BYTE, k,
2 * ind_send + 0, MPI_COMM_WORLD,
&reqs[4 * k]) != MPI_SUCCESS)
error("Failed to isend parts to node %i.", k);
if (MPI_Isend(&xparts[i],
sizeof(struct xpart) * counts[nodeID * nr_nodes + k],
MPI_BYTE, k, 2 * (nodeID * nr_nodes + k) + 1,
MPI_COMM_WORLD, &reqs[4 * k + 1]) != MPI_SUCCESS)
if (MPI_Isend(&s->xparts[offset_send],
sizeof(struct xpart) * counts[ind_send], MPI_BYTE, k,
2 * ind_send + 1, MPI_COMM_WORLD,
&reqs[4 * k + 1]) != MPI_SUCCESS)
error("Failed to isend xparts to node %i.", k);
i += counts[nodeID * nr_nodes + k];
offset_send += counts[ind_send];
}
}
if (k != nodeID && counts[k * nr_nodes + nodeID] > 0) {
if (MPI_Irecv(&parts_new[j],
sizeof(struct part) * counts[k * nr_nodes + nodeID],
MPI_BYTE, k, 2 * (k * nr_nodes + nodeID) + 0,
MPI_COMM_WORLD, &reqs[4 * k + 2]) != MPI_SUCCESS)
if (k != nodeID && counts[ind_recv] > 0) {
if (MPI_Irecv(&parts_new[offset_recv],
sizeof(struct part) * counts[ind_recv], MPI_BYTE, k,
2 * ind_recv + 0, MPI_COMM_WORLD,
&reqs[4 * k + 2]) != MPI_SUCCESS)
error("Failed to emit irecv of parts from node %i.", k);
if (MPI_Irecv(&xparts_new[j],
sizeof(struct xpart) * counts[k * nr_nodes + nodeID],
MPI_BYTE, k, 2 * (k * nr_nodes + nodeID) + 1,
MPI_COMM_WORLD, &reqs[4 * k + 3]) != MPI_SUCCESS)
if (MPI_Irecv(&xparts_new[offset_recv],
sizeof(struct xpart) * counts[ind_recv], MPI_BYTE, k,
2 * ind_recv + 1, MPI_COMM_WORLD,
&reqs[4 * k + 3]) != MPI_SUCCESS)
error("Failed to emit irecv of parts from node %i.", k);
j += counts[k * nr_nodes + nodeID];
offset_recv += counts[ind_recv];
}
}
......@@ -237,7 +240,7 @@ void engine_redistribute(struct engine *e) {
MPI_Status stats[4 * nr_nodes];
int res;
if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
for (k = 0; k < 4 * nr_nodes; k++) {
for (int k = 0; k < 4 * nr_nodes; k++) {
char buff[MPI_MAX_ERROR_STRING];
int res;
MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
......@@ -264,7 +267,7 @@ void engine_redistribute(struct engine *e) {
s->size_parts = 1.2 * nr_parts;
/* Be verbose about what just happened. */
for (k = 0; k < nr_cells; k++)
for (int k = 0; k < nr_cells; k++)
if (cells[k].nodeID == nodeID) my_cells += 1;
message("node %i now has %i parts in %i cells.", nodeID, nr_parts, my_cells);
......@@ -725,7 +728,6 @@ void engine_exchange_cells(struct engine *e) {
MPI_Request reqs_in[engine_maxproxies];
MPI_Request reqs_out[engine_maxproxies];
MPI_Status status;
struct part *parts = &s->parts[s->nr_parts];
/* Run through the cells and get the size of the ones that will be sent off.
*/
......@@ -802,7 +804,7 @@ void engine_exchange_cells(struct engine *e) {
}
/* Unpack the cells and link to the particle data. */
parts = s->parts_foreign;
struct part *parts = s->parts_foreign;
for (k = 0; k < nr_proxies; k++) {
for (count = 0, j = 0; j < e->proxies[k].nr_cells_in; j++) {
count += cell_link(e->proxies[k].cells_in[j], parts);
......
......@@ -296,9 +296,7 @@ void space_rebuild(struct space *s, double cell_max) {
int j, k, cdim[3], nr_parts = s->nr_parts, nr_gparts = s->nr_gparts;
struct cell *restrict c, *restrict cells;
struct part *restrict finger, *restrict p, *parts = s->parts;
struct xpart *xfinger, *xparts = s->xparts;
struct gpart *gp, *gparts = s->gparts, *gfinger;
struct part *restrict p;
int *ind;
double ih[3], dim[3];
// ticks tic;
......@@ -325,7 +323,7 @@ void space_rebuild(struct space *s, double cell_max) {
cdim[1] = s->cdim[1];
cdim[2] = s->cdim[2];
for (k = 0; k < nr_parts; k++) {
p = &parts[k];
p = &s->parts[k];
for (j = 0; j < 3; j++)
if (p->x[j] < 0.0)
p->x[j] += dim[j];
......@@ -345,12 +343,12 @@ void space_rebuild(struct space *s, double cell_max) {
if (cells[ind[k]].nodeID != nodeID) {
cells[ind[k]].count -= 1;
nr_parts -= 1;
struct part tp = parts[k];
parts[k] = parts[nr_parts];
parts[nr_parts] = tp;
struct xpart txp = xparts[k];
xparts[k] = xparts[nr_parts];
xparts[nr_parts] = txp;
struct part tp = s->parts[k];
s->parts[k] = s->parts[nr_parts];
s->parts[nr_parts] = tp;
struct xpart txp = s->xparts[k];
s->xparts[k] = s->xparts[nr_parts];
s->xparts[nr_parts] = txp;
int t = ind[k];
ind[k] = ind[nr_parts];
ind[nr_parts] = t;
......@@ -361,8 +359,6 @@ void space_rebuild(struct space *s, double cell_max) {
s->nr_parts =
nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts],
s->nr_parts - nr_parts);
parts = s->parts;
xparts = s->xparts;
/* Re-allocate the index array if needed.. */
if (s->nr_parts > ind_size) {
......@@ -376,7 +372,7 @@ void space_rebuild(struct space *s, double cell_max) {
/* Assign each particle to its cell. */
for (k = nr_parts; k < s->nr_parts; k++) {
p = &parts[k];
p = &s->parts[k];
ind[k] =
cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
cells[ind[k]].count += 1;
......@@ -389,13 +385,13 @@ void space_rebuild(struct space *s, double cell_max) {
/* Sort the parts according to their cells. */
// tic = getticks();
parts_sort(parts, xparts, ind, nr_parts, 0, s->nr_cells - 1);
parts_sort(s->parts, s->xparts, ind, nr_parts, 0, s->nr_cells - 1);
// message( "parts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 );
/* Re-link the gparts. */
for (k = 0; k < nr_parts; k++)
if (parts[k].gpart != NULL) parts[k].gpart->part = &parts[k];
if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k];
/* Verify sort. */
/* for ( k = 1 ; k < nr_parts ; k++ ) {
......@@ -415,7 +411,7 @@ void space_rebuild(struct space *s, double cell_max) {
if ((ind = (int *)malloc(sizeof(int) * s->size_gparts)) == NULL)
error("Failed to allocate temporary particle indices.");
for (k = 0; k < nr_gparts; k++) {
gp = &gparts[k];
struct gpart *gp = &s->gparts[k];
for (j = 0; j < 3; j++)
if (gp->x[j] < 0.0)
gp->x[j] += dim[j];
......@@ -432,22 +428,22 @@ void space_rebuild(struct space *s, double cell_max) {
/* Sort the parts according to their cells. */
// tic = getticks();
gparts_sort(gparts, ind, nr_gparts, 0, s->nr_cells - 1);
gparts_sort(s->gparts, ind, nr_gparts, 0, s->nr_cells - 1);
// message( "gparts_sort took %.3f ms." , (double)(getticks() - tic) / CPU_TPS
// * 1000 );
/* Re-link the parts. */
for (k = 0; k < nr_gparts; k++)
if (gparts[k].id > 0) gparts[k].part->gpart = &gparts[k];
if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k];
/* We no longer need the indices as of here. */
free(ind);
/* Hook the cells up to the parts. */
// tic = getticks();
finger = parts;
xfinger = xparts;
gfinger = gparts;
struct part *finger = s->parts;
struct xpart *xfinger = s->xparts;
struct gpart *gfinger = s->gparts;
for (k = 0; k < s->nr_cells; k++) {
c = &cells[k];
c->parts = finger;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment