diff --git a/examples/main.c b/examples/main.c index 43b56291c4eb023238c39556d536ab88c3daad0c..f770265c1b59dff93513a59ff7246258539325e3 100644 --- a/examples/main.c +++ b/examples/main.c @@ -362,8 +362,8 @@ int main(int argc, char *argv[]) { if (myrank == 0) clocks_gettime(&tic); #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) - read_ic_parallel(ICfileName, dim, &parts, &Ngas, &periodic, myrank, nr_nodes, - MPI_COMM_WORLD, MPI_INFO_NULL); + read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, + myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL); #else read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL); @@ -396,8 +396,8 @@ int main(int argc, char *argv[]) { /* MATTHIEU: Temporary fix to preserve master */ if (!with_gravity) { free(gparts); - for(size_t k = 0; k < Ngas; ++k) - parts[k].gpart = NULL; + gparts = NULL; + for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL; Ngpart = 0; #if defined(WITH_MPI) N_long[0] = Ngas; diff --git a/src/cell.c b/src/cell.c index df11782048dfa80c697f53feefe8fabc104eb23b..b0e299999e9073cc14d92b769b3e157f50d64c8f 100644 --- a/src/cell.c +++ b/src/cell.c @@ -89,14 +89,18 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { c->ti_end_min = pc->ti_end_min; c->ti_end_max = pc->ti_end_max; c->count = pc->count; + c->gcount = pc->gcount; c->tag = pc->tag; - /* Fill the progeny recursively, depth-first. */ + /* Number of new cells created. */ int count = 1; + + /* Fill the progeny recursively, depth-first. */ for (int k = 0; k < 8; k++) if (pc->progeny[k] >= 0) { struct cell *temp = space_getcell(s); temp->count = 0; + temp->gcount = 0; temp->loc[0] = c->loc[0]; temp->loc[1] = c->loc[1]; temp->loc[2] = c->loc[2]; @@ -122,7 +126,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { } /** - * @brief Link the cells recursively to the given part array. + * @brief Link the cells recursively to the given #part array. * * @param c The #cell. * @param parts The #part array. @@ -130,7 +134,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { * @return The number of particles linked. */ -int cell_link(struct cell *c, struct part *parts) { +int cell_link_parts(struct cell *c, struct part *parts) { c->parts = parts; @@ -139,14 +143,40 @@ int cell_link(struct cell *c, struct part *parts) { int offset = 0; for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) - offset += cell_link(c->progeny[k], &parts[offset]); + offset += cell_link_parts(c->progeny[k], &parts[offset]); } } - /* Return the total number of unpacked cells. */ + /* Return the total number of linked particles. */ return c->count; } +/** + * @brief Link the cells recursively to the given #gpart array. + * + * @param c The #cell. + * @param gparts The #gpart array. + * + * @return The number of particles linked. + */ + +int cell_link_gparts(struct cell *c, struct gpart *gparts) { + + c->gparts = gparts; + + /* Fill the progeny recursively, depth-first. */ + if (c->split) { + int offset = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) + offset += cell_link_gparts(c->progeny[k], &gparts[offset]); + } + } + + /* Return the total number of linked particles. */ + return c->gcount; +} + /** * @brief Pack the data of the given cell and all it's sub-cells. 
* @@ -164,6 +194,7 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->ti_end_min = c->ti_end_min; pc->ti_end_max = c->ti_end_max; pc->count = c->count; + pc->gcount = c->gcount; c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag; /* Fill in the progeny, depth-first recursion. */ diff --git a/src/cell.h b/src/cell.h index b0451b311fda9c300427da6b3a9a25955090d799..05b870b144dd425ee9e72ee3b529122b6e721523 100644 --- a/src/cell.h +++ b/src/cell.h @@ -44,7 +44,7 @@ struct pcell { int ti_end_min, ti_end_max; /* Number of particles in this cell. */ - int count; + int count, gcount; /* tag used for MPI communication. */ int tag; @@ -175,7 +175,8 @@ void cell_gunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); int cell_getsize(struct cell *c); -int cell_link(struct cell *c, struct part *parts); +int cell_link_parts(struct cell *c, struct part *parts); +int cell_link_gparts(struct cell *c, struct gpart *gparts); void cell_init_parts(struct cell *c, void *data); void cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell *c, void *data); diff --git a/src/common_io.c b/src/common_io.c index 4be334ccd602c7b0c450f5dbe4aedd9155b35ff1..6183effe9ce392ab930c581cbd118f025bbce773 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -45,6 +45,9 @@ #include "kernel.h" #include "version.h" +const char* particle_type_names[NUM_PARTICLE_TYPES] = { + "Gas", "DM", "Boundary", "Dummy", "Star", "BH"}; + /** * @brief Converts a C data type to the HDF5 equivalent. * @@ -402,52 +405,68 @@ void createXMFfile() { *snapshot * * @param xmfFile The file to write in. - * @param Nparts The number of particles. * @param hdfFileName The name of the HDF5 file corresponding to this output. * @param time The current simulation time. */ -void writeXMFheader(FILE* xmfFile, long long Nparts, char* hdfFileName, - float time) { +void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time) { /* Write end of file */ + fprintf(xmfFile, "<!-- XMF description for file: %s -->\n", hdfFileName); fprintf(xmfFile, "<Grid GridType=\"Collection\" CollectionType=\"Spatial\">\n"); fprintf(xmfFile, "<Time Type=\"Single\" Value=\"%f\"/>\n", time); - fprintf(xmfFile, "<Grid Name=\"Gas\" GridType=\"Uniform\">\n"); - fprintf(xmfFile, - "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%lld\"/>\n", - Nparts); - fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n"); - fprintf(xmfFile, - "<DataItem Dimensions=\"%lld 3\" NumberType=\"Double\" " - "Precision=\"8\" " - "Format=\"HDF\">%s:/PartType0/Coordinates</DataItem>\n", - Nparts, hdfFileName); - fprintf(xmfFile, "</Geometry>"); } /** * @brief Writes the end of the XMF file (closes all open markups) * * @param xmfFile The file to write in. + * @param output The number of this output. + * @param time The current simulation time. 
*/ -void writeXMFfooter(FILE* xmfFile) { +void writeXMFoutputfooter(FILE* xmfFile, int output, float time) { /* Write end of the section of this time step */ - fprintf(xmfFile, "\n</Grid>\n"); - fprintf(xmfFile, "</Grid>\n"); - fprintf(xmfFile, "\n</Grid>\n"); + fprintf(xmfFile, + "\n</Grid> <!-- End of meta-data for output=%03i, time=%f -->\n", + output, time); + fprintf(xmfFile, "\n</Grid> <!-- timeSeries -->\n"); fprintf(xmfFile, "</Domain>\n"); fprintf(xmfFile, "</Xdmf>\n"); fclose(xmfFile); } +void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N, + enum PARTICLE_TYPE ptype) { + fprintf(xmfFile, "\n<Grid Name=\"%s\" GridType=\"Uniform\">\n", + particle_type_names[ptype]); + fprintf(xmfFile, + "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%zi\"/>\n", N); + fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n"); + fprintf(xmfFile, + "<DataItem Dimensions=\"%zi 3\" NumberType=\"Double\" " + "Precision=\"8\" " + "Format=\"HDF\">%s:/PartType%d/Coordinates</DataItem>\n", + N, hdfFileName, ptype); + fprintf(xmfFile, + "</Geometry>\n <!-- Done geometry for %s, start of particle fields " + "list -->\n", + particle_type_names[ptype]); +} + +void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype) { + fprintf(xmfFile, "</Grid> <!-- End of meta-data for parttype=%s -->\n", + particle_type_names[ptype]); +} + /** * @brief Writes the lines corresponding to an array of the HDF5 output * * @param xmfFile The file in which to write * @param fileName The name of the HDF5 file associated to this XMF descriptor. + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param name The name of the array in the HDF5 file. * @param N The number of particles. * @param dim The dimension of the quantity (1 for scalars, 3 for vectors). @@ -455,21 +474,21 @@ void writeXMFfooter(FILE* xmfFile) { * * @todo Treat the types in a better way. */ -void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N, - int dim, enum DATA_TYPE type) { +void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName, + char* name, size_t N, int dim, enum DATA_TYPE type) { fprintf(xmfFile, "<Attribute Name=\"%s\" AttributeType=\"%s\" Center=\"Node\">\n", name, dim == 1 ? "Scalar" : "Vector"); if (dim == 1) fprintf(xmfFile, - "<DataItem Dimensions=\"%lld\" NumberType=\"Double\" " - "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n", - N, type == FLOAT ? 4 : 8, fileName, name); + "<DataItem Dimensions=\"%zi\" NumberType=\"Double\" " + "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n", + N, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name); else fprintf(xmfFile, - "<DataItem Dimensions=\"%lld %d\" NumberType=\"Double\" " - "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n", - N, dim, type == FLOAT ? 4 : 8, fileName, name); + "<DataItem Dimensions=\"%zi %d\" NumberType=\"Double\" " + "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n", + N, dim, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name); fprintf(xmfFile, "</Attribute>\n"); } @@ -483,7 +502,7 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N, * @param gparts The array of #gpart freshly read in. * @param Ndm The number of DM particles read in. 
*/ -void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) { +void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) { /* Let's give all these gparts a negative id */ for (size_t i = 0; i < Ndm; ++i) { @@ -508,8 +527,9 @@ void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) { * @param Ngas The number of gas particles read in. * @param Ndm The number of DM particles read in. */ -void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, - size_t Ngas, size_t Ndm) { +void duplicate_hydro_gparts(struct part* const parts, + struct gpart* const gparts, size_t Ngas, + size_t Ndm) { for (size_t i = 0; i < Ngas; ++i) { @@ -538,16 +558,19 @@ void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, * @param dmparts The array of #gpart containg DM particles to be filled. * @param Ndm The number of DM particles. */ -void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts, - size_t Ndm) { +void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, + struct gpart* const dmparts, size_t Ndm) { size_t count = 0; /* Loop over all gparts */ for (size_t i = 0; i < Ntot; ++i) { + /* message("i=%zd count=%zd id=%lld part=%p", i, count, gparts[i].id, + * gparts[i].part); */ + /* And collect the DM ones */ - if (gparts[i].id < 0) { + if (gparts[i].id < 0LL) { memcpy(&dmparts[count], &gparts[i], sizeof(struct gpart)); dmparts[count].id = -dmparts[count].id; count++; diff --git a/src/common_io.h b/src/common_io.h index 426fa6a01ec87a4413eceaaeb0d0880cbef8a214..961f40e63d771e5e06ade525301caf59aae0bceb 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -70,17 +70,20 @@ enum PARTICLE_TYPE { NUM_PARTICLE_TYPES }; +extern const char* particle_type_names[]; + #define FILENAME_BUFFER_SIZE 150 #define PARTICLE_GROUP_BUFFER_SIZE 20 hid_t hdf5Type(enum DATA_TYPE type); size_t sizeOfType(enum DATA_TYPE type); -void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts, - size_t Ndm); -void prepare_dm_gparts(struct gpart* gparts, size_t Ndm); -void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, - size_t Ngas, size_t Ndm); +void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, + struct gpart* const dmparts, size_t Ndm); +void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm); +void duplicate_hydro_gparts(struct part* const parts, + struct gpart* const gparts, size_t Ngas, + size_t Ndm); void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data); @@ -95,10 +98,13 @@ void writeAttribute_s(hid_t grp, char* name, const char* str); void createXMFfile(); FILE* prepareXMFfile(); -void writeXMFfooter(FILE* xmfFile); -void writeXMFheader(FILE* xmfFile, long long N, char* hdfFileName, float time); -void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N, - int dim, enum DATA_TYPE type); +void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time); +void writeXMFoutputfooter(FILE* xmfFile, int outputCount, float time); +void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N, + enum PARTICLE_TYPE ptype); +void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype); +void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName, + char* name, size_t N, int dim, enum DATA_TYPE type); void writeCodeDescription(hid_t h_file); void writeSPHflavour(hid_t h_file); diff --git a/src/engine.c b/src/engine.c index 2d3a5337433280a2af736351f3b0611b01a8d2a0..28d75bba5378b83d4d0c8f6bf5c7705b40f6edb8 100644 --- a/src/engine.c +++ b/src/engine.c 
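[Editor's note — illustrative sketch, not part of the patch.] The engine.c changes below rely on the part<->gpart linking convention set up by the common_io helpers above (prepare_dm_gparts, duplicate_hydro_gparts, collect_dm_gparts): DM gparts carry a negated, hence negative, id meaning "no hydro partner", while gas gparts are duplicated from the parts and the two point at each other. The following self-contained C sketch restates that convention; part_s/gpart_s and the two function names are hypothetical, simplified stand-ins (the real structs live in src/part.h and carry many more fields).

#include <stddef.h>

/* Simplified stand-ins for the SWIFT part/gpart structs (illustration only). */
struct gpart_s;
struct part_s  { double x[3]; long long id; struct gpart_s *gpart; };
struct gpart_s { double x[3]; long long id; struct part_s  *part;  };

/* DM gparts get a negative id: "no hydro partner" (cf. prepare_dm_gparts). */
void tag_dm_sketch(struct gpart_s *gparts, size_t Ndm) {
  for (size_t i = 0; i < Ndm; ++i) gparts[i].id = -gparts[i].id;
}

/* Gas gparts are appended after the Ndm DM ones, mirror their part's
 * position, keep a non-negative id, and are cross-linked with their part
 * (cf. duplicate_hydro_gparts). */
void link_gas_sketch(struct part_s *parts, struct gpart_s *gparts,
                     size_t Ngas, size_t Ndm) {
  for (size_t i = 0; i < Ngas; ++i) {
    struct gpart_s *gp = &gparts[Ndm + i];
    gp->x[0] = parts[i].x[0];
    gp->x[1] = parts[i].x[1];
    gp->x[2] = parts[i].x[2];
    gp->id = parts[i].id;
    gp->part = &parts[i];
    parts[i].gpart = gp;
  }
}

It is this id-sign convention that lets engine_redistribute() and engine_exchange_strays() in the hunks below overwrite gpart->id with the partner's offset inside the send buffer before communication and rebuild the part<->gpart pointers on the receiving side.
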
@@ -139,39 +139,56 @@ void engine_make_ghost_tasks(struct engine *e, struct cell *c, * @brief Redistribute the particles amongst the nodes according * to their cell's node IDs. * + * The strategy here is as follows: + * 1) Each node counts the number of particles it has to send to each other + * node. + * 2) The number of particles of each type is then exchanged. + * 3) The particles to send are placed in a temporary buffer in which the + * part-gpart links are preserved. + * 4) Each node allocates enough space for the new particles. + * 5) (Asynchronous) communications are issued to transfer the data. + * + * * @param e The #engine. */ - void engine_redistribute(struct engine *e) { #ifdef WITH_MPI - int nr_nodes = e->nr_nodes, nodeID = e->nodeID; + const int nr_nodes = e->nr_nodes; + const int nodeID = e->nodeID; struct space *s = e->s; - int my_cells = 0; - int *cdim = s->cdim; struct cell *cells = s->cells; - int nr_cells = s->nr_cells; + const int nr_cells = s->nr_cells; + const int *cdim = s->cdim; + const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + struct part *parts = s->parts; + struct xpart *xparts = s->xparts; + struct gpart *gparts = s->gparts; ticks tic = getticks(); - /* Start by sorting the particles according to their nodes and - getting the counts. The counts array is indexed as - count[from * nr_nodes + to]. */ - int *counts; - size_t *dest; - double ih[3], dim[3]; - ih[0] = s->ih[0]; - ih[1] = s->ih[1]; - ih[2] = s->ih[2]; - dim[0] = s->dim[0]; - dim[1] = s->dim[1]; - dim[2] = s->dim[2]; - if ((counts = (int *)malloc(sizeof(int) *nr_nodes *nr_nodes)) == NULL || - (dest = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL) - error("Failed to allocate count and dest buffers."); + /* Allocate temporary arrays to store the counts of particles to be sent + and the destination of each particle */ + int *counts, *g_counts; + if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate count temporary buffer."); + if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate gcount temporary buffer."); bzero(counts, sizeof(int) * nr_nodes * nr_nodes); - struct part *parts = s->parts; + bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes); + + // Allocate the destination index arrays. + int *dest, *g_dest; + if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) + error("Failed to allocate dest temporary buffer."); + if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) + error("Failed to allocate g_dest temporary buffer."); + + /* Get destination of each particle */ for (size_t k = 0; k < s->nr_parts; k++) { + + /* Periodic boundary conditions */ for (int j = 0; j < 3; j++) { if (parts[k].x[j] < 0.0) parts[k].x[j] += dim[j]; @@ -184,36 +201,121 @@ void engine_redistribute(struct engine *e) { error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].", cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */ dest[k] = cells[cid].nodeID; + + /* The counts array is indexed as count[from * nr_nodes + to]. */ counts[nodeID * nr_nodes + dest[k]] += 1; } + + /* Sort the particles according to their cell index. */ space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose); + /* We need to re-link the gpart partners of parts. 
*/ + int current_dest = dest[0]; + size_t count_this_dest = 0; + for (size_t k = 0; k < s->nr_parts; ++k) { + if (s->parts[k].gpart != NULL) { + + /* As the addresses will be invalidated by the communications, we will */ + /* instead store the absolute index from the start of the sub-array */ + /* of particles to be sent to a given node. */ + /* Recall that gparts without partners have a negative id. */ + /* We will restore the pointers on the receiving node later on. */ + if (dest[k] != current_dest) { + current_dest = dest[k]; + count_this_dest = 0; + } + + /* Debug */ + /* if(s->parts[k].gpart->id < 0) */ + /* error("Trying to link a partnerless gpart !"); */ + + s->parts[k].gpart->id = count_this_dest; + count_this_dest++; + } + } + + /* Get destination of each g-particle */ + for (size_t k = 0; k < s->nr_gparts; k++) { + + /* Periodic boundary conditions */ + for (int j = 0; j < 3; j++) { + if (gparts[k].x[j] < 0.0) + gparts[k].x[j] += dim[j]; + else if (gparts[k].x[j] >= dim[j]) + gparts[k].x[j] -= dim[j]; + } + const int cid = cell_getid(cdim, gparts[k].x[0] * ih[0], + gparts[k].x[1] * ih[1], gparts[k].x[2] * ih[2]); + /* if (cid < 0 || cid >= s->nr_cells) + error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].", + cid, k, g_parts[k].x[0], g_parts[k].x[1], g_parts[k].x[2]); */ + g_dest[k] = cells[cid].nodeID; + + /* The counts array is indexed as count[from * nr_nodes + to]. */ + g_counts[nodeID * nr_nodes + g_dest[k]] += 1; + } + + /* Sort the gparticles according to their cell index. */ + space_gparts_sort(gparts, g_dest, s->nr_gparts, 0, nr_nodes - 1); + /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to allreduce particle transfer counts."); - /* Get the new number of parts for this node, be generous in allocating. */ - size_t nr_parts = 0; + /* Get all the g_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce gparticle transfer counts."); + + /* Each node knows how many parts and gparts will be transferred to every + other node. We can start preparing to receive data */ + + /* Get the new number of parts and gparts for this node */ + size_t nr_parts = 0, nr_gparts = 0; for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_gparts += g_counts[k * nr_nodes + nodeID]; + + /* Allocate the new arrays with some extra margin */ struct part *parts_new = NULL; - struct xpart *xparts_new = NULL, *xparts = s->xparts; + struct xpart *xparts_new = NULL; + struct gpart *gparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, - sizeof(struct part) * nr_parts * 1.2) != 0 || - posix_memalign((void **)&xparts_new, part_align, - sizeof(struct xpart) * nr_parts * 1.2) != 0) + sizeof(struct part) * nr_parts * + engine_redistribute_alloc_margin) != 0) error("Failed to allocate new part data."); - - /* Emit the sends and recvs for the particle data. 
*/ + if (posix_memalign((void **)&xparts_new, xpart_align, + sizeof(struct xpart) * nr_parts * + engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new xpart data."); + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * nr_gparts * + engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new gpart data."); + + /* Prepare MPI requests for the asynchronous communications */ MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) == + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) == NULL) error("Failed to allocate MPI request list."); - for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; - for (size_t offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) { - int ind_send = nodeID * nr_nodes + k; - int ind_recv = k * nr_nodes + nodeID; + for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; + + /* Emit the sends and recvs for the particle and gparticle data. */ + size_t offset_send = 0, offset_recv = 0; + size_t g_offset_send = 0, g_offset_recv = 0; + for (int k = 0; k < nr_nodes; k++) { + + /* Indices in the count arrays of the node of interest */ + const int ind_send = nodeID * nr_nodes + k; + const int ind_recv = k * nr_nodes + nodeID; + + /* Are we sending any part/xpart ? */ if (counts[ind_send] > 0) { + + /* message("Sending %d part to node %d", counts[ind_send], k); */ + + /* If the send is to the same node, just copy */ if (k == nodeID) { memcpy(&parts_new[offset_recv], &s->parts[offset_send], sizeof(struct part) * counts[ind_recv]); @@ -221,36 +323,73 @@ void engine_redistribute(struct engine *e) { sizeof(struct xpart) * counts[ind_recv]); offset_send += counts[ind_send]; offset_recv += counts[ind_recv]; + + /* Else, emit some communications */ } else { - if (MPI_Isend(&s->parts[offset_send], counts[ind_send], - e->part_mpi_type, k, 2 * ind_send + 0, MPI_COMM_WORLD, - &reqs[4 * k]) != MPI_SUCCESS) - error("Failed to isend parts to node %zi.", k); - if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], - e->xpart_mpi_type, k, 2 * ind_send + 1, MPI_COMM_WORLD, - &reqs[4 * k + 1]) != MPI_SUCCESS) - error("Failed to isend xparts to node %zi.", k); + if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type, + k, 3 * ind_send + 0, MPI_COMM_WORLD, + &reqs[6 * k]) != MPI_SUCCESS) + error("Failed to isend parts to node %i.", k); + if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type, + k, 3 * ind_send + 1, MPI_COMM_WORLD, + &reqs[6 * k + 1]) != MPI_SUCCESS) + error("Failed to isend xparts to node %i.", k); offset_send += counts[ind_send]; } } + + /* Are we sending any gpart ? */ + if (g_counts[ind_send] > 0) { + + /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */ + + /* If the send is to the same node, just copy */ + if (k == nodeID) { + memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send], + sizeof(struct gpart) * g_counts[ind_recv]); + g_offset_send += g_counts[ind_send]; + g_offset_recv += g_counts[ind_recv]; + + /* Else, emit some communications */ + } else { + if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send], + gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD, + &reqs[6 * k + 2]) != MPI_SUCCESS) + error("Failed to isend gparts to node %i.", k); + g_offset_send += g_counts[ind_send]; + } + } + + /* Now emit the corresponding Irecv() */ + + /* Are we receiving any part/xpart from this node ? 
*/ if (k != nodeID && counts[ind_recv] > 0) { - if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], e->part_mpi_type, - k, 2 * ind_recv + 0, MPI_COMM_WORLD, - &reqs[4 * k + 2]) != MPI_SUCCESS) - error("Failed to emit irecv of parts from node %zi.", k); - if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], - e->xpart_mpi_type, k, 2 * ind_recv + 1, MPI_COMM_WORLD, - &reqs[4 * k + 3]) != MPI_SUCCESS) - error("Failed to emit irecv of parts from node %zi.", k); + if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k, + 3 * ind_recv + 0, MPI_COMM_WORLD, + &reqs[6 * k + 3]) != MPI_SUCCESS) + error("Failed to emit irecv of parts from node %i.", k); + if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type, + k, 3 * ind_recv + 1, MPI_COMM_WORLD, + &reqs[6 * k + 4]) != MPI_SUCCESS) + error("Failed to emit irecv of xparts from node %i.", k); offset_recv += counts[ind_recv]; } + + /* Are we receiving any gpart from this node ? */ + if (k != nodeID && g_counts[ind_recv] > 0) { + if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv], + gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD, + &reqs[6 * k + 5]) != MPI_SUCCESS) + error("Failed to emit irecv of gparts from node %i.", k); + g_offset_recv += g_counts[ind_recv]; + } } /* Wait for all the sends and recvs to tumble in. */ - MPI_Status stats[4 * nr_nodes]; + MPI_Status stats[6 * nr_nodes]; int res; - if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { - for (int k = 0; k < 4 * nr_nodes; k++) { + if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 6 * nr_nodes; k++) { char buff[MPI_MAX_ERROR_STRING]; int res; MPI_Error_string(stats[k].MPI_ERROR, buff, &res); @@ -259,33 +398,90 @@ void engine_redistribute(struct engine *e) { error("Failed during waitall for part data."); } + /* We now need to restore the part<->gpart links */ + size_t offset_parts = 0, offset_gparts = 0; + for (int node = 0; node < nr_nodes; ++node) { + + const int ind_recv = node * nr_nodes + nodeID; + const size_t count_parts = counts[ind_recv]; + const size_t count_gparts = g_counts[ind_recv]; + + /* Loop over the gparts received from that node */ + for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) { + + /* Does this gpart have a partner ? */ + if (gparts_new[k].id >= 0) { + + const size_t partner_index = offset_parts + gparts_new[k].id; + + /* Re-link */ + gparts_new[k].part = &parts_new[partner_index]; + gparts_new[k].part->gpart = &gparts_new[k]; + } + } + + offset_parts += count_parts; + offset_gparts += count_gparts; + } + /* Verify that all parts are in the right place. 
*/ /* for ( int k = 0 ; k < nr_parts ; k++ ) { - int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0], parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] ); + int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0], + parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] ); if ( cells[ cid ].nodeID != nodeID ) - error( "Received particle (%i) that does not belong here (nodeID=%i).", k , cells[ cid ].nodeID ); + error( "Received particle (%i) that does not belong here + (nodeID=%i).", k , cells[ cid ].nodeID ); } */ + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < nr_gparts; ++k) { + + if (gparts_new[k].id > 0) { + + if (gparts_new[k].part->gpart != &gparts_new[k]) + error("Linking problem !"); + + /* if (gparts_new[k].x[0] != gparts_new[k].part->x[0] || */ + /* gparts_new[k].x[1] != gparts_new[k].part->x[1] || */ + /* gparts_new[k].x[2] != gparts_new[k].part->x[2]) */ + /* error("Linked particles are not at the same position !"); */ + } + } + for (size_t k = 0; k < nr_parts; ++k) { + + if (parts_new[k].gpart != NULL) { + + if (parts_new[k].gpart->part != &parts_new[k]) error("Linking problem !"); + } + } + /* Set the new part data, free the old. */ free(parts); free(xparts); + free(gparts); s->parts = parts_new; s->xparts = xparts_new; + s->gparts = gparts_new; s->nr_parts = nr_parts; - s->size_parts = 1.2 * nr_parts; + s->nr_gparts = nr_gparts; + s->size_parts = engine_redistribute_alloc_margin * nr_parts; + s->size_gparts = engine_redistribute_alloc_margin * nr_gparts; - /* Be verbose about what just happened. */ - for (int k = 0; k < nr_cells; k++) - if (cells[k].nodeID == nodeID) my_cells += 1; - if (e->verbose) - message("node %i now has %zi parts in %i cells.", nodeID, nr_parts, - my_cells); - - /* Clean up other stuff. */ + /* Clean up the temporary stuff. */ free(reqs); free(counts); free(dest); + /* Be verbose about what just happened. */ + if (e->verbose) { + int my_cells = 0; + for (int k = 0; k < nr_cells; k++) + if (cells[k].nodeID == nodeID) my_cells += 1; + message("node %i now has %zi parts and %zi gparts in %i cells.", nodeID, + nr_parts, nr_gparts, my_cells); + } + if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -547,31 +743,40 @@ void engine_exchange_cells(struct engine *e) { /* Count the number of particles we need to import and re-allocate the buffer if needed. 
*/ - int count_in = 0; + int count_parts_in = 0, count_gparts_in = 0; for (int k = 0; k < nr_proxies; k++) - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) - count_in += e->proxies[k].cells_in[j]->count; - if (count_in > s->size_parts_foreign) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + count_parts_in += e->proxies[k].cells_in[j]->count; + count_gparts_in += e->proxies[k].cells_in[j]->gcount; + } + if (count_parts_in > s->size_parts_foreign) { if (s->parts_foreign != NULL) free(s->parts_foreign); - s->size_parts_foreign = 1.1 * count_in; + s->size_parts_foreign = 1.1 * count_parts_in; if (posix_memalign((void **)&s->parts_foreign, part_align, sizeof(struct part) * s->size_parts_foreign) != 0) error("Failed to allocate foreign part data."); } + if (count_gparts_in > s->size_gparts_foreign) { + if (s->gparts_foreign != NULL) free(s->gparts_foreign); + s->size_gparts_foreign = 1.1 * count_gparts_in; + if (posix_memalign((void **)&s->gparts_foreign, gpart_align, + sizeof(struct gpart) * s->size_gparts_foreign) != 0) + error("Failed to allocate foreign gpart data."); + } /* Unpack the cells and link to the particle data. */ struct part *parts = s->parts_foreign; + struct gpart *gparts = s->gparts_foreign; for (int k = 0; k < nr_proxies; k++) { for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - cell_link(e->proxies[k].cells_in[j], parts); + cell_link_parts(e->proxies[k].cells_in[j], parts); + cell_link_gparts(e->proxies[k].cells_in[j], gparts); parts = &parts[e->proxies[k].cells_in[j]->count]; + gparts = &gparts[e->proxies[k].cells_in[j]->gcount]; } } s->nr_parts_foreign = parts - s->parts_foreign; - - /* Is the parts buffer large enough? */ - if (s->nr_parts_foreign > s->size_parts_foreign) - error("Foreign parts buffer too small."); + s->nr_gparts_foreign = gparts - s->gparts_foreign; /* Free the pcell buffer. */ free(pcells); @@ -589,16 +794,24 @@ void engine_exchange_cells(struct engine *e) { * @brief Exchange straying parts with other nodes. * * @param e The #engine. - * @param offset The index in the parts array as of which the foreign parts - *reside. - * @param ind The ID of the foreign #cell. - * @param N The number of stray parts. + * @param offset_parts The index in the parts array as of which the foreign + * parts reside. + * @param ind_part The foreign #cell ID of each part. + * @param Npart The number of stray parts, contains the number of parts received + * on return. + * @param offset_gparts The index in the gparts array as of which the foreign + * parts reside. + * @param ind_gpart The foreign #cell ID of each gpart. + * @param Ngpart The number of stray gparts, contains the number of gparts + * received on return. * - * @return The number of arrived parts copied to parts and xparts. + * Note that this function does not mess-up the linkage between parts and + * gparts, i.e. the received particles have correct linkeage. */ -int engine_exchange_strays(struct engine *e, int offset, size_t *ind, - size_t N) { +void engine_exchange_strays(struct engine *e, size_t offset_parts, + int *ind_part, size_t *Npart, size_t offset_gparts, + int *ind_gpart, size_t *Ngpart) { #ifdef WITH_MPI @@ -608,25 +821,49 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, /* Re-set the proxies. */ for (int k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0; - /* Put the parts into the corresponding proxies. 
*/ - for (size_t k = 0; k < N; k++) { - const int node_id = e->s->cells[ind[k]].nodeID; + /* Put the parts and gparts into the corresponding proxies. */ + for (size_t k = 0; k < *Npart; k++) { + /* Get the target node and proxy ID. */ + const int node_id = e->s->cells[ind_part[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) error("Bad node ID %i.", node_id); const int pid = e->proxy_ind[node_id]; - if (pid < 0) + if (pid < 0) { error( "Do not have a proxy for the requested nodeID %i for part with " "id=%llu, x=[%e,%e,%e].", - node_id, s->parts[offset + k].id, s->parts[offset + k].x[0], - s->parts[offset + k].x[1], s->parts[offset + k].x[2]); - proxy_parts_load(&e->proxies[pid], &s->parts[offset + k], - &s->xparts[offset + k], 1); + node_id, s->parts[offset_parts + k].id, + s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1], + s->parts[offset_parts + k].x[2]); + } + + /* Re-link the associated gpart with the buffer offset of the part. */ + if (s->parts[offset_parts + k].gpart != NULL) { + s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_in; + } + + /* Load the part and xpart into the proxy. */ + proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k], + &s->xparts[offset_parts + k], 1); + } + for (size_t k = 0; k < *Ngpart; k++) { + const int node_id = e->s->cells[ind_gpart[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lli, x=[%e,%e,%e].", + node_id, s->gparts[offset_parts + k].id, + s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1], + s->gparts[offset_gparts + k].x[2]); + proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1); } /* Launch the proxies. */ - MPI_Request reqs_in[2 * engine_maxproxies]; - MPI_Request reqs_out[2 * engine_maxproxies]; + MPI_Request reqs_in[3 * engine_maxproxies]; + MPI_Request reqs_out[3 * engine_maxproxies]; for (int k = 0; k < e->nr_proxies; k++) { proxy_parts_exch1(&e->proxies[k]); reqs_in[k] = e->proxies[k].req_parts_count_in; @@ -650,11 +887,18 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, /* Count the total number of incoming particles and make sure we have enough space to accommodate them. 
*/ - size_t count_in = 0; - for (int k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in; - if (e->verbose) message("sent out %zi particles, got %zi back.", N, count_in); - if (offset + count_in > s->size_parts) { - s->size_parts = (offset + count_in) * 1.05; + int count_parts_in = 0; + int count_gparts_in = 0; + for (int k = 0; k < e->nr_proxies; k++) { + count_parts_in += e->proxies[k].nr_parts_in; + count_gparts_in += e->proxies[k].nr_gparts_in; + } + if (e->verbose) { + message("sent out %zi/%zi parts/gparts, got %i/%i back.", *Npart, *Ngpart, + count_parts_in, count_gparts_in); + } + if (offset_parts + count_parts_in > s->size_parts) { + s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; struct part *parts_new = NULL; struct xpart *xparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, @@ -662,37 +906,61 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, posix_memalign((void **)&xparts_new, part_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); - memcpy(parts_new, s->parts, sizeof(struct part) * offset); - memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset); + memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts); + memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts); free(s->parts); free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; } + if (offset_gparts + count_gparts_in > s->size_gparts) { + s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); + free(s->gparts); + s->gparts = gparts_new; + } /* Collect the requests for the particle data from the proxies. */ int nr_in = 0, nr_out = 0; for (int k = 0; k < e->nr_proxies; k++) { if (e->proxies[k].nr_parts_in > 0) { - reqs_in[2 * k] = e->proxies[k].req_parts_in; - reqs_in[2 * k + 1] = e->proxies[k].req_xparts_in; + reqs_in[3 * k] = e->proxies[k].req_parts_in; + reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in; + nr_in += 2; + } else { + reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_gparts_in > 0) { + reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in; nr_in += 1; - } else - reqs_in[2 * k] = reqs_in[2 * k + 1] = MPI_REQUEST_NULL; + } else { + reqs_in[3 * k + 2] = MPI_REQUEST_NULL; + } if (e->proxies[k].nr_parts_out > 0) { - reqs_out[2 * k] = e->proxies[k].req_parts_out; - reqs_out[2 * k + 1] = e->proxies[k].req_xparts_out; + reqs_out[3 * k] = e->proxies[k].req_parts_out; + reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out; + nr_out += 2; + } else { + reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_gparts_out > 0) { + reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out; nr_out += 1; - } else - reqs_out[2 * k] = reqs_out[2 * k + 1] = MPI_REQUEST_NULL; + } else { + reqs_out[3 * k + 2] = MPI_REQUEST_NULL; + } } /* Wait for each part array to come in and collect the new parts from the proxies. 
*/ - size_t count = 0; - for (int k = 0; k < 2 * (nr_in + nr_out); k++) { - int err = 0, pid = MPI_UNDEFINED; - if ((err = MPI_Waitany(2 * e->nr_proxies, reqs_in, &pid, + int count_parts = 0, count_gparts = 0; + for (int k = 0; k < nr_in; k++) { + int err, pid; + if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; int res; @@ -700,26 +968,46 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, error("MPI_Waitany failed (%s).", buff); } if (pid == MPI_UNDEFINED) break; - // message( "request from proxy %i has arrived." , pid ); - if (reqs_in[pid & ~1] == MPI_REQUEST_NULL && - reqs_in[pid | 1] == MPI_REQUEST_NULL) { + // message( "request from proxy %i has arrived." , pid / 3 ); + pid = 3 * (pid / 3); + + /* If all the requests for a given proxy have arrived... */ + if (reqs_in[pid + 0] == MPI_REQUEST_NULL && + reqs_in[pid + 1] == MPI_REQUEST_NULL && + reqs_in[pid + 2] == MPI_REQUEST_NULL) { + /* Copy the particle data to the part/xpart/gpart arrays. */ struct proxy *p = &e->proxies[pid >> 1]; - memcpy(&s->parts[offset + count], p->parts_in, + memcpy(&s->parts[offset_parts + count_parts], p->parts_in, sizeof(struct part) * p->nr_parts_in); - memcpy(&s->xparts[offset + count], p->xparts_in, + memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in, sizeof(struct xpart) * p->nr_parts_in); + memcpy(&s->gparts[offset_gparts + count_gparts], p->gparts_in, + sizeof(struct gpart) * p->nr_gparts_in); /* for (int k = offset; k < offset + count; k++) message( "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.", s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1], s->parts[k].x[2], s->parts[k].h, p->nodeID); */ - count += p->nr_parts_in; + + /* Re-link the gparts. */ + for (int k = 0; k < p->nr_gparts_in; k++) { + struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k]; + if (gp->id >= 0) { + struct part *p = &s->parts[offset_gparts + count_parts + gp->id]; + gp->part = p; + p->gpart = gp; + } + } + + /* Advance the counters. */ + count_parts += p->nr_parts_in; + count_gparts += p->nr_gparts_in; } } /* Wait for all the sends to have finished too. */ if (nr_out > 0) - if (MPI_Waitall(2 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != + if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) error("MPI_Waitall on sends failed."); @@ -728,11 +1016,11 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, clocks_getunit()); /* Return the number of harvested parts. */ - return count; + *Npart = count_parts; + *Ngpart = count_gparts; #else error("SWIFT was not compiled with MPI support."); - return 0; #endif } @@ -1924,7 +2212,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { engine_makeproxies(e); /* Re-allocate the local parts. 
*/ - if (e->nodeID == 0) + if (e->verbose) message("Re-allocating parts array from %zi to %zi.", s->size_parts, (size_t)(s->nr_parts * 1.2)); s->size_parts = s->nr_parts * 1.2; @@ -1932,7 +2220,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { struct xpart *xparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, sizeof(struct part) * s->size_parts) != 0 || - posix_memalign((void **)&xparts_new, part_align, + posix_memalign((void **)&xparts_new, xpart_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts); @@ -1941,6 +2229,28 @@ void engine_split(struct engine *e, struct partition *initial_partition) { free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; + + /* Re-link the gparts. */ + for (size_t k = 0; k < s->nr_parts; k++) + if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; + + /* Re-allocate the local gparts. */ + if (e->verbose) + message("Re-allocating gparts array from %zi to %zi.", s->size_gparts, + (size_t)(s->nr_gparts * 1.2)); + s->size_gparts = s->nr_gparts * 1.2; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts); + free(s->gparts); + s->gparts = gparts_new; + + /* Re-link the parts. */ + for (size_t k = 0; k < s->nr_gparts; k++) + if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k]; + #else error("SWIFT was not compiled with MPI support."); #endif @@ -2173,8 +2483,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, /* Construct types for MPI communications */ #ifdef WITH_MPI - part_create_mpi_type(&e->part_mpi_type); - xpart_create_mpi_type(&e->xpart_mpi_type); + part_create_mpi_types(); #endif /* First of all, init the barrier and lock it. */ diff --git a/src/engine.h b/src/engine.h index 741ae1f553494e435394f529606b4cb794b0e3d2..4d1860b9eed0203bf9bf75711ec6e6549d837fe7 100644 --- a/src/engine.h +++ b/src/engine.h @@ -62,6 +62,8 @@ extern const char *engine_policy_names[]; #define engine_maxtaskspercell 96 #define engine_maxproxies 64 #define engine_tasksreweight 10 +#define engine_parts_size_grow 1.05 +#define engine_redistribute_alloc_margin 1.2 /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; @@ -160,12 +162,6 @@ struct engine { /* Are we talkative ? */ int verbose; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. 
*/ @@ -182,7 +178,9 @@ void engine_init_particles(struct engine *e); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); -int engine_exchange_strays(struct engine *e, int offset, size_t *ind, size_t N); +void engine_exchange_strays(struct engine *e, size_t offset_parts, + int *ind_part, size_t *Npart, size_t offset_gparts, + int *ind_gpart, size_t *Ngpart); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); void engine_makeproxies(struct engine *e); diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h index d707d69631e65eed8ad21a7fa9601c07d3c71263..129c4b39828ca73d2d80d79edbdaa8ec4d5a9e01 100644 --- a/src/gravity/Default/gravity_io.h +++ b/src/gravity/Default/gravity_io.h @@ -48,6 +48,8 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param Ndm The number of DM particles on that MPI rank. * @param Ndm_total The total number of g-particles (only used in MPI mode) @@ -59,17 +61,20 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( * */ __attribute__((always_inline)) INLINE static void darkmatter_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int Ndm, long long Ndm_total, - int mpi_rank, long long offset, struct gpart* gparts, - struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, + int Ndm, long long Ndm_total, int mpi_rank, long long offset, + struct gpart* gparts, struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, Ndm, 3, gparts, - Ndm_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, Ndm, 1, gparts, - Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, Ndm, 3, gparts, - Ndm_total, mpi_rank, offset, v_full, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, Ndm, 1, gparts, - Ndm_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + Ndm, 3, gparts, Ndm_total, mpi_rank, offset, x, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, Ndm, + 1, gparts, Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + Ndm, 3, gparts, Ndm_total, mpi_rank, offset, v_full, us, + UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); } diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index 958bf5a1869718b57678246ff3b1985e54145824..0e9ad46ddc1d4e8c8d3ffdbf3e81262ec49a7092 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). 
+ * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank. * @param N_total The total number of particles (only used in MPI mode) @@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, u, us, UNIT_CONV_ENERGY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us, + UNIT_CONV_ENERGY_PER_UNIT_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index 17c3d3013644c3572f3c26fc3e270b1c1bc465ed..c1c59dfa4980a2843e7e13bee4c964c9b254cae6 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). 
+ * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank. * @param N_total The total number of particles (only used in MPI mode) @@ -67,27 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, entropy, us, + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, entropy, us, UNIT_CONV_ENTROPY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 2c56fb489ab84ca7c30426b54cf95e26e3821084..afe5de83f423e43b4d2480cca1ac3e84d6c549de 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unsued in MPI mode). 
+ * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank. * @param N_total The total number of particles (only used in MPI mode) @@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, u, us, UNIT_CONV_ENERGY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us, + UNIT_CONV_ENERGY_PER_UNIT_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/parallel_io.c b/src/parallel_io.c index cffa99a0fd75566ec3e850076d15e104504eeb40..0076c225e1c5361287280f8a567c8062aefd914e 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -178,9 +178,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * * Calls #error() if an error occurs. 
*/ -void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, int N, int dim, long long N_total, - int mpi_rank, long long offset, char* part_c, +void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + int N, int dim, long long N_total, int mpi_rank, + long long offset, char* part_c, size_t partSize, struct UnitSystem* us, enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_memspace = 0, h_filespace = 0, h_plist_id = 0; @@ -189,7 +190,6 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; char buffer[150]; @@ -269,7 +269,9 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - if (mpi_rank == 0) writeXMFline(xmfFile, fileName, name, N_total, dim, type); + if (mpi_rank == 0) + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim, + type); /* Write unit conversion factors for this data set */ conversionString(buffer, us, convFactor); @@ -328,14 +330,16 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param convFactor The UnitConversionFactor for this array * */ -#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \ - mpi_rank, offset, field, us, convFactor) \ - writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total, \ - mpi_rank, offset, (char*)(&(part[0]).field), us, \ - convFactor) +#define writeArray(grp, fileName, xmfFile, pTypeGroupName, name, type, N, dim, \ + part, N_total, mpi_rank, offset, field, us, convFactor) \ + writeArrayBackEnd(grp, fileName, xmfFile, pTypeGroupName, name, type, N, \ + dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field), \ + sizeof(part[0]), us, convFactor) /* Import the right hydro definition */ #include "hydro_io.h" +/* Import the right gravity definition */ +#include "gravity_io.h" /** * @brief Reads an HDF5 initial condition file (GADGET-3 type) in parallel @@ -357,16 +361,17 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * */ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info) { + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info) { hid_t h_file = 0, h_grp = 0; - double boxSize[3] = { - 0.0, -1.0, -1.0}; /* GADGET has only cubic boxes (in cosmological mode) */ - int numParticles[6] = { - 0}; /* GADGET has 6 particle types. 
We only keep the type 0*/ - int numParticles_highWord[6] = {0}; - long long offset = 0; - long long N_total = 0; + /* GADGET has only cubic boxes (in cosmological mode) */ + double boxSize[3] = {0.0, -1.0, -1.0}; + int numParticles[NUM_PARTICLE_TYPES] = {0}; + int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + size_t N[NUM_PARTICLE_TYPES] = {0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; /* Open file */ /* message("Opening file '%s' as IC.", fileName); */ @@ -398,58 +403,116 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); - N_total = ((long long)numParticles[0]) + - ((long long)numParticles_highWord[0] << 32); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = ((long long)numParticles[ptype]) + + ((long long)numParticles_highWord[ptype] << 32); + dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; - /* message("Found %d particles in a %speriodic box of size [%f %f %f].", */ - /* N_total, (periodic ? "": "non-"), dim[0], dim[1], dim[2]); */ + /* message("Found %d particles in a %speriodic box of size + * [%f %f %f].", */ + /* N_total, (periodic ? "": "non-"), dim[0], + * dim[1], dim[2]); */ /* Divide the particles among the tasks. */ - offset = mpi_rank * N_total / mpi_size; - *N = (mpi_rank + 1) * N_total / mpi_size - offset; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + offset[ptype] = mpi_rank * N_total[ptype] / mpi_size; + N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype]; + } /* Close header */ H5Gclose(h_grp); - /* Allocate memory to store particles */ - if (posix_memalign((void*)parts, part_align, *N * sizeof(struct part)) != 0) + /* Allocate memory to store SPH particles */ + *Ngas = N[0]; + if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) != + 0) error("Error while allocating memory for particles"); - bzero(*parts, *N * sizeof(struct part)); + bzero(*parts, *Ngas * sizeof(struct part)); - /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / + /* Allocate memory to store all particles */ + const size_t Ndm = N[1]; + *Ngparts = N[1] + N[0]; + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error( + "Error while allocating memory for gravity " + "particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + + /* message("Allocated %8.2f MB for particles.", *N * + * sizeof(struct part) / * (1024.*1024.)); */ - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT); - if (h_grp < 0) error("Error while opening particle group.\n"); + /* message("BoxSize = %lf", dim[0]); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, + * *Ngparts); */ - /* Read particle fields into the particle structure */ - hydro_read_particles(h_grp, *N, N_total, offset, *parts); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Close particle group */ - H5Gclose(h_grp); + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + 
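/*
 * The division of each particle type across ranks above uses pure integer
 * arithmetic: offset = rank * N_total / size and
 * count = (rank + 1) * N_total / size - offset. A small standalone check
 * (arbitrary numbers, hypothetical names) that these chunks are contiguous,
 * non-overlapping and sum exactly to N_total even when N_total is not
 * divisible by the number of ranks:
 */
#include <assert.h>
#include <stdio.h>

int main(void) {
  const long long N_total = 1000003; /* deliberately not divisible by size */
  const int size = 7;

  long long covered = 0;
  for (int rank = 0; rank < size; ++rank) {
    const long long offset = rank * N_total / size;
    const long long count = (rank + 1) * N_total / size - offset;
    assert(offset == covered); /* chunks are contiguous */
    covered += count;
  }
  assert(covered == N_total); /* and cover everything exactly */
  printf("Decomposition of %lld particles over %d ranks is exact.\n", N_total,
         size);
  return 0;
}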
ptype); + h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } + + /* Read particle fields into the particle structure */ + switch (ptype) { + + case GAS: + hydro_read_particles(h_grp, N[ptype], N_total[ptype], offset[ptype], + *parts); + break; + + case DM: + darkmatter_read_particles(h_grp, N[ptype], N_total[ptype], + offset[ptype], *gparts); + break; + + default: + error("Particle Type %d not yet supported. Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + } + + /* Prepare the DM particles */ + prepare_dm_gparts(*gparts, Ndm); + + /* Now duplicate the hydro particle into gparts */ + duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + + /* message("Done Reading particles..."); */ /* Close property handler */ H5Pclose(h_plist_id); /* Close file */ H5Fclose(h_file); - - /* message("Done Reading particles..."); */ } /** - * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor + * @brief Writes an HDF5 output file (GADGET-3 type) with + *its XMF descriptor * * @param e The engine containing all the system. - * @param us The UnitSystem used for the conversion of units in the output + * @param us The UnitSystem used for the conversion of units + *in the output * - * Creates an HDF5 output file and writes the particles contained - * in the engine. If such a file already exists, it is erased and replaced + * Creates an HDF5 output file and writes the particles + *contained + * in the engine. If such a file already exists, it is + *erased and replaced * by the new one. * The companion XMF file is also updated accordingly. * @@ -459,23 +522,27 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, void write_output_parallel(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info) { - hid_t h_file = 0, h_grp = 0, h_grpsph = 0; - int N = e->s->nr_parts; + const size_t Ngas = e->s->nr_parts; + const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; - unsigned int numParticles[6] = {N, 0}; - unsigned int numParticlesHighWord[6] = {0}; - unsigned int flagEntropy[6] = {0}; - long long N_total = 0, offset = 0; - double offset_d = 0., N_d = 0., N_total_d = 0.; int numFiles = 1; struct part* parts = e->s->parts; - FILE* xmfFile = 0; + struct gpart* gparts = e->s->gparts; + struct gpart* dmparts = NULL; static int outputCount = 0; + FILE* xmfFile = 0; + + /* Number of particles of each type */ + // const size_t Ndm = Ntot - Ngas; + + /* MATTHIEU: Temporary fix to preserve master */ + const size_t Ndm = Ntot > 0 ? 
Ntot - Ngas : 0; + /* MATTHIEU: End temporary fix */ /* File name */ - char fileName[200]; - sprintf(fileName, "output_%03i.hdf5", outputCount); + char fileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); /* First time, we need to create the XMF file */ if (outputCount == 0 && mpi_rank == 0) createXMFfile(); @@ -491,21 +558,26 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, error("Error while opening file '%s'.", fileName); } - /* Compute offset in the file and total number of particles */ - /* Done using double to allow for up to 2^50=10^15 particles */ - N_d = (double)N; - MPI_Exscan(&N_d, &offset_d, 1, MPI_DOUBLE, MPI_SUM, comm); - N_total_d = offset_d + N_d; - MPI_Bcast(&N_total_d, 1, MPI_DOUBLE, mpi_size - 1, comm); - if (N_total_d > 1.e15) - error( - "Error while computing the offset for parallel output: Simulation has " - "more than 10^15 particles.\n"); - N_total = (long long)N_total_d; - offset = (long long)offset_d; + /* Compute offset in the file and total number of + * particles */ + size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; + MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = offset[ptype] + N[ptype]; + + /* The last rank now has the correct N_total. Let's + * broadcast from there */ + MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm); - /* Write the part of the XMF file corresponding to this specific output */ - if (mpi_rank == 0) writeXMFheader(xmfFile, N_total, fileName, e->time); + /* Now everybody konws its offset and the total number of + * particles of each + * type */ + + /* Write the part of the XMF file corresponding to this + * specific output */ + if (mpi_rank == 0) writeXMFoutputheader(xmfFile, fileName, e->time); /* Open header to write simulation properties */ /* message("Writing runtime parameters..."); */ @@ -526,19 +598,28 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, /* Print the relevant information and print status */ writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, 6); double dblTime = e->time; writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1); /* GADGET-2 legacy values */ - numParticles[0] = (unsigned int)N_total; - writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, 6); - numParticlesHighWord[0] = (unsigned int)(N_total >> 32); + /* Number of particles of each type */ + unsigned int numParticles[NUM_PARTICLE_TYPES] = {0}; + unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + numParticles[ptype] = (unsigned int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + NUM_PARTICLE_TYPES); + writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord, - 6); + NUM_PARTICLE_TYPES); double MassTable[6] = {0., 0., 0., 0., 0., 0.}; - writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 6); - writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 6); + writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); + unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + writeAttribute(h_grp, 
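/*
 * A minimal sketch of the exclusive-scan pattern used above to place each
 * rank's particles in the shared HDF5 file: MPI_Exscan gives every rank the
 * sum of the counts on the lower ranks (its write offset), and the last rank,
 * whose offset + count equals the global total, broadcasts that total to all.
 * For brevity this uses a single count per rank instead of one per particle
 * type, keeps the buffers as long long so they match MPI_LONG_LONG exactly,
 * and uses made-up counts; it is an illustration, not the SWIFT code path.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);
  int rank = 0, size = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  long long N = 100 + rank; /* this rank's particle count (made up) */
  long long offset = 0;     /* where this rank starts writing        */
  long long N_total = 0;    /* global number of particles            */

  /* Exclusive prefix sum of the counts. The result on rank 0 is undefined
     by the MPI standard, so reset it to zero there. */
  MPI_Exscan(&N, &offset, 1, MPI_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
  if (rank == 0) offset = 0;

  /* The last rank now holds offset + N == total; share it with everyone. */
  N_total = offset + N;
  MPI_Bcast(&N_total, 1, MPI_LONG_LONG, size - 1, MPI_COMM_WORLD);

  printf("rank %d: offset=%lld count=%lld total=%lld\n", rank, offset, N,
         N_total);
  MPI_Finalize();
  return 0;
}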
"Flag_Entropy_ICs", UINT, flagEntropy, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); /* Close header */ @@ -556,21 +637,71 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, /* Print the system of Units */ writeUnitSystem(h_file, us); - /* Create SPH particles group */ - /* message("Writing particle arrays..."); */ - h_grp = - H5Gcreate(h_file, "/PartType0", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (h_grp < 0) error("Error while creating particle group.\n"); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { + + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Add the global information for that particle type to + * the XMF meta-file */ + if (mpi_rank == 0) + writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype); + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } - /* Write particle fields from the particle structure */ - hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank, offset, - parts, us); + /* Read particle fields into the particle structure */ + switch (ptype) { - /* Close particle group */ - H5Gclose(h_grp); + case GAS: + hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, offset[ptype], + parts, us); + + break; + + case DM: + /* Allocate temporary array */ + if (posix_memalign((void*)&dmparts, gpart_align, + Ndm * sizeof(struct gpart)) != 0) + error( + "Error while allocating temporart memory for " + "DM particles"); + bzero(dmparts, Ndm * sizeof(struct gpart)); + + /* Collect the DM particles from gpart */ + collect_dm_gparts(gparts, Ntot, dmparts, Ndm); + + /* Write DM particles */ + darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, + offset[ptype], dmparts, us); + + /* Free temporary array */ + free(dmparts); + break; + + default: + error("Particle Type %d not yet supported. 
Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + + /* Close this particle group in the XMF file as well */ + if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype); + } /* Write LXMF file descriptor */ - if (mpi_rank == 0) writeXMFfooter(xmfFile); + if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time); /* message("Done writing particles..."); */ diff --git a/src/parallel_io.h b/src/parallel_io.h index a0589944ec845c712abde1e64e305980748db0e7..663f0aabac44c08682b964512839b925673ea5c5 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -32,8 +32,9 @@ #if defined(HAVE_HDF5) && defined(WITH_MPI) && defined(HAVE_PARALLEL_HDF5) void read_ic_parallel(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info); + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info); void write_output_parallel(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, diff --git a/src/part.c b/src/part.c index 6a99325ef23a7062fafb387fa3f3bd6b2203d057..b89abdde40fe8c7a57d1e9ac9e18fece83ba1f21 100644 --- a/src/part.c +++ b/src/part.c @@ -26,33 +26,21 @@ #endif /* This object's header. */ +#include "error.h" #include "part.h" #ifdef WITH_MPI -/** - * @brief Registers and returns an MPI type for the particles - * - * @param part_type The type container - */ -void part_create_mpi_type(MPI_Datatype* part_type) { - - /* This is not the recommended way of doing this. - One should define the structure field by field - But as long as we don't do serialization via MPI-IO - we don't really care. - Also we would have to modify this function everytime something - is added to the part structure. */ - MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, - part_type); - MPI_Type_commit(part_type); -} +/* MPI data type for the particle transfers */ +MPI_Datatype part_mpi_type; +MPI_Datatype xpart_mpi_type; +MPI_Datatype gpart_mpi_type; +#endif +#ifdef WITH_MPI /** - * @brief Registers and returns an MPI type for the xparticles - * - * @param xpart_type The type container + * @brief Registers MPI particle types. */ -void xpart_create_mpi_type(MPI_Datatype* xpart_type) { +void part_create_mpi_types() { /* This is not the recommended way of doing this. One should define the structure field by field @@ -60,9 +48,20 @@ void xpart_create_mpi_type(MPI_Datatype* xpart_type) { we don't really care. Also we would have to modify this function everytime something is added to the part structure. 
*/ - MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), MPI_BYTE, - xpart_type); - MPI_Type_commit(xpart_type); + if (MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, + &part_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&part_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for parts."); + } + if (MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), + MPI_BYTE, &xpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&xpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for xparts."); + } + if (MPI_Type_contiguous(sizeof(struct gpart) / sizeof(unsigned char), + MPI_BYTE, &gpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for gparts."); + } } - #endif diff --git a/src/part.h b/src/part.h index 865403e8c2c157dc5a8ff7a32bc41be676d7919b..5d4c9c88a1acadea3d23a3df618c04da389fb61d 100644 --- a/src/part.h +++ b/src/part.h @@ -35,8 +35,8 @@ /* Some constants. */ #define part_align 64 -#define gpart_align 32 #define xpart_align 32 +#define gpart_align 32 /* Import the right particle definition */ #if defined(MINIMAL_SPH) @@ -52,8 +52,12 @@ #include "./gravity/Default/gravity_part.h" #ifdef WITH_MPI -void part_create_mpi_type(MPI_Datatype* part_type); -void xpart_create_mpi_type(MPI_Datatype* xpart_type); +/* MPI data type for the particle transfers */ +extern MPI_Datatype part_mpi_type; +extern MPI_Datatype xpart_mpi_type; +extern MPI_Datatype gpart_mpi_type; + +void part_create_mpi_types(); #endif #endif /* SWIFT_PART_H */ diff --git a/src/proxy.c b/src/proxy.c index 7d2e546bf945ca18c2195ea2801d1b2058cb2f58..02263a5653bdcdd2d1bf0a86523ed1a599d4bf21 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -50,11 +50,9 @@ void proxy_cells_exch1(struct proxy *p) { #ifdef WITH_MPI - int k, ind; - /* Get the number of pcells we will need to send. */ p->size_pcells_out = 0; - for (k = 0; k < p->nr_cells_out; k++) + for (int k = 0; k < p->nr_cells_out; k++) p->size_pcells_out += p->cells_out[k]->pcell_size; /* Send the number of pcells. */ @@ -70,7 +68,7 @@ void proxy_cells_exch1(struct proxy *p) { if ((p->pcells_out = malloc(sizeof(struct pcell) * p->size_pcells_out)) == NULL) error("Failed to allocate pcell_out buffer."); - for (ind = 0, k = 0; k < p->nr_cells_out; k++) { + for (int ind = 0, k = 0; k < p->nr_cells_out; k++) { memcpy(&p->pcells_out[ind], p->cells_out[k]->pcell, sizeof(struct pcell) * p->cells_out[k]->pcell_size); ind += p->cells_out[k]->pcell_size; @@ -131,16 +129,14 @@ void proxy_cells_exch2(struct proxy *p) { void proxy_addcell_in(struct proxy *p, struct cell *c) { - int k; - struct cell **temp; - /* Check if the cell is already registered with the proxy. */ - for (k = 0; k < p->nr_cells_in; k++) + for (int k = 0; k < p->nr_cells_in; k++) if (p->cells_in[k] == c) return; /* Do we need to grow the number of in cells? */ if (p->nr_cells_in == p->size_cells_in) { p->size_cells_in *= proxy_buffgrow; + struct cell **temp; if ((temp = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL) error("Failed to allocate incoming cell list."); memcpy(temp, p->cells_in, sizeof(struct cell *) * p->nr_cells_in); @@ -162,16 +158,14 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) { void proxy_addcell_out(struct proxy *p, struct cell *c) { - int k; - struct cell **temp; - /* Check if the cell is already registered with the proxy. 
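/*
 * A self-contained illustration of the pattern used in
 * part_create_mpi_types() above: describing a C struct to MPI as an opaque
 * block of bytes with MPI_Type_contiguous and committing the result. As the
 * original comment notes, the recommended alternative is to describe the
 * struct field by field (MPI_Type_create_struct); the byte-blob form is
 * sufficient as long as the type is only used to ship particles between
 * identical builds. The struct below is a hypothetical stand-in for gpart.
 */
#include <mpi.h>
#include <stdio.h>

struct demo_gpart { /* stand-in for struct gpart */
  double x[3];
  float mass;
  long long id;
};

static MPI_Datatype demo_gpart_mpi_type;

int main(int argc, char *argv[]) {
  MPI_Init(&argc, &argv);

  if (MPI_Type_contiguous(sizeof(struct demo_gpart), MPI_BYTE,
                          &demo_gpart_mpi_type) != MPI_SUCCESS ||
      MPI_Type_commit(&demo_gpart_mpi_type) != MPI_SUCCESS) {
    fprintf(stderr, "Failed to create MPI type for demo_gpart.\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  /* The committed type can now be passed to MPI_Isend / MPI_Irecv with a
     count in particles rather than bytes. */

  MPI_Type_free(&demo_gpart_mpi_type);
  MPI_Finalize();
  return 0;
}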
*/ - for (k = 0; k < p->nr_cells_out; k++) + for (int k = 0; k < p->nr_cells_out; k++) if (p->cells_out[k] == c) return; /* Do we need to grow the number of out cells? */ if (p->nr_cells_out == p->size_cells_out) { p->size_cells_out *= proxy_buffgrow; + struct cell **temp; if ((temp = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL) error("Failed to allocate outgoing cell list."); memcpy(temp, p->cells_out, sizeof(struct cell *) * p->nr_cells_out); @@ -195,20 +189,21 @@ void proxy_parts_exch1(struct proxy *p) { #ifdef WITH_MPI /* Send the number of particles. */ - if (MPI_Isend(&p->nr_parts_out, 1, MPI_INT, p->nodeID, + p->buff_out[0] = p->nr_parts_out; + p->buff_out[1] = p->nr_gparts_out; + if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_out) != MPI_SUCCESS) error("Failed to isend nr of parts."); - // message( "isent particle count (%i) from node %i to node %i." , - // p->nr_parts_out , p->mynodeID , p->nodeID ); fflush(stdout); + /* message( "isent particle counts [%i, %i] from node %i to node %i." , + p->buff_out[0], p->buff_out[1], p->mynodeID , p->nodeID ); fflush(stdout); */ /* Send the particle buffers. */ if (p->nr_parts_out > 0) { - if (MPI_Isend(p->parts_out, sizeof(struct part) * p->nr_parts_out, MPI_BYTE, - p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_parts, + if (MPI_Isend(p->parts_out, p->nr_parts_out, part_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, &p->req_parts_out) != MPI_SUCCESS || - MPI_Isend(p->xparts_out, sizeof(struct xpart) * p->nr_parts_out, - MPI_BYTE, p->nodeID, + MPI_Isend(p->xparts_out, p->nr_parts_out, xpart_mpi_type, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_out) != MPI_SUCCESS) error("Failed to isend part data."); @@ -219,14 +214,20 @@ void proxy_parts_exch1(struct proxy *p) { p->parts_out[k].id, p->parts_out[k].x[0], p->parts_out[k].x[1], p->parts_out[k].x[2], p->parts_out[k].h, p->nodeID);*/ } + if (p->nr_gparts_out > 0) { + if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS) + error("Failed to isend part data."); + // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , + // p->nodeID ); fflush(stdout); + } /* Receive the number of particles. */ - if (MPI_Irecv(&p->nr_parts_in, 1, MPI_INT, p->nodeID, + if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_in) != MPI_SUCCESS) error("Failed to irecv nr of parts."); -// message( "irecv particle count on node %i from node %i." , p->mynodeID , -// p->nodeID ); fflush(stdout); #else error("SWIFT was not compiled with MPI support."); @@ -237,6 +238,10 @@ void proxy_parts_exch2(struct proxy *p) { #ifdef WITH_MPI + /* Unpack the incomming parts counts. */ + p->nr_parts_in = p->buff_in[0]; + p->nr_gparts_in = p->buff_in[1]; + /* Is there enough space in the buffer? 
*/ if (p->nr_parts_in > p->size_parts_in) { do { @@ -250,19 +255,36 @@ void proxy_parts_exch2(struct proxy *p) { p->size_parts_in)) == NULL) error("Failed to re-allocate parts_in buffers."); } + if (p->nr_gparts_in > p->size_gparts_in) { + do { + p->size_gparts_in *= proxy_buffgrow; + } while (p->nr_gparts_in > p->size_gparts_in); + free(p->gparts_in); + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to re-allocate gparts_in buffers."); + } /* Receive the particle buffers. */ if (p->nr_parts_in > 0) { - if (MPI_Irecv(p->parts_in, sizeof(struct part) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_parts, - MPI_COMM_WORLD, &p->req_parts_in) != MPI_SUCCESS || - MPI_Irecv(p->xparts_in, sizeof(struct xpart) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_xparts, + if (MPI_Irecv(p->parts_in, p->nr_parts_in, part_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, + &p->req_parts_in) != MPI_SUCCESS || + MPI_Irecv(p->xparts_in, p->nr_parts_in, xpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_in) != MPI_SUCCESS) error("Failed to irecv part data."); // message( "irecv particle data (%i) from node %i." , p->nr_parts_in , // p->nodeID ); fflush(stdout); } + if (p->nr_gparts_in > 0) { + if (MPI_Irecv(p->gparts_in, p->nr_gparts_in, gpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_in) != MPI_SUCCESS) + error("Failed to irecv gpart data."); + // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in , + // p->nodeID ); fflush(stdout); + } #else error("SWIFT was not compiled with MPI support."); @@ -278,8 +300,8 @@ void proxy_parts_exch2(struct proxy *p) { * @param N The number of parts. */ -void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, - int N) { +void proxy_parts_load(struct proxy *p, const struct part *parts, + const struct xpart *xparts, int N) { /* Is there enough space in the buffer? */ if (p->nr_parts_out + N > p->size_parts_out) { @@ -309,6 +331,37 @@ void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, p->nr_parts_out += N; } +/** + * @brief Load parts onto a proxy for exchange. + * + * @param p The #proxy. + * @param gparts Pointer to an array of #gpart to send. + * @param N The number of parts. + */ + +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { + + /* Is there enough space in the buffer? */ + if (p->nr_gparts_out + N > p->size_gparts_out) { + do { + p->size_gparts_out *= proxy_buffgrow; + } while (p->nr_gparts_out + N > p->size_gparts_out); + struct gpart *tp; + if ((tp = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to re-allocate gparts_out buffers."); + memcpy(tp, p->gparts_out, sizeof(struct gpart) * p->nr_gparts_out); + free(p->gparts_out); + p->gparts_out = tp; + } + + /* Copy the parts and xparts data to the buffer. */ + memcpy(&p->gparts_out[p->nr_gparts_out], gparts, sizeof(struct gpart) * N); + + /* Increase the counters. */ + p->nr_gparts_out += N; +} + /** * @brief Initialize the given proxy. * @@ -358,4 +411,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { error("Failed to allocate parts_out buffers."); } p->nr_parts_out = 0; + + /* Allocate the gpart send and receive buffers, if needed. 
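/*
 * A small standalone sketch of the buffer-growth idiom used in
 * proxy_gparts_load() and the exch2 receive path above: when the incoming
 * data does not fit, the capacity is multiplied by a constant factor until it
 * does, a new buffer is allocated, and the existing contents are copied
 * across. Names and the growth factor below are hypothetical stand-ins for
 * the proxy fields and proxy_buffgrow.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define demo_buffgrow 2 /* stand-in for proxy_buffgrow */

struct demo_buffer {
  double *data;
  int size; /* capacity */
  int nr;   /* elements in use */
};

/* Append N values, growing the buffer geometrically if needed. */
static void demo_buffer_load(struct demo_buffer *b, const double *vals, int N) {
  if (b->nr + N > b->size) {
    do {
      b->size *= demo_buffgrow;
    } while (b->nr + N > b->size);
    double *tp = malloc(sizeof(double) * b->size);
    if (tp == NULL) {
      fprintf(stderr, "Failed to re-allocate buffer.\n");
      exit(EXIT_FAILURE);
    }
    memcpy(tp, b->data, sizeof(double) * b->nr);
    free(b->data);
    b->data = tp;
  }
  memcpy(&b->data[b->nr], vals, sizeof(double) * N);
  b->nr += N;
}

int main(void) {
  struct demo_buffer b = {malloc(sizeof(double) * 4), 4, 0};
  const double vals[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  demo_buffer_load(&b, vals, 8); /* forces one growth step: 4 -> 8  */
  demo_buffer_load(&b, vals, 8); /* and another:            8 -> 16 */
  printf("capacity=%d used=%d\n", b.size, b.nr); /* capacity=16 used=16 */
  free(b.data);
  return 0;
}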
*/ + if (p->gparts_in == NULL) { + p->size_gparts_in = proxy_buffinit; + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to allocate gparts_in buffers."); + } + p->nr_gparts_in = 0; + if (p->gparts_out == NULL) { + p->size_gparts_out = proxy_buffinit; + if ((p->gparts_out = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to allocate gparts_out buffers."); + } + p->nr_gparts_out = 0; } diff --git a/src/proxy.h b/src/proxy.h index 3cd33e0f0819ee1ecac53213630445b39c809dea..5a747187e05a78a109ce4523ebb3c9d5fe2ad717 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -32,7 +32,8 @@ #define proxy_tag_count 0 #define proxy_tag_parts 1 #define proxy_tag_xparts 2 -#define proxy_tag_cells 3 +#define proxy_tag_gparts 3 +#define proxy_tag_cells 4 /* Data structure for the proxy. */ struct proxy { @@ -53,14 +54,21 @@ struct proxy { /* The parts and xparts buffers for input and output. */ struct part *parts_in, *parts_out; struct xpart *xparts_in, *xparts_out; + struct gpart *gparts_in, *gparts_out; int size_parts_in, size_parts_out; int nr_parts_in, nr_parts_out; + int size_gparts_in, size_gparts_out; + int nr_gparts_in, nr_gparts_out; + + /* Buffer to hold the incomming/outgoing particle counts. */ + int buff_out[2], buff_in[2]; /* MPI request handles. */ #ifdef WITH_MPI MPI_Request req_parts_count_out, req_parts_count_in; MPI_Request req_parts_out, req_parts_in; MPI_Request req_xparts_out, req_xparts_in; + MPI_Request req_gparts_out, req_gparts_in; MPI_Request req_cells_count_out, req_cells_count_in; MPI_Request req_cells_out, req_cells_in; #endif @@ -68,8 +76,9 @@ struct proxy { /* Function prototypes. */ void proxy_init(struct proxy *p, int mynodeID, int nodeID); -void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, - int N); +void proxy_parts_load(struct proxy *p, const struct part *parts, + const struct xpart *xparts, int N); +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); void proxy_parts_exch1(struct proxy *p); void proxy_parts_exch2(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c); diff --git a/src/queue.c b/src/queue.c index a7321155100df9225526c2f19fac2b99531307e4..6b788d7376ba4bdc95f1b1d918ab52a9514e7b4a 100644 --- a/src/queue.c +++ b/src/queue.c @@ -136,9 +136,6 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, lock_type *qlock = &q->lock; struct task *res = NULL; - /* If there are no tasks, leave immediately. */ - if (q->count == 0) return NULL; - /* Grab the task lock. */ if (blocking) { if (lock_lock(qlock) != 0) error("Locking the qlock failed.\n"); @@ -146,6 +143,12 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, if (lock_trylock(qlock) != 0) return NULL; } + /* If there are no tasks, leave immediately. */ + if (q->count == 0) { + lock_unlock_blind(qlock); + return NULL; + } + /* Set some pointers we will use often. 
*/ int *qtid = q->tid; struct task *qtasks = q->tasks; diff --git a/src/runner.c b/src/runner.c index 7eedb6adc72755ba12faed5429edad43d3849451..5e1b6497d757c8c25230823e7f8d60668f947618 100644 --- a/src/runner.c +++ b/src/runner.c @@ -664,7 +664,7 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { const float ti_current = r->e->ti_current; struct part *restrict p, *restrict parts = c->parts; struct xpart *restrict xp, *restrict xparts = c->xparts; - float dx_max = 0.f, h_max = 0.f; + float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; float w; TIMER_TIC @@ -709,16 +709,18 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, ti_old, ti_current, timeBase); - /* Compute motion since last cell construction */ - const float dx = - sqrtf((p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) + - (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) + - (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2])); - dx_max = fmaxf(dx_max, dx); + /* Compute (square of) motion since last cell construction */ + const float dx2 = (p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) + + (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) + + (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]); + dx2_max = fmaxf(dx2_max, dx2); /* Maximal smoothing length */ h_max = fmaxf(p->h, h_max); } + + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); } /* Otherwise, aggregate data from children. */ diff --git a/src/scheduler.c b/src/scheduler.c index 386b0e27a234a938c72d1af55b35cdc63597e337..38a1cd8c663307e0c0378d8bec2e0cd3d8f37fa8 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -95,31 +95,29 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, void scheduler_splittasks(struct scheduler *s) { - int j, k, ind, sid, tid = 0, redo; - struct cell *ci, *cj; - double hi, hj, shift[3]; - struct task *t, *t_old; - int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, - {-1, -1, 11, 10, 5, 4, 2, 1}, - {-1, -1, -1, 12, 7, 6, 4, 3}, - {-1, -1, -1, -1, 8, 7, 5, 4}, - {-1, -1, -1, -1, -1, 12, 10, 9}, - {-1, -1, -1, -1, -1, -1, 11, 10}, - {-1, -1, -1, -1, -1, -1, -1, 12}}; - float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, - 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; + const int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, + {-1, -1, 11, 10, 5, 4, 2, 1}, + {-1, -1, -1, 12, 7, 6, 4, 3}, + {-1, -1, -1, -1, 8, 7, 5, 4}, + {-1, -1, -1, -1, -1, 12, 10, 9}, + {-1, -1, -1, -1, -1, -1, 11, 10}, + {-1, -1, -1, -1, -1, -1, -1, 12}}; + const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, + 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, + 0.5788, 0.4025, 0.5788}; /* Loop through the tasks... */ - redo = 0; - t_old = t = NULL; + int tid = 0, redo = 0; + struct task *t_old = NULL; while (1) { /* Get a pointer on the task. */ + struct task *t = t_old; if (redo) { redo = 0; - t = t_old; } else { - if ((ind = atomic_inc(&tid)) < s->nr_tasks) + const int ind = atomic_inc(&tid); + if (ind < s->nr_tasks) t_old = t = &s->tasks[s->tasks_ind[ind]]; else break; @@ -160,7 +158,7 @@ void scheduler_splittasks(struct scheduler *s) { if (t->type == task_type_self) { /* Get a handle on the cell involved. */ - ci = t->ci; + struct cell *ci = t->ci; /* Foreign task? */ if (ci->nodeID != s->nodeID) { @@ -186,18 +184,18 @@ void scheduler_splittasks(struct scheduler *s) { redo = 1; /* Add the self task. 
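/*
 * The drift loop above now compares squared displacements and takes a single
 * square root after the loop instead of one sqrtf per particle; since sqrt is
 * monotonic, max over sqrt(d_i) equals sqrt of max over d_i. A tiny
 * standalone check with made-up positions and hypothetical names:
 */
#include <assert.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  const float x_old[4][3] = {{0.f, 0.f, 0.f}, {1.f, 1.f, 1.f},
                             {2.f, 0.f, 0.f}, {0.f, 3.f, 0.f}};
  const float x_new[4][3] = {{0.1f, 0.f, 0.f}, {1.f, 1.5f, 1.f},
                             {2.f, 0.f, 2.f},  {1.f, 3.f, 0.f}};

  float dx_max = 0.f, dx2_max = 0.f;
  for (int i = 0; i < 4; ++i) {
    float dx2 = 0.f;
    for (int k = 0; k < 3; ++k) {
      const float d = x_new[i][k] - x_old[i][k];
      dx2 += d * d;
    }
    dx_max = fmaxf(dx_max, sqrtf(dx2)); /* old: sqrt inside the loop  */
    dx2_max = fmaxf(dx2_max, dx2);      /* new: track the square only */
  }

  assert(fabsf(dx_max - sqrtf(dx2_max)) < 1e-6f);
  printf("max displacement = %f\n", sqrtf(dx2_max));
  return 0;
}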
*/ - for (k = 0; ci->progeny[k] == NULL; k++) - ; - t->ci = ci->progeny[k]; - for (k += 1; k < 8; k++) + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) if (ci->progeny[k] != NULL) scheduler_addtask(s, task_type_self, t->subtype, 0, 0, ci->progeny[k], NULL, 0); /* Make a task for each pair of progeny. */ - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL) - for (k = j + 1; k < 8; k++) + for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL) scheduler_addtask(s, task_type_pair, t->subtype, pts[j][k], 0, ci->progeny[j], ci->progeny[k], 0); @@ -210,10 +208,10 @@ void scheduler_splittasks(struct scheduler *s) { else if (t->type == task_type_pair) { /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; - hi = ci->dmin; - hj = cj->dmin; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const double hi = ci->dmin; + const double hj = cj->dmin; /* Foreign task? */ if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { @@ -223,7 +221,8 @@ void scheduler_splittasks(struct scheduler *s) { /* Get the sort ID, use space_getsid and not t->flags to make sure we get ci and cj swapped if needed. */ - sid = space_getsid(s->space, &ci, &cj, shift); + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); /* Should this task be split-up? */ if (ci->split && cj->split && @@ -479,9 +478,9 @@ void scheduler_splittasks(struct scheduler *s) { /* Replace the current task. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (cj->progeny[k] != NULL) { t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[j], cj->progeny[k], 0); @@ -520,8 +519,8 @@ void scheduler_splittasks(struct scheduler *s) { else if (t->type == task_type_grav_mm) { /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; /* Self-interaction? */ if (cj == NULL) { @@ -545,7 +544,7 @@ void scheduler_splittasks(struct scheduler *s) { /* Split this task into tasks on its progeny. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -554,7 +553,7 @@ void scheduler_splittasks(struct scheduler *s) { } else t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none, 0, 0, ci->progeny[j], NULL, 0); - for (k = j + 1; k < 8; k++) + for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -593,7 +592,7 @@ void scheduler_splittasks(struct scheduler *s) { /* Get the opening angle theta. */ float dx[3], theta; - for (k = 0; k < 3; k++) { + for (int k = 0; k < 3; k++) { dx[k] = fabs(ci->loc[k] - cj->loc[k]); if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k]) dx[k] = -dx[k] + s->space->dim[k]; @@ -614,9 +613,9 @@ void scheduler_splittasks(struct scheduler *s) { /* Split this task into tasks on its progeny. 
*/ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -662,17 +661,14 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype, int flags, int wait, struct cell *ci, struct cell *cj, int tight) { - int ind; - struct task *t; - /* Get the next free task. */ - ind = atomic_inc(&s->tasks_next); + const int ind = atomic_inc(&s->tasks_next); /* Overflow? */ if (ind >= s->size) error("Task list overflow."); /* Get a pointer to the new task. */ - t = &s->tasks[ind]; + struct task *t = &s->tasks[ind]; /* Copy the data. */ t->type = type; @@ -767,24 +763,24 @@ void scheduler_set_unlocks(struct scheduler *s) { void scheduler_ranktasks(struct scheduler *s) { - int i, j = 0, k, temp, left = 0, rank; - struct task *t, *tasks = s->tasks; - int *tid = s->tasks_ind, nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + int *tid = s->tasks_ind; + const int nr_tasks = s->nr_tasks; /* Run through the tasks and get all the waits right. */ - for (i = 0, k = 0; k < nr_tasks; k++) { + for (int k = 0; k < nr_tasks; k++) { tid[k] = k; - for (j = 0; j < tasks[k].nr_unlock_tasks; j++) + for (int j = 0; j < tasks[k].nr_unlock_tasks; j++) tasks[k].unlock_tasks[j]->wait += 1; } /* Main loop. */ - for (j = 0, rank = 0; left < nr_tasks; rank++) { + for (int j = 0, rank = 0, left = 0; left < nr_tasks; rank++) { /* Load the tids of tasks with no waits. */ - for (k = left; k < nr_tasks; k++) + for (int k = left; k < nr_tasks; k++) if (tasks[tid[k]].wait == 0) { - temp = tid[j]; + int temp = tid[j]; tid[j] = tid[k]; tid[k] = temp; j += 1; @@ -794,15 +790,16 @@ void scheduler_ranktasks(struct scheduler *s) { if (j == left) error("Unsatisfiable task dependencies detected."); /* Unlock the next layer of tasks. */ - for (i = left; i < j; i++) { - t = &tasks[tid[i]]; + for (int i = left; i < j; i++) { + struct task *t = &tasks[tid[i]]; t->rank = rank; tid[i] = t - tasks; if (tid[i] >= nr_tasks) error("Task index overshoot."); /* message( "task %i of type %s has rank %i." , i , (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */ - for (k = 0; k < t->nr_unlock_tasks; k++) t->unlock_tasks[k]->wait -= 1; + for (int k = 0; k < t->nr_unlock_tasks; k++) + t->unlock_tasks[k]->wait -= 1; } /* The new left (no, not tony). */ @@ -824,8 +821,6 @@ void scheduler_ranktasks(struct scheduler *s) { void scheduler_reset(struct scheduler *s, int size) { - int k; - /* Do we need to re-allocate? */ if (size > s->size) { @@ -852,7 +847,7 @@ void scheduler_reset(struct scheduler *s, int size) { s->nr_unlocks = 0; /* Set the task pointers in the queues. 
*/ - for (k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; + for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; } /** @@ -863,21 +858,23 @@ void scheduler_reset(struct scheduler *s, int size) { void scheduler_reweight(struct scheduler *s) { - int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind; - struct task *t, *tasks = s->tasks; - int nodeID = s->nodeID; - float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, - 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; - float wscale = 0.001; + const int nr_tasks = s->nr_tasks; + int *tid = s->tasks_ind; + struct task *tasks = s->tasks; + const int nodeID = s->nodeID; + const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, + 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, + 0.5788, 0.4025, 0.5788}; + const float wscale = 0.001; // ticks tic; /* Run through the tasks backwards and set their waits and weights. */ // tic = getticks(); - for (k = nr_tasks - 1; k >= 0; k--) { - t = &tasks[tid[k]]; + for (int k = nr_tasks - 1; k >= 0; k--) { + struct task *t = &tasks[tid[k]]; t->weight = 0; - for (j = 0; j < t->nr_unlock_tasks; j++) + for (int j = 0; j < t->nr_unlock_tasks; j++) if (t->unlock_tasks[j]->weight > t->weight) t->weight = t->unlock_tasks[j]->weight; if (!t->implicit && t->tic > 0) @@ -958,8 +955,9 @@ void scheduler_reweight(struct scheduler *s) { void scheduler_start(struct scheduler *s, unsigned int mask, unsigned int submask) { - int nr_tasks = s->nr_tasks, *tid = s->tasks_ind; - struct task *t, *tasks = s->tasks; + const int nr_tasks = s->nr_tasks; + int *tid = s->tasks_ind; + struct task *tasks = s->tasks; // ticks tic; /* Store the masks */ @@ -985,8 +983,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, const int waiting_old = s->waiting; /* We are going to use the task structure in a modified way to pass - information - to the task. Don't do this at home ! + information to the task. Don't do this at home ! - ci and cj will give the range of tasks to which the waits will be applied - the flags will be used to transfer the mask - the rank will be used to transfer the submask @@ -1011,6 +1008,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, /* Wait for the rewait tasks to have executed. */ pthread_mutex_lock(&s->sleep_mutex); + pthread_cond_broadcast(&s->sleep_cond); while (s->waiting > waiting_old) { pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); } @@ -1024,7 +1022,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, /* Loop over the tasks and enqueue whoever is ready. */ // tic = getticks(); for (int k = 0; k < s->nr_tasks; k++) { - t = &tasks[tid[k]]; + struct task *t = &tasks[tid[k]]; if (atomic_dec(&t->wait) == 1 && ((1 << t->type) & s->mask) && ((1 << t->subtype) & s->submask) && !t->skip) { scheduler_enqueue(s, t); @@ -1032,6 +1030,11 @@ void scheduler_start(struct scheduler *s, unsigned int mask, } } + /* To be safe, fire of one last sleep_cond in a safe way. */ + pthread_mutex_lock(&s->sleep_mutex); + pthread_cond_broadcast(&s->sleep_cond); + pthread_mutex_unlock(&s->sleep_mutex); + // message( "enqueueing tasks took %.3f %s." , // clocks_from_ticks( getticks() - tic ), clocks_getunit()); } @@ -1045,10 +1048,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask, void scheduler_enqueue(struct scheduler *s, struct task *t) { + /* The target queue for this task. */ int qid = -1; -#ifdef WITH_MPI - int err; -#endif /* Fail if this task has already been enqueued before. 
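/*
 * The scheduler changes above broadcast sleep_cond while holding sleep_mutex
 * and make the sleeping thread re-check for work under that same mutex before
 * calling pthread_cond_wait(), which is the standard way to avoid a lost
 * wake-up. A minimal, self-contained illustration of the pattern, reduced to
 * one worker and a single flag instead of task queues (hypothetical names):
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sleep_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t sleep_cond = PTHREAD_COND_INITIALIZER;
static int work_available = 0;

static void *worker(void *arg) {
  (void)arg;
  pthread_mutex_lock(&sleep_mutex);
  /* Re-check the predicate in a loop: wake-ups may be spurious, and the flag
     may already have been set before this thread reached the wait. */
  while (!work_available) pthread_cond_wait(&sleep_cond, &sleep_mutex);
  pthread_mutex_unlock(&sleep_mutex);
  printf("worker: got work\n");
  return NULL;
}

int main(void) {
  pthread_t t;
  pthread_create(&t, NULL, worker, NULL);

  /* Publish the work and wake the sleeper while holding the mutex, so the
     update and the broadcast cannot slip in between the worker's check and
     its wait. */
  pthread_mutex_lock(&sleep_mutex);
  work_available = 1;
  pthread_cond_broadcast(&sleep_cond);
  pthread_mutex_unlock(&sleep_mutex);

  pthread_join(t, NULL);
  return 0;
}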
*/ if (t->rid >= 0) error("Task has already been enqueued."); @@ -1070,6 +1071,9 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { /* Otherwise, look for a suitable queue. */ else { +#ifdef WITH_MPI + int err; +#endif /* Find the previous owner for each task type, and do any pre-processing needed. */ @@ -1092,13 +1096,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - if ((err = MPI_Irecv(t->ci->parts, t->ci->count, s->part_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, - &t->req)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int len; - MPI_Error_string(err, buff, &len); - error("Failed to emit irecv for particle data (%s).", buff); + err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (err != MPI_SUCCESS) { + mpi_error(err, "Failed to emit irecv for particle data."); } // message( "receiving %i parts with tag=%i from %i to %i." , // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); @@ -1110,13 +1111,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - if ((err = MPI_Isend(t->ci->parts, t->ci->count, s->part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, - &t->req)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int len; - MPI_Error_string(err, buff, &len); - error("Failed to emit isend for particle data (%s).", buff); + err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (err != MPI_SUCCESS) { + mpi_error(err, "Failed to emit isend for particle data."); } // message( "sending %i parts with tag=%i from %i to %i." , // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); @@ -1132,7 +1130,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { if (qid >= s->nr_queues) error("Bad computed qid."); - /* If no previous owner, find the shortest queue. */ + /* If no previous owner, pick a random queue. */ if (qid < 0) qid = rand() % s->nr_queues; /* Increase the waiting counter. */ @@ -1163,7 +1161,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { for (int k = 0; k < t->nr_unlock_tasks; k++) { struct task *t2 = t->unlock_tasks[k]; - int res = atomic_dec(&t2->wait); + const int res = atomic_dec(&t2->wait); if (res < 1) { error("Negative wait!"); } else if (res == 1) { @@ -1202,7 +1200,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { they are ready. */ for (int k = 0; k < t->nr_unlock_tasks; k++) { struct task *t2 = t->unlock_tasks[k]; - int res = atomic_dec(&t2->wait); + const int res = atomic_dec(&t2->wait); if (res < 1) { error("Negative wait!"); } else if (res == 1) { @@ -1239,7 +1237,7 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, const struct task *prev) { struct task *res = NULL; - int k, nr_queues = s->nr_queues; + const int nr_queues = s->nr_queues; unsigned int seed = qid; /* Check qid. */ @@ -1263,10 +1261,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, /* If unsuccessful, try stealing from the other queues. 
*/ if (s->flags & scheduler_flag_steal) { int count = 0, qids[nr_queues]; - for (k = 0; k < nr_queues; k++) + for (int k = 0; k < nr_queues; k++) if (s->queues[k].count > 0) qids[count++] = k; - for (k = 0; k < scheduler_maxsteal && count > 0; k++) { - int ind = rand_r(&seed) % count; + for (int k = 0; k < scheduler_maxsteal && count > 0; k++) { + const int ind = rand_r(&seed) % count; TIMER_TIC res = queue_gettask(&s->queues[qids[ind]], prev, 0); TIMER_TOC(timer_qsteal); @@ -1286,7 +1284,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, if (res == NULL) { #endif pthread_mutex_lock(&s->sleep_mutex); - if (s->waiting > 0) pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); + res = queue_gettask(&s->queues[qid], prev, 1); + if (res == NULL && s->waiting > 0) { + pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); + } pthread_mutex_unlock(&s->sleep_mutex); } } @@ -1351,12 +1352,6 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, s->tasks = NULL; s->tasks_ind = NULL; scheduler_reset(s, nr_tasks); - -/* Construct types for MPI communications */ -#ifdef WITH_MPI - part_create_mpi_type(&s->part_mpi_type); - xpart_create_mpi_type(&s->xpart_mpi_type); -#endif } /** @@ -1365,7 +1360,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, * @param s The #scheduler * @param fileName Name of the file to write to */ -void scheduler_print_tasks(struct scheduler *s, char *fileName) { +void scheduler_print_tasks(const struct scheduler *s, const char *fileName) { const int nr_tasks = s->nr_tasks, *tid = s->tasks_ind; struct task *t, *tasks = s->tasks; diff --git a/src/scheduler.h b/src/scheduler.h index 3f2d8c289d0d691d0d155b20ae0522c5830524aa..64c694aea295c13810a20b626055fc6c15eb0af8 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -100,12 +100,6 @@ struct scheduler { /* The node we are working on. */ int nodeID; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. */ @@ -128,7 +122,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t); void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb); void scheduler_set_unlocks(struct scheduler *s); void scheduler_dump_queue(struct scheduler *s); -void scheduler_print_tasks(struct scheduler *s, char *fileName); +void scheduler_print_tasks(const struct scheduler *s, const char *fileName); void scheduler_do_rewait(struct task *t_begin, struct task *t_end, unsigned int mask, unsigned int submask); diff --git a/src/serial_io.c b/src/serial_io.c index 3fb1a1d1b743e48be4e8e6d81413c28e8034b870..40bd2b1c8921f4acbfa0950984d6915ebd3d241e 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -57,14 +57,13 @@ * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param importance If COMPULSORY, the data must be present in the IC file. If *OPTIONAL, the array will be zeroed when the data is not present. * * @todo A better version using HDF5 hyper-slabs to read the file directly into *the part array * will be written once the structures have been stabilized. - * - * Calls #error() if an error occurs. 
*/ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int dim, long long N_total, long long offset, @@ -172,9 +171,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * Routines writing an output file *-----------------------------------------------------------------------------*/ -void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, long long N_total, int dim, - struct UnitSystem* us, enum UnitConversionFactor convFactor) { +void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + long long N_total, int dim, struct UnitSystem* us, + enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0; int rank = 0; hsize_t shape[2]; @@ -234,7 +234,7 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - writeXMFline(xmfFile, fileName, name, N_total, dim, type); + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim, type); /* Write unit conversion factors for this data set */ conversionString(buffer, us, convFactor); @@ -255,22 +255,23 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param grp The group in which to write. * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param name The name of the array to write. * @param type The #DATA_TYPE of the array. * @param N The number of particles to write. * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param us The UnitSystem currently in use - * @param convFactor The UnitConversionFactor for this array - * - * - * Calls #error() if an error occurs. + * @param convFactor The UnitConversionFactor for this arrayo */ -void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, int N, int dim, long long N_total, - int mpi_rank, long long offset, char* part_c, - size_t partSize, struct UnitSystem* us, +void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + int N, int dim, long long N_total, int mpi_rank, + long long offset, char* part_c, size_t partSize, + struct UnitSystem* us, enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_memspace = 0, h_filespace = 0; @@ -285,8 +286,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, /* Prepare the arrays in the file */ if (mpi_rank == 0) - prepareArray(grp, fileName, xmfFile, name, type, N_total, dim, us, - convFactor); + prepareArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N_total, + dim, us, convFactor); /* Allocate temporary buffer */ temp = malloc(N * dim * sizeOfType(type)); @@ -370,21 +371,26 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param fileName Unused parameter in non-MPI mode * @param xmfFile Unused parameter in non-MPI mode * @param name The name of the array to write. + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param type The #DATA_TYPE of the array. 
 * @param N The number of particles to write.
 * @param dim The dimension of the data (1 for scalar, 3 for vector)
 * @param part A (char*) pointer on the first occurrence of the field of
- *interest
- *in the parts array
+ *interest in the parts array
+ * @param N_total Unused parameter in non-MPI mode
+ * @param mpi_rank Unused parameter in non-MPI mode
+ * @param offset Unused parameter in non-MPI mode
 * @param field The name (code name) of the field to read from.
 * @param us The UnitSystem currently in use
 * @param convFactor The UnitConversionFactor for this array
 *
 */
-#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \
-                   mpi_rank, offset, field, us, convFactor)                   \
-  writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total,      \
-                    mpi_rank, offset, (char*)(&(part[0]).field),              \
+#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N,  \
+                   dim, part, N_total, mpi_rank, offset, field, us,           \
+                   convFactor)                                                \
+  writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \
+                    dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field),\
                     sizeof(part[0]), us, convFactor)
 
 /* Import the right hydro definition */
@@ -609,12 +615,12 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
   FILE* xmfFile = 0;
 
   /* Number of particles of each type */
-  //const size_t Ndm = Ntot - Ngas;
+  // const size_t Ndm = Ntot - Ngas;
 
   /* MATTHIEU: Temporary fix to preserve master */
-  const size_t Ndm = Ntot > 0 ? Ntot - Ngas: 0;
+  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
   /* MATTHIEU: End temporary fix */
-  
+
   /* File name */
   char fileName[FILENAME_BUFFER_SIZE];
   snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount);
@@ -643,7 +649,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
     xmfFile = prepareXMFfile();
 
     /* Write the part corresponding to this specific output */
-    // writeXMFheader(xmfFile, N_total, fileName, e->time);
+    writeXMFoutputheader(xmfFile, fileName, e->time);
 
     /* Open file */
     /* message("Opening file '%s'.", fileName); */
@@ -750,6 +756,11 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
     /* Don't do anything if no particle of this kind */
     if (N_total[ptype] == 0) continue;
 
+    /* Add the global information for that particle type to the XMF
+     * meta-file */
+    if (mpi_rank == 0)
+      writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype);
+
     /* Open the particle group in the file */
     char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
     snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
@@ -763,9 +774,9 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
 
     switch (ptype) {
 
      case GAS:
-        hydro_write_particles(h_grp, fileName, xmfFile, N[ptype],
-                              N_total[ptype], mpi_rank, offset[ptype],
-                              parts, us);
+        hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile,
+                              N[ptype], N_total[ptype], mpi_rank,
+                              offset[ptype], parts, us);
         break;
 
@@ -780,9 +791,9 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
         collect_dm_gparts(gparts, Ntot, dmparts, Ndm);
 
         /* Write DM particles */
-        darkmatter_write_particles(h_grp, fileName, xmfFile, N[ptype],
-                                   N_total[ptype], mpi_rank, offset[ptype],
-                                   dmparts, us);
+        darkmatter_write_particles(h_grp, fileName, partTypeGroupName,
+                                   xmfFile, N[ptype], N_total[ptype],
+                                   mpi_rank, offset[ptype], dmparts, us);
 
         /* Free temporary array */
         free(dmparts);
@@ -794,6 +805,9 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
 
       /* Close particle group */
       H5Gclose(h_grp);
+
+      /* Close this particle group in the XMF file as well */
+      if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype);
     }
 
     /* Close file */
@@ -805,7 +819,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank,
   }
 
   /* Write footer of LXMF file descriptor */
-  if (mpi_rank == 0) writeXMFfooter(xmfFile);
+  if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time);
 
   /* message("Done writing particles..."); */
   ++outputCount;
diff --git a/src/single_io.c b/src/single_io.c
index 5a48406a79f671e7591bab5529712365906e90a4..801428433ef5170082b68dec425e52f845bb41ae 100644
--- a/src/single_io.c
+++ b/src/single_io.c
@@ -52,16 +52,14 @@
 * @param N The number of particles.
 * @param dim The dimension of the data (1 for scalar, 3 for vector)
 * @param part_c A (char*) pointer on the first occurrence of the field of
- * @param partSize The size in bytes of the particle structure.
 *interest in the parts array
+ * @param partSize The size in bytes of the particle structure.
 * @param importance If COMPULSORY, the data must be present in the IC file. If
 *OPTIONAL, the array will be zeroed when the data is not present.
 *
 * @todo A better version using HDF5 hyper-slabs to read the file directly into
 *the part array
 * will be written once the structures have been stabilized.
- *
- * Calls #error() if an error occurs.
 */
 void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
                       int dim, char* part_c, size_t partSize,
@@ -139,12 +137,14 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
 * @param grp The group in which to write.
 * @param fileName The name of the file in which the data is written
 * @param xmfFile The FILE used to write the XMF description
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
 * @param name The name of the array to write.
 * @param type The #DATA_TYPE of the array.
 * @param N The number of particles to write.
 * @param dim The dimension of the data (1 for scalar, 3 for vector)
 * @param part_c A (char*) pointer on the first occurrence of the field of
- *interest in the parts array
+ *interest in the parts array.
 * @param partSize The size in bytes of the particle structure.
 * @param us The UnitSystem currently in use
 * @param convFactor The UnitConversionFactor for this array
@@ -152,12 +152,11 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N,
 * @todo A better version using HDF5 hyper-slabs to write the file directly from
 *the part array
 * will be written once the structures have been stabilized.
- *
- * Calls #error() if an error occurs.
 */
-void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
-                       enum DATA_TYPE type, int N, int dim, char* part_c,
-                       size_t partSize, struct UnitSystem* us,
+void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile,
+                       char* partTypeGroupName, char* name, enum DATA_TYPE type,
+                       int N, int dim, char* part_c, size_t partSize,
+                       struct UnitSystem* us,
                        enum UnitConversionFactor convFactor) {
   hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0;
   void* temp = 0;
@@ -239,7 +238,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   }
 
   /* Write XMF description for this data set */
-  writeXMFline(xmfFile, fileName, name, N, dim, type);
+  writeXMFline(xmfFile, fileName, partTypeGroupName, name, N, dim, type);
 
   /* Write unit conversion factors for this data set */
   conversionString(buffer, us, convFactor);
@@ -283,6 +282,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
 * @param fileName The name of the file in which the data is written
 * @param xmfFile The FILE used to write the XMF description
 * @param name The name of the array to write.
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
 * @param type The #DATA_TYPE of the array.
 * @param N The number of particles to write.
 * @param dim The dimension of the data (1 for scalar, 3 for vector)
@@ -296,10 +297,11 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
 * @param convFactor The UnitConversionFactor for this array
 *
 */
-#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \
-                   mpi_rank, offset, field, us, convFactor)                   \
-  writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim,               \
-                    (char*)(&(part[0]).field), sizeof(part[0]), us,           \
+#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N,  \
+                   dim, part, N_total, mpi_rank, offset, field, us,           \
+                   convFactor)                                                \
+  writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \
+                    dim, (char*)(&(part[0]).field), sizeof(part[0]), us,      \
                     convFactor)
 
 /* Import the right hydro definition */
@@ -478,10 +480,10 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
   static int outputCount = 0;
 
   /* Number of particles of each type */
-  //const size_t Ndm = Ntot - Ngas;
+  // const size_t Ndm = Ntot - Ngas;
 
   /* MATTHIEU: Temporary fix to preserve master */
-  const size_t Ndm = Ntot > 0 ? Ntot - Ngas: 0;
+  const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0;
   /* MATTHIEU: End temporary fix */
 
   long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0};
@@ -498,7 +500,7 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
   xmfFile = prepareXMFfile();
 
   /* Write the part corresponding to this specific output */
-  writeXMFheader(xmfFile, Ngas, fileName, e->time);
+  writeXMFoutputheader(xmfFile, fileName, e->time);
 
   /* Open file */
   /* message("Opening file '%s'.", fileName); */
@@ -571,6 +573,9 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
     /* Don't do anything if no particle of this kind */
     if (numParticles[ptype] == 0) continue;
 
+    /* Add the global information for that particle type to the XMF meta-file */
+    writeXMFgroupheader(xmfFile, fileName, numParticles[ptype], ptype);
+
     /* Open the particle group in the file */
     char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE];
     snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d",
@@ -587,8 +592,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
     switch (ptype) {
 
      case GAS:
-        hydro_write_particles(h_grp, fileName, xmfFile, Ngas, Ngas, 0, 0, parts,
-                              us);
+        hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, Ngas,
+                              Ngas, 0, 0, parts, us);
         break;
 
      case DM:
@@ -602,8 +607,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
         collect_dm_gparts(gparts, Ntot, dmparts, Ndm);
 
         /* Write DM particles */
-        darkmatter_write_particles(h_grp, fileName, xmfFile, Ndm, Ndm, 0, 0,
-                                   dmparts, us);
+        darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile,
+                                   Ndm, Ndm, 0, 0, dmparts, us);
 
         /* Free temporary array */
         free(dmparts);
@@ -615,10 +620,13 @@ void write_output_single(struct engine* e, struct UnitSystem* us) {
 
     /* Close particle group */
     H5Gclose(h_grp);
+
+    /* Close this particle group in the XMF file as well */
+    writeXMFgroupfooter(xmfFile, ptype);
   }
 
   /* Write LXMF file descriptor */
-  writeXMFfooter(xmfFile);
+  writeXMFoutputfooter(xmfFile, outputCount, e->time);
 
   /* message("Done writing particles..."); */
 
diff --git a/src/space.c b/src/space.c
index fa2541c2872c38f5000e78ef2802d9a4f719f9fc..0cce068b5d8a8060b6ca4cde71aeff9dc6b69e8d 100644
--- a/src/space.c
+++ b/src/space.c
@@ -97,12 +97,10 @@ const int sortlistID[27] = {
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
                  double *shift) {
 
-  int k, sid = 0, periodic = s->periodic;
-  struct cell *temp;
-  double dx[3];
-
   /* Get the relative distance between the pairs, wrapping. */
-  for (k = 0; k < 3; k++) {
+  const int periodic = s->periodic;
+  double dx[3];
+  for (int k = 0; k < 3; k++) {
     dx[k] = (*cj)->loc[k] - (*ci)->loc[k];
     if (periodic && dx[k] < -s->dim[k] / 2)
       shift[k] = s->dim[k];
@@ -114,15 +112,16 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
   }
 
   /* Get the sorting index. */
-  for (k = 0; k < 3; k++)
+  int sid = 0;
+  for (int k = 0; k < 3; k++)
     sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1));
 
   /* Switch the cells around? */
   if (runner_flip[sid]) {
-    temp = *ci;
+    struct cell *temp = *ci;
     *ci = *cj;
     *cj = temp;
-    for (k = 0; k < 3; k++) shift[k] = -shift[k];
+    for (int k = 0; k < 3; k++) shift[k] = -shift[k];
   }
   sid = sortlistID[sid];
 
@@ -137,10 +136,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
 
 void space_rebuild_recycle(struct space *s, struct cell *c) {
 
-  int k;
-
   if (c->split)
-    for (k = 0; k < 8; k++)
+    for (int k = 0; k < 8; k++)
       if (c->progeny[k] != NULL) {
         space_rebuild_recycle(s, c->progeny[k]);
         space_recycle(s, c->progeny[k]);
@@ -158,19 +155,19 @@ void space_rebuild_recycle(struct space *s, struct cell *c) {
 
 void space_regrid(struct space *s, double cell_max, int verbose) {
 
-  float h_max = s->cell_min / kernel_gamma / space_stretch, dmin;
-  int i, j, k, cdim[3], nr_parts = s->nr_parts;
+  float h_max = s->cell_min / kernel_gamma / space_stretch;
+  const size_t nr_parts = s->nr_parts;
   struct cell *restrict c;
   ticks tic = getticks();
 
   /* Run through the parts and get the current h_max. */
   // tic = getticks();
   if (s->cells != NULL) {
-    for (k = 0; k < s->nr_cells; k++) {
+    for (int k = 0; k < s->nr_cells; k++) {
       if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max;
     }
   } else {
-    for (k = 0; k < nr_parts; k++) {
+    for (int k = 0; k < nr_parts; k++) {
       if (s->parts[k].h > h_max) h_max = s->parts[k].h;
     }
     s->h_max = h_max;
@@ -190,7 +187,8 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max);
 
   /* Get the new putative cell dimensions. */
-  for (k = 0; k < 3; k++)
+  int cdim[3];
+  for (int k = 0; k < 3; k++)
     cdim[k] =
         floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max));
 
@@ -213,7 +211,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
     /* Free the old cells, if they were allocated. */
     if (s->cells != NULL) {
-      for (k = 0; k < s->nr_cells; k++) {
+      for (int k = 0; k < s->nr_cells; k++) {
         space_rebuild_recycle(s, &s->cells[k]);
         if (s->cells[k].sort != NULL) free(s->cells[k].sort);
       }
@@ -222,12 +220,12 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
     }
 
     /* Set the new cell dimensions only if smaller. */
-    for (k = 0; k < 3; k++) {
+    for (int k = 0; k < 3; k++) {
       s->cdim[k] = cdim[k];
       s->h[k] = s->dim[k] / cdim[k];
       s->ih[k] = 1.0 / s->h[k];
     }
-    dmin = fminf(s->h[0], fminf(s->h[1], s->h[2]));
+    const float dmin = fminf(s->h[0], fminf(s->h[1], s->h[2]));
 
     /* Allocate the highest level of cells. */
     s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2];
@@ -235,13 +233,13 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
                        s->nr_cells * sizeof(struct cell)) != 0)
       error("Failed to allocate cells.");
     bzero(s->cells, s->nr_cells * sizeof(struct cell));
-    for (k = 0; k < s->nr_cells; k++)
+    for (int k = 0; k < s->nr_cells; k++)
       if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock.");
 
     /* Set the cell location and sizes. */
-    for (i = 0; i < cdim[0]; i++)
-      for (j = 0; j < cdim[1]; j++)
-        for (k = 0; k < cdim[2]; k++) {
+    for (int i = 0; i < cdim[0]; i++)
+      for (int j = 0; j < cdim[1]; j++)
+        for (int k = 0; k < cdim[2]; k++) {
           c = &s->cells[cell_getid(cdim, i, j, k)];
           c->loc[0] = i * s->h[0];
           c->loc[1] = j * s->h[1];
@@ -271,7 +269,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
   else {
 
     /* Free the old cells, if they were allocated. */
-    for (k = 0; k < s->nr_cells; k++) {
+    for (int k = 0; k < s->nr_cells; k++) {
       space_rebuild_recycle(s, &s->cells[k]);
       s->cells[k].sorts = NULL;
       s->cells[k].nr_tasks = 0;
@@ -308,7 +306,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) {
 
 void space_rebuild(struct space *s, double cell_max, int verbose) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   /* Be verbose about this. */
   // message( "re)building space..." ); fflush(stdout);
@@ -320,23 +318,15 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   int nr_gparts = s->nr_gparts;
   struct cell *restrict cells = s->cells;
 
-  double ih[3], dim[3];
-  int cdim[3];
-  ih[0] = s->ih[0];
-  ih[1] = s->ih[1];
-  ih[2] = s->ih[2];
-  dim[0] = s->dim[0];
-  dim[1] = s->dim[1];
-  dim[2] = s->dim[2];
-  cdim[0] = s->cdim[0];
-  cdim[1] = s->cdim[1];
-  cdim[2] = s->cdim[2];
+  const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]};
 
   /* Run through the particles and get their cell index. */
   // tic = getticks();
   const size_t ind_size = s->size_parts;
-  size_t *ind;
-  if ((ind = (size_t *)malloc(sizeof(size_t) * ind_size)) == NULL)
+  int *ind;
+  if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL)
     error("Failed to allocate temporary particle indices.");
   for (int k = 0; k < nr_parts; k++) {
     struct part *restrict p = &s->parts[k];
@@ -349,37 +339,91 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
     cells[ind[k]].count++;
   }
+  // message( "getting particle indices took %.3f %s." ,
+  // clocks_from_ticks(getticks() - tic), clocks_getunit()):
+
+  /* Run through the gravity particles and get their cell index. */
+  // tic = getticks();
+  const size_t gind_size = s->size_gparts;
+  int *gind;
+  if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL)
+    error("Failed to allocate temporary g-particle indices.");
+  for (int k = 0; k < nr_gparts; k++) {
+    struct gpart *restrict gp = &s->gparts[k];
+    for (int j = 0; j < 3; j++)
+      if (gp->x[j] < 0.0)
+        gp->x[j] += dim[j];
+      else if (gp->x[j] >= dim[j])
+        gp->x[j] -= dim[j];
+    gind[k] =
+        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
+    cells[gind[k]].gcount++;
+  }
 // message( "getting particle indices took %.3f %s." ,
-// clocks_from_ticks(getticks() - tic), clocks_getunit()):
+// clocks_from_ticks(getticks() - tic), clocks_getunit());
 
 #ifdef WITH_MPI
 
   /* Move non-local parts to the end of the list. */
-  const int nodeID = s->e->nodeID;
+  const int local_nodeID = s->e->nodeID;
   for (int k = 0; k < nr_parts; k++)
-    if (cells[ind[k]].nodeID != nodeID) {
+    if (cells[ind[k]].nodeID != local_nodeID) {
      cells[ind[k]].count -= 1;
      nr_parts -= 1;
-      struct part tp = s->parts[k];
+      const struct part tp = s->parts[k];
      s->parts[k] = s->parts[nr_parts];
      s->parts[nr_parts] = tp;
-      struct xpart txp = s->xparts[k];
+      if (s->parts[k].gpart != NULL) {
+        s->parts[k].gpart->part = &s->parts[k];
+      }
+      if (s->parts[nr_parts].gpart != NULL) {
+        s->parts[nr_parts].gpart->part = &s->parts[nr_parts];
+      }
+      const struct xpart txp = s->xparts[k];
      s->xparts[k] = s->xparts[nr_parts];
      s->xparts[nr_parts] = txp;
-      int t = ind[k];
+      const int t = ind[k];
      ind[k] = ind[nr_parts];
      ind[nr_parts] = t;
    }
 
+  /* Move non-local gparts to the end of the list. */
+  for (int k = 0; k < nr_gparts; k++)
+    if (cells[gind[k]].nodeID != local_nodeID) {
+      cells[gind[k]].gcount -= 1;
+      nr_gparts -= 1;
+      const struct gpart tp = s->gparts[k];
+      s->gparts[k] = s->gparts[nr_gparts];
+      s->gparts[nr_gparts] = tp;
+      if (s->gparts[k].id > 0) {
+        s->gparts[k].part->gpart = &s->gparts[k];
+      }
+      if (s->gparts[nr_gparts].id > 0) {
+        s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts];
+      }
+      const int t = gind[k];
+      gind[k] = gind[nr_gparts];
+      gind[nr_gparts] = t;
+    }
+
   /* Exchange the strays, note that this potentially re-allocates
      the parts arrays. */
-  s->nr_parts =
-      nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts],
-                                        s->nr_parts - nr_parts);
+  /* TODO: This function also exchanges gparts, but this is shorted-out
+     until they are fully implemented. */
+  size_t nr_parts_exchanged = s->nr_parts - nr_parts;
+  size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts;
+  engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged,
+                         nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged);
+
+  /* Add post-processing, i.e. re-linking/creating of gparts here. */
+
+  /* Set the new particle counts. */
+  s->nr_parts = nr_parts + nr_parts_exchanged;
+  s->nr_gparts = nr_gparts + nr_gparts_exchanged;
 
   /* Re-allocate the index array if needed.. */
   if (s->nr_parts > ind_size) {
-    size_t *ind_new;
-    if ((ind_new = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL)
+    int *ind_new;
+    if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
      error("Failed to allocate temporary particle indices.");
     memcpy(ind_new, ind, sizeof(size_t) * nr_parts);
     free(ind);
@@ -388,7 +432,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
 
   /* Assign each particle to its cell. */
   for (int k = nr_parts; k < s->nr_parts; k++) {
-    struct part *p = &s->parts[k];
+    const struct part *const p = &s->parts[k];
     ind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
     cells[ind[k]].count += 1;
@@ -419,65 +463,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
   /* We no longer need the indices as of here. */
   free(ind);
 
-  /* Run through the gravity particles and get their cell index. */
-  // tic = getticks();
-  const size_t gind_size = s->size_gparts;
-  size_t *gind;
-  if ((gind = (size_t *)malloc(sizeof(size_t) * gind_size)) == NULL)
-    error("Failed to allocate temporary g-particle indices.");
-  for (int k = 0; k < nr_gparts; k++) {
-    struct gpart *gp = &s->gparts[k];
-    for (int j = 0; j < 3; j++)
-      if (gp->x[j] < 0.0)
-        gp->x[j] += dim[j];
-      else if (gp->x[j] >= dim[j])
-        gp->x[j] -= dim[j];
-    gind[k] =
-        cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]);
-    cells[gind[k]].gcount++;
-  }
-// message( "getting particle indices took %.3f %s." ,
-// clocks_from_ticks(getticks() - tic), clocks_getunit());
-
 #ifdef WITH_MPI
 
-  /* Move non-local gparts to the end of the list. */
-  for (int k = 0; k < nr_gparts; k++)
-    if (cells[ind[k]].nodeID != nodeID) {
-      cells[ind[k]].gcount -= 1;
-      nr_gparts -= 1;
-      struct gpart tp = s->gparts[k];
-      s->gparts[k] = s->gparts[nr_gparts];
-      s->gparts[nr_gparts] = tp;
-      int t = ind[k];
-      ind[k] = ind[nr_gparts];
-      ind[nr_gparts] = t;
-    }
-
-  /* Exchange the strays, note that this potentially re-allocates
-     the parts arrays. */
-  // s->nr_gparts =
-  //     nr_gparts + engine_exchange_strays(s->e, nr_gparts, &ind[nr_gparts],
-  //                                        s->nr_gparts - nr_gparts);
-  if (nr_gparts > 0)
-    error("Need to implement the exchange of strays for the gparts");
-
   /* Re-allocate the index array if needed.. */
   if (s->nr_gparts > gind_size) {
-    size_t *gind_new;
-    if ((gind_new = (size_t *)malloc(sizeof(size_t) * s->nr_gparts)) == NULL)
+    int *gind_new;
+    if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
      error("Failed to allocate temporary g-particle indices.");
-    memcpy(gind_new, gind, sizeof(size_t) * nr_gparts);
+    memcpy(gind_new, gind, sizeof(int) * nr_gparts);
     free(gind);
     gind = gind_new;
   }
 
   /* Assign each particle to its cell. */
   for (int k = nr_gparts; k < s->nr_gparts; k++) {
-    struct gpart *p = &s->gparts[k];
+    const struct gpart *const p = &s->gparts[k];
     gind[k] =
         cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]);
-    cells[gind[k]].count += 1;
+    cells[gind[k]].gcount += 1;
     /* if ( cells[ ind[k] ].nodeID != nodeID )
        error( "Received part that does not belong to me (nodeID=%i)." ,
        cells[ ind[k] ].nodeID ); */
@@ -531,7 +534,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) {
  */
 void space_split(struct space *s, struct cell *cells, int verbose) {
 
-  ticks tic = getticks();
+  const ticks tic = getticks();
 
   for (int k = 0; k < s->nr_cells; k++)
     scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k,
@@ -555,7 +558,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) {
 * @param verbose Are we talkative ?
 */
 
-void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose) {
 
   ticks tic = getticks();
@@ -603,7 +606,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
 void space_do_parts_sort() {
 
   /* Pointers to the sorting data. */
-  size_t *ind = space_sort_struct.ind;
+  int *ind = space_sort_struct.ind;
   struct part *parts = space_sort_struct.parts;
   struct xpart *xparts = space_sort_struct.xparts;
 
@@ -725,7 +728,7 @@ void space_do_parts_sort() {
   } /* main loop. */
 }
 
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
+void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min,
                        int max) {
 
   struct qstack {
diff --git a/src/space.h b/src/space.h
index 20f86562010f59f487c9783e83d540c196ddee83..e761595838ae78b0d8a67cca676cfa59f3f700f6 100644
--- a/src/space.h
+++ b/src/space.h
@@ -103,6 +103,8 @@ struct space {
   /* Buffers for parts that we will receive from foreign cells. */
   struct part *parts_foreign;
   size_t nr_parts_foreign, size_parts_foreign;
+  struct gpart *gparts_foreign;
+  size_t nr_gparts_foreign, size_gparts_foreign;
 };
 
 /* Interval stack necessary for parallel particle sorting. */
@@ -114,7 +116,7 @@ struct qstack {
 struct parallel_sort {
   struct part *parts;
   struct xpart *xparts;
-  size_t *ind;
+  int *ind;
   struct qstack *stack;
   unsigned int stack_size;
   volatile unsigned int first, last, waiting;
@@ -122,9 +124,9 @@ struct parallel_sort {
 extern struct parallel_sort space_sort_struct;
 
 /* function prototypes. */
-void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max,
+void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max,
                       int verbose);
-void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min,
+void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min,
                        int max);
 struct cell *space_getcell(struct space *s);
 int space_getsid(struct space *s, struct cell **ci, struct cell **cj,
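
The space_rebuild() hunks above move non-local particles to the tail of the parts and gparts arrays by swapping entries, and each swap is now followed by a fix-up of the part<->gpart back-pointers. The standalone sketch below illustrates that pattern under simplified assumptions: the two structs are minimal mock-ups carrying only the mutual-link pointers and an id field, not SWIFT's real part/gpart types.

/* Standalone sketch (not part of the patch above): swap two linked particles
 * and restore their back-pointers, mirroring the pattern used when non-local
 * parts are moved to the end of the array. */
#include <stddef.h>
#include <stdio.h>

struct gpart; /* forward declaration so the two structs can reference each other */

struct part {
  struct gpart *gpart; /* link to the gravity particle, or NULL */
  int id;
};

struct gpart {
  struct part *part; /* back-link to the hydro particle */
  long long id;
};

/* Swap parts[i] and parts[j], then re-link the gparts that point at them. */
static void swap_and_relink(struct part *parts, size_t i, size_t j) {
  const struct part tmp = parts[i];
  parts[i] = parts[j];
  parts[j] = tmp;
  if (parts[i].gpart != NULL) parts[i].gpart->part = &parts[i];
  if (parts[j].gpart != NULL) parts[j].gpart->part = &parts[j];
}

int main(void) {
  struct part parts[2] = {{NULL, 0}, {NULL, 1}};
  struct gpart gparts[2] = {{&parts[0], 0}, {&parts[1], 1}};
  parts[0].gpart = &gparts[0];
  parts[1].gpart = &gparts[1];

  swap_and_relink(parts, 0, 1);

  /* Each gpart still points at the part carrying the matching id. */
  for (int k = 0; k < 2; ++k)
    printf("gpart %lld -> part %d\n", gparts[k].id, gparts[k].part->id);
  return 0;
}

Compiled on its own, the sketch prints a consistent pairing after the swap; without the two re-link statements the gparts would be left pointing at the wrong array slots, which is the failure mode the added gpart handling in space_rebuild() guards against.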