diff --git a/.gitignore b/.gitignore
index 8137ea759b24b3f4ec9909a460da4bcb47b0a1ac..9bae25ebff81d077253fd8f1227aad98545d28a0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,15 +25,23 @@
 examples/swift_mindt
 examples/swift_mindt_mpi
 examples/swift_mpi
-tests/testVectorize
-tests/brute_force.dat
-tests/swift_dopair.dat
+tests/testPair
+tests/brute_force_standard.dat
+tests/swift_dopair_standard.dat
+tests/brute_force_perturbed.dat
+tests/swift_dopair_perturbed.dat
+tests/test27cells
+tests/brute_force_27_standard.dat
+tests/swift_dopair_27_standard.dat
+tests/brute_force_27_perturbed.dat
+tests/swift_dopair_27_perturbed.dat
 tests/testGreetings
 tests/testReading
 tests/input.hdf5
 tests/testSingle
 tests/testTimeIntegration
 tests/testSPHStep
+tests/testParser

 theory/latex/swift.pdf
diff --git a/examples/main.c b/examples/main.c
index c88f92a07a747c327692b5e0fbbc7dc07b93ac0c..9523af49ed30c54d256d287ea2846a854650bc05 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -55,7 +55,6 @@
  * @brief Main routine that loads a few particles and generates some output.
  *
  */
-
 int main(int argc, char *argv[]) {

   int c, icount, periodic = 1;
@@ -79,7 +78,10 @@ int main(int argc, char *argv[]) {
   int nr_nodes = 1, myrank = 0;
   FILE *file_thread;
   int with_outputs = 1;
-  int verbose = 0, talking;
+  int with_external_gravity = 0;
+  int with_self_gravity = 0;
+  int engine_policies = 0;
+  int verbose = 0, talking = 0;
   unsigned long long cpufreq = 0;

 #ifdef WITH_MPI
@@ -97,12 +99,15 @@ int main(int argc, char *argv[]) {
 #endif
 #endif

-/* Choke on FP-exceptions. */
-// feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
+  /* Choke on FP-exceptions. */
+  // feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
+
+  /* Initialize CPU frequency, this also starts time. */
+  clocks_set_cpufreq(cpufreq);

 #ifdef WITH_MPI
   /* Start by initializing MPI. */
-  int res, prov;
+  int res = 0, prov = 0;
   if ((res = MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &prov)) !=
       MPI_SUCCESS)
     error("Call to MPI_Init failed with error %i.", res);
@@ -128,9 +133,6 @@ int main(int argc, char *argv[]) {
                            &initial_partition.grid[1],
                            &initial_partition.grid[0]);
 #endif

-  /* Initialize CPU frequency, this also starts time. */
-  clocks_set_cpufreq(cpufreq);
-
   /* Greeting message */
   if (myrank == 0) greetings();
@@ -156,7 +158,7 @@ int main(int argc, char *argv[]) {
   bzero(&s, sizeof(struct space));

   /* Parse the options */
-  while ((c = getopt(argc, argv, "a:c:d:e:f:h:m:oP:q:R:s:t:v:w:y:z:")) != -1)
+  while ((c = getopt(argc, argv, "a:c:d:e:f:gGh:m:oP:q:R:s:t:v:w:y:z:")) != -1)
     switch (c) {
       case 'a':
         if (sscanf(optarg, "%lf", &scaling) != 1)
@@ -185,6 +187,12 @@ int main(int argc, char *argv[]) {
       case 'f':
         if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name.");
         break;
+      case 'g':
+        with_external_gravity = 1;
+        break;
+      case 'G':
+        with_self_gravity = 1;
+        break;
       case 'h':
         if (sscanf(optarg, "%llu", &cpufreq) != 1)
           error("Error parsing CPU frequency.");
@@ -343,10 +351,6 @@ int main(int argc, char *argv[]) {
     message("CPU frequency used for tick conversion: %llu Hz", cpufreq);
   }

-  /* Check we have sensible time step bounds */
-  if (dt_min > dt_max)
-    error("Minimal time step size must be large than maximal time step size ");
-
   /* Check whether an IC file has been provided */
   if (strcmp(ICfileName, "") == 0)
     error("An IC file name must be provided via the option -f");
@@ -356,11 +360,11 @@ int main(int argc, char *argv[]) {
   if (myrank == 0) clocks_gettime(&tic);
 #if defined(WITH_MPI)
 #if defined(HAVE_PARALLEL_HDF5)
-  read_ic_parallel(ICfileName, dim, &parts, &Ngas, &periodic, myrank, nr_nodes,
-                   MPI_COMM_WORLD, MPI_INFO_NULL);
+  read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
+                   myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
 #else
-  read_ic_serial(ICfileName, dim, &parts, &Ngas, &periodic, myrank, nr_nodes,
-                 MPI_COMM_WORLD, MPI_INFO_NULL);
+  read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic,
+                 myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL);
 #endif
 #else
   read_ic_single(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic);
@@ -376,6 +380,7 @@ int main(int argc, char *argv[]) {
 #if defined(WITH_MPI)
   long long N_long[2] = {Ngas, Ngpart};
   MPI_Reduce(&N_long, &N_total, 2, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
+  N_total[1] -= N_total[0];
   if (myrank == 0)
     message("Read %lld gas particles and %lld DM particles from the ICs",
             N_total[0], N_total[1]);
@@ -383,8 +388,33 @@ int main(int argc, char *argv[]) {
   N_total[0] = Ngas;
   N_total[1] = Ngpart - Ngas;
   message("Read %lld gas particles and %lld DM particles from the ICs",
-          N_total[0], N_total[1]);
+          N_total[0], N_total[1]);
+#endif
+
+  /* MATTHIEU: Temporary fix to preserve master */
+  if (!with_external_gravity && !with_self_gravity) {
+    free(gparts);
+    gparts = NULL;
+    for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL;
+    Ngpart = 0;
+#if defined(WITH_MPI)
+    N_long[0] = Ngas;
+    N_long[1] = Ngpart;
+    MPI_Reduce(&N_long, &N_total, 2, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_WORLD);
+    if (myrank == 0)
+      message(
+          "AFTER FIX: Read %lld gas particles and %lld DM particles from the "
+          "ICs",
+          N_total[0], N_total[1]);
+#else
+    N_total[0] = Ngas;
+    N_total[1] = Ngpart;
+    message(
+        "AFTER FIX: Read %lld gas particles and %lld DM particles from the ICs",
+        N_total[0], N_total[1]);
 #endif
+  }
+  /* MATTHIEU: End temporary fix */

   /* Apply h scaling */
   if (scaling != 1.0)
@@ -448,12 +478,16 @@ int main(int argc, char *argv[]) {
       message("nr of cells at depth %i is %i.", data[0], data[1]);
   }

+  /* Construct the engine policy */
+  engine_policies = ENGINE_POLICY | engine_policy_steal | engine_policy_hydro;
+  if (with_external_gravity) engine_policies |= engine_policy_external_gravity;
+  if (with_self_gravity) engine_policies |= engine_policy_self_gravity;
+
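The two new command-line flags are folded into a single `engine_policies` bitmask that is handed to `engine_init()` below. A minimal, self-contained sketch of that pattern follows; the enumerator names and values here are illustrative stand-ins, not SWIFT's actual `engine_policy_*` definitions.

```c
#include <stdio.h>

/* Illustrative policy flags: one bit per capability. */
enum toy_policy {
  toy_policy_none = 0,
  toy_policy_steal = 1 << 0,
  toy_policy_hydro = 1 << 1,
  toy_policy_external_gravity = 1 << 2,
  toy_policy_self_gravity = 1 << 3
};

int main(void) {
  int policies = toy_policy_steal | toy_policy_hydro; /* Defaults. */
  int with_self_gravity = 1;                          /* As set by -G. */
  if (with_self_gravity) policies |= toy_policy_self_gravity;

  /* Testing a policy later is a simple mask-and-check. */
  if (policies & toy_policy_self_gravity) printf("self-gravity enabled\n");
  return 0;
}
```

Packing the run configuration into one integer keeps the `engine_init()` signature stable as new modes (here `-g` and `-G`) are added.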
   /* Initialize the engine with this space. */
   if (myrank == 0) clocks_gettime(&tic);
   if (myrank == 0) message("nr_nodes is %i.", nr_nodes);
   engine_init(&e, &s, dt_max, nr_threads, nr_queues, nr_nodes, myrank,
-              ENGINE_POLICY | engine_policy_steal | engine_policy_hydro, 0,
-              time_end, dt_min, dt_max, talking);
+              engine_policies, 0, time_end, dt_min, dt_max, talking);
   if (myrank == 0 && verbose) {
     clocks_gettime(&toc);
     message("engine_init took %.3f %s.", clocks_diff(&tic, &toc),
@@ -510,8 +544,8 @@ int main(int argc, char *argv[]) {
   /* Legend */
   if (myrank == 0)
     printf(
-        "# Step Time time-step Number of updates CPU Wall-clock time "
-        "[%s]\n",
+        "# Step Time time-step Number of updates Number of updates "
+        "CPU Wall-clock time [%s]\n",
        clocks_getunit());

   /* Let loose a runner on the space. */
diff --git a/src/Makefile.am b/src/Makefile.am
index f44d47819672d10445fd969fe2ff20dbcb49463b..15c05a2a00d33ad86e7144b4a8e377252a2eedce 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -35,13 +35,13 @@ endif
 # List required headers
 include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \
-    common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h
+    common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h

 # Common source files
 AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \
     serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \
     units.c common_io.c single_io.c multipole.c version.c map.c \
-    kernel.c tools.c part.c partition.c clocks.c
+    kernel.c tools.c part.c partition.c clocks.c parser.c

 # Include files for distribution, not installation.
 nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \
diff --git a/src/cell.c b/src/cell.c
index 696f53069b9974c94f8b25e10f7dcba81fae8069..31a632a5b40a7706eeef6accc385d57e27f0f247 100644
--- a/src/cell.c
+++ b/src/cell.c
@@ -45,6 +45,7 @@
 /* Local headers. */
 #include "atomic.h"
 #include "error.h"
+#include "gravity.h"
 #include "hydro.h"
 #include "space.h"
 #include "timers.h"
@@ -89,14 +90,18 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
   c->ti_end_min = pc->ti_end_min;
   c->ti_end_max = pc->ti_end_max;
   c->count = pc->count;
+  c->gcount = pc->gcount;
   c->tag = pc->tag;

-  /* Fill the progeny recursively, depth-first. */
+  /* Number of new cells created. */
   int count = 1;
+
+  /* Fill the progeny recursively, depth-first. */
   for (int k = 0; k < 8; k++)
     if (pc->progeny[k] >= 0) {
       struct cell *temp = space_getcell(s);
       temp->count = 0;
+      temp->gcount = 0;
       temp->loc[0] = c->loc[0];
       temp->loc[1] = c->loc[1];
       temp->loc[2] = c->loc[2];
@@ -122,7 +127,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
 }

 /**
- * @brief Link the cells recursively to the given part array.
+ * @brief Link the cells recursively to the given #part array.
  *
  * @param c The #cell.
  * @param parts The #part array.
@@ -130,7 +135,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) {
  * @return The number of particles linked.
  */
-int cell_link(struct cell *c, struct part *parts) {
+int cell_link_parts(struct cell *c, struct part *parts) {

   c->parts = parts;

@@ -139,14 +144,40 @@ int cell_link(struct cell *c, struct part *parts) {
     int offset = 0;
     for (int k = 0; k < 8; k++) {
       if (c->progeny[k] != NULL)
-        offset += cell_link(c->progeny[k], &parts[offset]);
+        offset += cell_link_parts(c->progeny[k], &parts[offset]);
     }
   }

-  /* Return the total number of unpacked cells. */
+  /* Return the total number of linked particles. */
   return c->count;
 }

+/**
+ * @brief Link the cells recursively to the given #gpart array.
+ *
+ * @param c The #cell.
+ * @param gparts The #gpart array.
+ *
+ * @return The number of particles linked.
+ */
+
+int cell_link_gparts(struct cell *c, struct gpart *gparts) {
+
+  c->gparts = gparts;
+
+  /* Fill the progeny recursively, depth-first. */
+  if (c->split) {
+    int offset = 0;
+    for (int k = 0; k < 8; k++) {
+      if (c->progeny[k] != NULL)
+        offset += cell_link_gparts(c->progeny[k], &gparts[offset]);
+    }
+  }
+
+  /* Return the total number of linked particles. */
+  return c->gcount;
+}
+
 /**
  * @brief Pack the data of the given cell and all it's sub-cells.
  *
@@ -164,6 +195,7 @@ int cell_pack(struct cell *c, struct pcell *pc) {
   pc->ti_end_min = c->ti_end_min;
   pc->ti_end_max = c->ti_end_max;
   pc->count = c->count;
+  pc->gcount = c->gcount;
   c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag;

   /* Fill in the progeny, depth-first recursion. */
@@ -574,6 +606,27 @@ void cell_init_parts(struct cell *c, void *data) {
   c->ti_end_max = 0;
 }

+/**
+ * @brief Initialises all g-particles to a valid state even if the ICs were
+ *stupid
+ *
+ * @param c Cell to act upon
+ * @param data Unused parameter
+ */
+void cell_init_gparts(struct cell *c, void *data) {
+
+  struct gpart *gp = c->gparts;
+  const int gcount = c->gcount;
+
+  for (int i = 0; i < gcount; ++i) {
+    gp[i].ti_begin = 0;
+    gp[i].ti_end = 0;
+    gravity_first_init_gpart(&gp[i]);
+  }
+  c->ti_end_min = 0;
+  c->ti_end_max = 0;
+}
+
 /**
  * @brief Converts hydro quantities to a valid state after the initial density
  *calculation
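Both `cell_link_parts()` and the new `cell_link_gparts()` rely on the particles of a cell tree living in one contiguous array, so each child can simply be handed a sub-array offset by its earlier siblings' counts. A stand-alone sketch of that depth-first scheme, using a stripped-down stand-in for SWIFT's #cell rather than the real structure:

```c
/* Toy cell: a contiguous slice of a particle array plus up to 8 children. */
struct toy_cell {
  int count;                 /* Number of particles in this cell. */
  int split;                 /* Is the cell subdivided? */
  struct toy_cell *progeny[8];
  double *parts;             /* Start of this cell's slice. */
};

int toy_cell_link(struct toy_cell *c, double *parts) {
  c->parts = parts;
  if (c->split) {
    int offset = 0;
    for (int k = 0; k < 8; k++)
      if (c->progeny[k] != NULL)
        offset += toy_cell_link(c->progeny[k], &parts[offset]);
  }
  return c->count; /* Tells the caller how far to advance the slice. */
}
```

The return value doubles as the stride for the next sibling, which is why the patch is careful to return `c->count` and `c->gcount` respectively.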
diff --git a/src/cell.h b/src/cell.h
index 857aa9282930fea330df03992ae140f97ae0f630..8b65fa1904a4aa407a15bc30954651dc5c4e29e5 100644
--- a/src/cell.h
+++ b/src/cell.h
@@ -47,7 +47,7 @@ struct pcell {
   int ti_end_min, ti_end_max;

   /* Number of particles in this cell. */
-  int count;
+  int count, gcount;

   /* tag used for MPI communication. */
   int tag;
@@ -144,7 +144,7 @@ struct cell {
   double mass, e_pot, e_int, e_kin;

   /* Number of particles updated in this cell. */
-  int updated;
+  int updated, g_updated;

   /* Linking pointer for "memory management". */
   struct cell *next;
@@ -178,8 +178,10 @@ void cell_gunlocktree(struct cell *c);
 int cell_pack(struct cell *c, struct pcell *pc);
 int cell_unpack(struct pcell *pc, struct cell *c, struct space *s);
 int cell_getsize(struct cell *c);
-int cell_link(struct cell *c, struct part *parts);
+int cell_link_parts(struct cell *c, struct part *parts);
+int cell_link_gparts(struct cell *c, struct gpart *gparts);
 void cell_init_parts(struct cell *c, void *data);
+void cell_init_gparts(struct cell *c, void *data);
 void cell_convert_hydro(struct cell *c, void *data);
 void cell_clean_links(struct cell *c, void *data);
diff --git a/src/common_io.c b/src/common_io.c
index b3d24aec402fc1cc38255239c60e3e630f33b051..9e162bc350f13b543a471927d3a4720a43a295d2 100644
--- a/src/common_io.c
+++ b/src/common_io.c
@@ -45,6 +45,9 @@
 #include "kernel.h"
 #include "version.h"

+const char* particle_type_names[NUM_PARTICLE_TYPES] = {
+    "Gas", "DM", "Boundary", "Dummy", "Star", "BH"};
+
 /**
  * @brief Converts a C data type to the HDF5 equivalent.
  *
@@ -402,52 +405,68 @@ void createXMFfile() {
  *snapshot
  *
  * @param xmfFile The file to write in.
- * @param Nparts The number of particles.
  * @param hdfFileName The name of the HDF5 file corresponding to this output.
  * @param time The current simulation time.
  */
-void writeXMFheader(FILE* xmfFile, long long Nparts, char* hdfFileName,
-                    float time) {
+void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time) {

   /* Write end of file */
+  fprintf(xmfFile, "<!-- XMF description for file: %s -->\n", hdfFileName);
   fprintf(xmfFile,
           "<Grid GridType=\"Collection\" CollectionType=\"Spatial\">\n");
   fprintf(xmfFile, "<Time Type=\"Single\" Value=\"%f\"/>\n", time);
-  fprintf(xmfFile, "<Grid Name=\"Gas\" GridType=\"Uniform\">\n");
-  fprintf(xmfFile,
-          "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%lld\"/>\n",
-          Nparts);
-  fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n");
-  fprintf(xmfFile,
-          "<DataItem Dimensions=\"%lld 3\" NumberType=\"Double\" "
-          "Precision=\"8\" "
-          "Format=\"HDF\">%s:/PartType0/Coordinates</DataItem>\n",
-          Nparts, hdfFileName);
-  fprintf(xmfFile, "</Geometry>");
 }

 /**
  * @brief Writes the end of the XMF file (closes all open markups)
  *
  * @param xmfFile The file to write in.
+ * @param output The number of this output.
+ * @param time The current simulation time.
  */
-void writeXMFfooter(FILE* xmfFile) {
+void writeXMFoutputfooter(FILE* xmfFile, int output, float time) {

   /* Write end of the section of this time step */
-  fprintf(xmfFile, "\n</Grid>\n");
-  fprintf(xmfFile, "</Grid>\n");
-  fprintf(xmfFile, "\n</Grid>\n");
+  fprintf(xmfFile,
+          "\n</Grid> <!-- End of meta-data for output=%03i, time=%f -->\n",
+          output, time);
+  fprintf(xmfFile, "\n</Grid> <!-- timeSeries -->\n");
   fprintf(xmfFile, "</Domain>\n");
   fprintf(xmfFile, "</Xdmf>\n");

   fclose(xmfFile);
 }

+void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N,
+                         enum PARTICLE_TYPE ptype) {
+  fprintf(xmfFile, "\n<Grid Name=\"%s\" GridType=\"Uniform\">\n",
+          particle_type_names[ptype]);
+  fprintf(xmfFile,
+          "<Topology TopologyType=\"Polyvertex\" Dimensions=\"%zi\"/>\n", N);
+  fprintf(xmfFile, "<Geometry GeometryType=\"XYZ\">\n");
+  fprintf(xmfFile,
+          "<DataItem Dimensions=\"%zi 3\" NumberType=\"Double\" "
+          "Precision=\"8\" "
+          "Format=\"HDF\">%s:/PartType%d/Coordinates</DataItem>\n",
+          N, hdfFileName, ptype);
+  fprintf(xmfFile,
+          "</Geometry>\n <!-- Done geometry for %s, start of particle fields "
+          "list -->\n",
+          particle_type_names[ptype]);
+}
+
+void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype) {
+  fprintf(xmfFile, "</Grid> <!-- End of meta-data for parttype=%s -->\n",
+          particle_type_names[ptype]);
+}
+
 /**
  * @brief Writes the lines corresponding to an array of the HDF5 output
  *
  * @param xmfFile The file in which to write
  * @param fileName The name of the HDF5 file associated to this XMF descriptor.
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
  * @param name The name of the array in the HDF5 file.
  * @param N The number of particles.
  * @param dim The dimension of the quantity (1 for scalars, 3 for vectors).
@@ -455,21 +474,21 @@ void writeXMFfooter(FILE* xmfFile) {
  *
  * @todo Treat the types in a better way.
  */
-void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N,
-                  int dim, enum DATA_TYPE type) {
+void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName,
+                  char* name, size_t N, int dim, enum DATA_TYPE type) {
   fprintf(xmfFile,
           "<Attribute Name=\"%s\" AttributeType=\"%s\" Center=\"Node\">\n", name,
           dim == 1 ? "Scalar" : "Vector");
   if (dim == 1)
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%lld\" NumberType=\"Double\" "
-            "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n",
-            N, type == FLOAT ? 4 : 8, fileName, name);
+            "<DataItem Dimensions=\"%zi\" NumberType=\"Double\" "
+            "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
+            N, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
   else
     fprintf(xmfFile,
-            "<DataItem Dimensions=\"%lld %d\" NumberType=\"Double\" "
-            "Precision=\"%d\" Format=\"HDF\">%s:/PartType0/%s</DataItem>\n",
-            N, dim, type == FLOAT ? 4 : 8, fileName, name);
+            "<DataItem Dimensions=\"%zi %d\" NumberType=\"Double\" "
+            "Precision=\"%d\" Format=\"HDF\">%s:%s/%s</DataItem>\n",
+            N, dim, type == FLOAT ? 4 : 8, fileName, partTypeGroupName, name);
   fprintf(xmfFile, "</Attribute>\n");
 }

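The new API splits the old monolithic `writeXMFheader()` into per-output and per-particle-type pieces. A hypothetical caller might look like the sketch below; the field names, the wrapper function, and the `GAS` enumerator (assumed here to denote PartType0 in `enum PARTICLE_TYPE`) are illustrative and not taken from this patch.

```c
#include <stdio.h>
#include "common_io.h"

/* Hypothetical helper: describe one snapshot's gas particles in XMF. */
void write_xmf_for_snapshot(FILE *xmfFile, char *hdfFileName, float time,
                            size_t Ngas) {
  writeXMFoutputheader(xmfFile, hdfFileName, time);

  /* One group per particle type present in the snapshot. */
  writeXMFgroupheader(xmfFile, hdfFileName, Ngas, GAS);
  writeXMFline(xmfFile, hdfFileName, "/PartType0", "Masses", Ngas, 1, FLOAT);
  writeXMFline(xmfFile, hdfFileName, "/PartType0", "Velocities", Ngas, 3,
               FLOAT);
  writeXMFgroupfooter(xmfFile, GAS);

  writeXMFoutputfooter(xmfFile, /* output= */ 0, time);
}
```

Threading `partTypeGroupName` through `writeXMFline()` is what lets the same routine describe DM, star, or BH arrays instead of hard-coding `/PartType0`.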
@@ -483,14 +502,13 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N,
  * @param gparts The array of #gpart freshly read in.
  * @param Ndm The number of DM particles read in.
  */
-void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) {
+void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) {

   /* Let's give all these gparts a negative id */
   for (size_t i = 0; i < Ndm; ++i) {

-    /* 0 or negative ids are not allowed */
     if (gparts[i].id_or_neg_offset <= 0)
-      error("0 or negative ID for DM particle");
+      error("0 or negative ID for DM particle %zd: ID=%lld", i,
+            gparts[i].id_or_neg_offset);
   }
 }

@@ -506,8 +524,9 @@ void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) {
  * @param Ngas The number of gas particles read in.
  * @param Ndm The number of DM particles read in.
  */
-void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
-                            size_t Ngas, size_t Ndm) {
+void duplicate_hydro_gparts(struct part* const parts,
+                            struct gpart* const gparts, size_t Ngas,
+                            size_t Ndm) {

   for (size_t i = 0; i < Ngas; ++i) {

@@ -536,14 +555,17 @@ void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
  * @param dmparts The array of #gpart containg DM particles to be filled.
  * @param Ndm The number of DM particles.
  */
-void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts,
-                       size_t Ndm) {
+void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
+                       struct gpart* const dmparts, size_t Ndm) {

   size_t count = 0;

   /* Loop over all gparts */
   for (size_t i = 0; i < Ntot; ++i) {

+    /* message("i=%zd count=%zd id=%lld part=%p", i, count, gparts[i].id,
+     * gparts[i].part); */
+
     /* And collect the DM ones */
     if (gparts[i].id_or_neg_offset > 0) {
       dmparts[count] = gparts[i];
diff --git a/src/common_io.h b/src/common_io.h
index 2623a03f9a25ce0e650dde4f698da6eb49177e26..961f40e63d771e5e06ade525301caf59aae0bceb 100644
--- a/src/common_io.h
+++ b/src/common_io.h
@@ -70,14 +70,20 @@ enum PARTICLE_TYPE {
   NUM_PARTICLE_TYPES
 };

+extern const char* particle_type_names[];
+
+#define FILENAME_BUFFER_SIZE 150
+#define PARTICLE_GROUP_BUFFER_SIZE 20
+
 hid_t hdf5Type(enum DATA_TYPE type);
 size_t sizeOfType(enum DATA_TYPE type);

-void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts,
-                       size_t Ndm);
-void prepare_dm_gparts(struct gpart* gparts, size_t Ndm);
-void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts,
-                            size_t Ngas, size_t Ndm);
+void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot,
+                       struct gpart* const dmparts, size_t Ndm);
+void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm);
+void duplicate_hydro_gparts(struct part* const parts,
+                            struct gpart* const gparts, size_t Ngas,
+                            size_t Ndm);

 void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data);

@@ -92,10 +98,13 @@ void writeAttribute_s(hid_t grp, char* name, const char* str);

 void createXMFfile();
 FILE* prepareXMFfile();
-void writeXMFfooter(FILE* xmfFile);
-void writeXMFheader(FILE* xmfFile, long long N, char* hdfFileName, float time);
-void writeXMFline(FILE* xmfFile, char* fileName, char* name, long long N,
-                  int dim, enum DATA_TYPE type);
+void writeXMFoutputheader(FILE* xmfFile, char* hdfFileName, float time);
+void writeXMFoutputfooter(FILE* xmfFile, int outputCount, float time);
+void writeXMFgroupheader(FILE* xmfFile, char* hdfFileName, size_t N,
+                         enum PARTICLE_TYPE ptype);
+void writeXMFgroupfooter(FILE* xmfFile, enum PARTICLE_TYPE ptype);
+void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName,
+                  char* name, size_t N, int dim, enum DATA_TYPE type);

 void writeCodeDescription(hid_t h_file);
 void writeSPHflavour(hid_t h_file);
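Throughout this patch a single signed field, `id_or_neg_offset`, does double duty: a positive value is a genuine particle ID (as `prepare_dm_gparts()` enforces for DM), while a value less than or equal to zero stores minus the index of the gpart's hydro partner in the parts array. A toy version of `duplicate_hydro_gparts()` under that convention, with simplified stand-in types rather than SWIFT's real structs:

```c
#include <stddef.h>

struct sk_gpart {
  long long id_or_neg_offset; /* >0: DM particle ID; <=0: -index of partner. */
};
struct sk_part {
  long long id;
  struct sk_gpart *gpart;
};

/* DM gparts occupy [0, Ndm); hydro duplicates are appended after them. */
void sk_duplicate_hydro_gparts(struct sk_part *parts, struct sk_gpart *gparts,
                               size_t Ngas, size_t Ndm) {
  for (size_t i = 0; i < Ngas; ++i) {
    gparts[i + Ndm].id_or_neg_offset = -(long long)i;
    parts[i].gpart = &gparts[i + Ndm];
  }
}
```

Encoding the link as an array index rather than a pointer is what allows the structures to be memcpy'd and shipped over MPI, as the engine code below exploits.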
diff --git a/src/engine.c b/src/engine.c
index b7658535335bd02d309c9cf69da61ffcc2f6c160..4cd33ef20f5aa040d075eb76ac07db5413062f7b 100644
--- a/src/engine.c
+++ b/src/engine.c
@@ -139,39 +139,56 @@ void engine_make_ghost_tasks(struct engine *e, struct cell *c,
  * @brief Redistribute the particles amongst the nodes according
  *        to their cell's node IDs.
  *
+ * The strategy here is as follows:
+ * 1) Each node counts the number of particles it has to send to each other
+ *    node.
+ * 2) The number of particles of each type is then exchanged.
+ * 3) The particles to send are placed in a temporary buffer in which the
+ *    part-gpart links are preserved.
+ * 4) Each node allocates enough space for the new particles.
+ * 5) (Asynchronous) communications are issued to transfer the data.
+ *
+ *
  * @param e The #engine.
  */
-
 void engine_redistribute(struct engine *e) {

 #ifdef WITH_MPI

-  int nr_nodes = e->nr_nodes, nodeID = e->nodeID;
+  const int nr_nodes = e->nr_nodes;
+  const int nodeID = e->nodeID;
   struct space *s = e->s;
-  int my_cells = 0;
-  int *cdim = s->cdim;
   struct cell *cells = s->cells;
-  int nr_cells = s->nr_cells;
+  const int nr_cells = s->nr_cells;
+  const int *cdim = s->cdim;
+  const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]};
+  const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]};
+  struct part *parts = s->parts;
+  struct xpart *xparts = s->xparts;
+  struct gpart *gparts = s->gparts;
   ticks tic = getticks();

-  /* Start by sorting the particles according to their nodes and
-     getting the counts. The counts array is indexed as
-     count[from * nr_nodes + to]. */
-  int *counts;
-  size_t *dest;
-  double ih[3], dim[3];
-  ih[0] = s->ih[0];
-  ih[1] = s->ih[1];
-  ih[2] = s->ih[2];
-  dim[0] = s->dim[0];
-  dim[1] = s->dim[1];
-  dim[2] = s->dim[2];
-  if ((counts = (int *)malloc(sizeof(int) *nr_nodes *nr_nodes)) == NULL ||
-      (dest = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL)
-    error("Failed to allocate count and dest buffers.");
+  /* Allocate temporary arrays to store the counts of particles to be sent
+     and the destination of each particle */
+  int *counts, *g_counts;
+  if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate count temporary buffer.");
+  if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL)
+    error("Failed to allocate gcount temporary buffer.");
   bzero(counts, sizeof(int) * nr_nodes * nr_nodes);
-  struct part *parts = s->parts;
+  bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes);
+
+  // Allocate the destination index arrays.
+  int *dest, *g_dest;
+  if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL)
+    error("Failed to allocate dest temporary buffer.");
+  if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL)
+    error("Failed to allocate g_dest temporary buffer.");
+
+  /* Get destination of each particle */
   for (size_t k = 0; k < s->nr_parts; k++) {
+
+    /* Periodic boundary conditions */
     for (int j = 0; j < 3; j++) {
       if (parts[k].x[j] < 0.0)
         parts[k].x[j] += dim[j];
@@ -184,36 +201,121 @@ void engine_redistribute(struct engine *e) {
       error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
             cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */
     dest[k] = cells[cid].nodeID;
+
+    /* The counts array is indexed as count[from * nr_nodes + to]. */
     counts[nodeID * nr_nodes + dest[k]] += 1;
   }
+
+  /* Sort the particles according to their cell index. */
   space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose);

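The bookkeeping above flattens an nr_nodes by nr_nodes matrix into a 1D array, indexed as `count[from * nr_nodes + to]`; after the in-place `MPI_SUM` all-reduce further down, every node can read its incoming totals down its own column. A small helper expressing just that indexing, written as a sketch rather than SWIFT code:

```c
#include <stddef.h>

/* Sum column `nodeID` of the flattened counts matrix: the total number of
 * particles this node will receive from all senders (itself included). */
size_t incoming_total(const int *counts, int nr_nodes, int nodeID) {
  size_t nr_parts = 0;
  for (int from = 0; from < nr_nodes; from++)
    nr_parts += counts[from * nr_nodes + nodeID];
  return nr_parts;
}
```

Row `nodeID` of the same matrix (what this node sends) is the only part each rank fills in locally, which is why a single all-reduce suffices to give everyone the full picture.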
+  /* We need to re-link the gpart partners of parts. */
+  int current_dest = dest[0];
+  size_t count_this_dest = 0;
+  for (size_t k = 0; k < s->nr_parts; ++k) {
+    if (s->parts[k].gpart != NULL) {
+
+      /* As the addresses will be invalidated by the communications, we will */
+      /* instead store the absolute index from the start of the sub-array */
+      /* of particles to be sent to a given node. */
+      /* Recall that gparts without partners have a negative id. */
+      /* We will restore the pointers on the receiving node later on. */
+      if (dest[k] != current_dest) {
+        current_dest = dest[k];
+        count_this_dest = 0;
+      }
+
+      /* Debug */
+      /* if(s->parts[k].gpart->id < 0) */
+      /*   error("Trying to link a partnerless gpart !"); */
+
+      s->parts[k].gpart->id_or_neg_offset = -count_this_dest;
+      count_this_dest++;
+    }
+  }
+
+  /* Get destination of each g-particle */
+  for (size_t k = 0; k < s->nr_gparts; k++) {
+
+    /* Periodic boundary conditions */
+    for (int j = 0; j < 3; j++) {
+      if (gparts[k].x[j] < 0.0)
+        gparts[k].x[j] += dim[j];
+      else if (gparts[k].x[j] >= dim[j])
+        gparts[k].x[j] -= dim[j];
+    }
+    const int cid = cell_getid(cdim, gparts[k].x[0] * ih[0],
+                               gparts[k].x[1] * ih[1], gparts[k].x[2] * ih[2]);
+    /* if (cid < 0 || cid >= s->nr_cells)
+       error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].",
+             cid, k, g_parts[k].x[0], g_parts[k].x[1], g_parts[k].x[2]); */
+    g_dest[k] = cells[cid].nodeID;
+
+    /* The counts array is indexed as count[from * nr_nodes + to]. */
+    g_counts[nodeID * nr_nodes + g_dest[k]] += 1;
+  }
+
+  /* Sort the gparticles according to their cell index. */
+  space_gparts_sort(gparts, g_dest, s->nr_gparts, 0, nr_nodes - 1);
+
   /* Get all the counts from all the nodes. */
   if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT,
                     MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
     error("Failed to allreduce particle transfer counts.");

-  /* Get the new number of parts for this node, be generous in allocating. */
-  size_t nr_parts = 0;
+  /* Get all the g_counts from all the nodes. */
+  if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT,
+                    MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS)
+    error("Failed to allreduce gparticle transfer counts.");
+
+  /* Each node knows how many parts and gparts will be transferred to every
+     other node. We can start preparing to receive data */
+
+  /* Get the new number of parts and gparts for this node */
+  size_t nr_parts = 0, nr_gparts = 0;
   for (int k = 0; k < nr_nodes; k++)
     nr_parts += counts[k * nr_nodes + nodeID];
+  for (int k = 0; k < nr_nodes; k++)
+    nr_gparts += g_counts[k * nr_nodes + nodeID];
+
+  /* Allocate the new arrays with some extra margin */
   struct part *parts_new = NULL;
-  struct xpart *xparts_new = NULL, *xparts = s->xparts;
+  struct xpart *xparts_new = NULL;
+  struct gpart *gparts_new = NULL;
   if (posix_memalign((void **)&parts_new, part_align,
-                     sizeof(struct part) * nr_parts * 1.2) != 0 ||
-      posix_memalign((void **)&xparts_new, part_align,
-                     sizeof(struct xpart) * nr_parts * 1.2) != 0)
+                     sizeof(struct part) * nr_parts *
+                         engine_redistribute_alloc_margin) != 0)
     error("Failed to allocate new part data.");
-
-  /* Emit the sends and recvs for the particle data. */
+  if (posix_memalign((void **)&xparts_new, xpart_align,
+                     sizeof(struct xpart) * nr_parts *
+                         engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new xpart data.");
+  if (posix_memalign((void **)&gparts_new, gpart_align,
+                     sizeof(struct gpart) * nr_gparts *
+                         engine_redistribute_alloc_margin) != 0)
+    error("Failed to allocate new gpart data.");
+
+  /* Prepare MPI requests for the asynchronous communications */
   MPI_Request *reqs;
-  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) ==
+  if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) ==
       NULL)
     error("Failed to allocate MPI request list.");
-  for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
-  for (size_t offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) {
-    int ind_send = nodeID * nr_nodes + k;
-    int ind_recv = k * nr_nodes + nodeID;
+  for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL;
+
+  /* Emit the sends and recvs for the particle and gparticle data. */
+  size_t offset_send = 0, offset_recv = 0;
+  size_t g_offset_send = 0, g_offset_recv = 0;
+  for (int k = 0; k < nr_nodes; k++) {
+
+    /* Indices in the count arrays of the node of interest */
+    const int ind_send = nodeID * nr_nodes + k;
+    const int ind_recv = k * nr_nodes + nodeID;
+
+    /* Are we sending any part/xpart ? */
     if (counts[ind_send] > 0) {
+
+      /* message("Sending %d part to node %d", counts[ind_send], k); */
+
+      /* If the send is to the same node, just copy */
       if (k == nodeID) {
         memcpy(&parts_new[offset_recv], &s->parts[offset_send],
                sizeof(struct part) * counts[ind_recv]);
@@ -221,36 +323,73 @@ void engine_redistribute(struct engine *e) {
                sizeof(struct xpart) * counts[ind_recv]);
         offset_send += counts[ind_send];
         offset_recv += counts[ind_recv];
+
+      /* Else, emit some communications */
       } else {
-        if (MPI_Isend(&s->parts[offset_send], counts[ind_send],
-                      e->part_mpi_type, k, 2 * ind_send + 0, MPI_COMM_WORLD,
-                      &reqs[4 * k]) != MPI_SUCCESS)
-          error("Failed to isend parts to node %zi.", k);
-        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send],
-                      e->xpart_mpi_type, k, 2 * ind_send + 1, MPI_COMM_WORLD,
-                      &reqs[4 * k + 1]) != MPI_SUCCESS)
-          error("Failed to isend xparts to node %zi.", k);
+        if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type,
+                      k, 3 * ind_send + 0, MPI_COMM_WORLD,
+                      &reqs[6 * k]) != MPI_SUCCESS)
+          error("Failed to isend parts to node %i.", k);
+        if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type,
+                      k, 3 * ind_send + 1, MPI_COMM_WORLD,
+                      &reqs[6 * k + 1]) != MPI_SUCCESS)
+          error("Failed to isend xparts to node %i.", k);
         offset_send += counts[ind_send];
       }
     }
+
+    /* Are we sending any gpart ? */
+    if (g_counts[ind_send] > 0) {
+
+      /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */
+
+      /* If the send is to the same node, just copy */
+      if (k == nodeID) {
+        memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send],
+               sizeof(struct gpart) * g_counts[ind_recv]);
+        g_offset_send += g_counts[ind_send];
+        g_offset_recv += g_counts[ind_recv];
+
+      /* Else, emit some communications */
+      } else {
+        if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send],
+                      gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD,
+                      &reqs[6 * k + 2]) != MPI_SUCCESS)
+          error("Failed to isend gparts to node %i.", k);
+        g_offset_send += g_counts[ind_send];
+      }
+    }
+
+    /* Now emit the corresponding Irecv() */
+
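Each peer node now owns a fixed block of six request slots, so a single `MPI_Waitall` further down can drain every transfer type at once; untouched slots remain `MPI_REQUEST_NULL`, which MPI simply ignores. A sketch of that layout as a small helper (the enum and function are illustrative, not part of the patch):

```c
#include <mpi.h>

/* Slot layout matching the reqs[] indexing used above: six requests per
 * peer node k, covering send and receive of part, xpart and gpart data. */
enum req_slot {
  SLOT_SEND_PART,  /* reqs[6*k + 0] */
  SLOT_SEND_XPART, /* reqs[6*k + 1] */
  SLOT_SEND_GPART, /* reqs[6*k + 2] */
  SLOT_RECV_PART,  /* reqs[6*k + 3] */
  SLOT_RECV_XPART, /* reqs[6*k + 4] */
  SLOT_RECV_GPART  /* reqs[6*k + 5] */
};

static inline MPI_Request *req_slot(MPI_Request *reqs, int k,
                                    enum req_slot slot) {
  return &reqs[6 * k + slot]; /* Unused slots stay MPI_REQUEST_NULL. */
}
```

The matching message tags (`3 * ind + 0/1/2`) keep the three particle streams between any pair of nodes from being confused with one another.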
+    /* Are we receiving any part/xpart from this node ? */
     if (k != nodeID && counts[ind_recv] > 0) {
-      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], e->part_mpi_type,
-                    k, 2 * ind_recv + 0, MPI_COMM_WORLD,
-                    &reqs[4 * k + 2]) != MPI_SUCCESS)
-        error("Failed to emit irecv of parts from node %zi.", k);
-      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv],
-                    e->xpart_mpi_type, k, 2 * ind_recv + 1, MPI_COMM_WORLD,
-                    &reqs[4 * k + 3]) != MPI_SUCCESS)
-        error("Failed to emit irecv of parts from node %zi.", k);
+      if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k,
+                    3 * ind_recv + 0, MPI_COMM_WORLD,
+                    &reqs[6 * k + 3]) != MPI_SUCCESS)
+        error("Failed to emit irecv of parts from node %i.", k);
+      if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type,
+                    k, 3 * ind_recv + 1, MPI_COMM_WORLD,
+                    &reqs[6 * k + 4]) != MPI_SUCCESS)
+        error("Failed to emit irecv of xparts from node %i.", k);
       offset_recv += counts[ind_recv];
     }
+
+    /* Are we receiving any gpart from this node ? */
+    if (k != nodeID && g_counts[ind_recv] > 0) {
+      if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv],
+                    gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD,
+                    &reqs[6 * k + 5]) != MPI_SUCCESS)
+        error("Failed to emit irecv of gparts from node %i.", k);
+      g_offset_recv += g_counts[ind_recv];
+    }
   }

   /* Wait for all the sends and recvs to tumble in. */
-  MPI_Status stats[4 * nr_nodes];
+  MPI_Status stats[6 * nr_nodes];
   int res;
-  if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
-    for (int k = 0; k < 4 * nr_nodes; k++) {
+  if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) {
+    for (int k = 0; k < 6 * nr_nodes; k++) {
       char buff[MPI_MAX_ERROR_STRING];
       int res;
       MPI_Error_string(stats[k].MPI_ERROR, buff, &res);
@@ -259,35 +398,92 @@ void engine_redistribute(struct engine *e) {
     error("Failed during waitall for part data.");
   }

+  /* We now need to restore the part<->gpart links */
+  size_t offset_parts = 0, offset_gparts = 0;
+  for (int node = 0; node < nr_nodes; ++node) {
+
+    const int ind_recv = node * nr_nodes + nodeID;
+    const size_t count_parts = counts[ind_recv];
+    const size_t count_gparts = g_counts[ind_recv];
+
+    /* Loop over the gparts received from that node */
+    for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) {
+
+      /* Does this gpart have a partner ? */
+      if (gparts_new[k].id_or_neg_offset <= 0) {
+
+        const size_t partner_index =
+            offset_parts - gparts_new[k].id_or_neg_offset;
+
+        /* Re-link */
+        gparts_new[k].id_or_neg_offset = -partner_index;
+        parts_new[partner_index].gpart = &gparts_new[k];
+      }
+    }
+
+    offset_parts += count_parts;
+    offset_gparts += count_gparts;
+  }
+
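Before the exchange, every linked gpart's `id_or_neg_offset` was overwritten with minus its partner's position inside the per-destination send buffer; the loop above turns that back into an absolute index by adding the running per-node offset. The same logic in a stand-alone sketch, with simplified stand-in types:

```c
#include <stddef.h>

struct rl_gpart { long long id_or_neg_offset; };
struct rl_part { struct rl_gpart *gpart; };

/* Restore pointers for one node's block of received gparts. `offset_parts`
 * is where that node's parts landed in the receive buffer. */
void rl_relink(struct rl_part *parts, struct rl_gpart *gparts,
               size_t offset_parts, size_t gcount) {
  for (size_t k = 0; k < gcount; ++k) {
    if (gparts[k].id_or_neg_offset <= 0) {
      const size_t partner =
          offset_parts + (size_t)(-gparts[k].id_or_neg_offset);
      gparts[k].id_or_neg_offset = -(long long)partner;
      parts[partner].gpart = &gparts[k];
    }
  }
}
```

Because parts and gparts from the same sender arrive as matching contiguous blocks, the relative offsets stay valid no matter how the blocks are interleaved across senders.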
   /* Verify that all parts are in the right place. */
-  /* for ( k = 0 ; k < nr_parts ; k++ ) {
-     cid = cell_getid( cdim , parts_new[k].x[0]*ih[0] , parts_new[k].x[1]*ih[1]
-     , parts_new[k].x[2]*ih[2] );
+  /* for ( int k = 0 ; k < nr_parts ; k++ ) {
+     int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0],
+     parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] );
     if ( cells[ cid ].nodeID != nodeID )
-     error( "Received particle (%i) that does not belong here (nodeID=%i)."
-     , k , cells[ cid ].nodeID );
-     } */
+     error( "Received particle (%i) that does not belong here
+     (nodeID=%i).", k , cells[ cid ].nodeID );
+     } */
+
+  /* Verify that the links are correct */
+  /* MATTHIEU: To be commented out once we are happy */
+  for (size_t k = 0; k < nr_gparts; ++k) {
+
+    if (gparts_new[k].id_or_neg_offset <= 0) {
+
+      struct part *part = &parts_new[-gparts_new[k].id_or_neg_offset];
+
+      if (part->gpart != &gparts_new[k]) error("Linking problem !");
+
+      if (gparts_new[k].x[0] != part->x[0] ||
+          gparts_new[k].x[1] != part->x[1] ||
+          gparts_new[k].x[2] != part->x[2])
+        error("Linked particles are not at the same position !");
+    }
+  }
+  for (size_t k = 0; k < nr_parts; ++k) {
+
+    if (parts_new[k].gpart != NULL &&
+        parts_new[k].gpart->id_or_neg_offset != -k) {
+      error("Linking problem !");
+    }
+  }

   /* Set the new part data, free the old. */
   free(parts);
   free(xparts);
+  free(gparts);
   s->parts = parts_new;
   s->xparts = xparts_new;
+  s->gparts = gparts_new;
   s->nr_parts = nr_parts;
-  s->size_parts = 1.2 * nr_parts;
+  s->nr_gparts = nr_gparts;
+  s->size_parts = engine_redistribute_alloc_margin * nr_parts;
+  s->size_gparts = engine_redistribute_alloc_margin * nr_gparts;

-  /* Be verbose about what just happened. */
-  for (int k = 0; k < nr_cells; k++)
-    if (cells[k].nodeID == nodeID) my_cells += 1;
-  if (e->verbose)
-    message("node %i now has %zi parts in %i cells.", nodeID, nr_parts,
-            my_cells);
-
-  /* Clean up other stuff. */
+  /* Clean up the temporary stuff. */
   free(reqs);
   free(counts);
   free(dest);

+  /* Be verbose about what just happened. */
+  if (e->verbose) {
+    int my_cells = 0;
+    for (int k = 0; k < nr_cells; k++)
+      if (cells[k].nodeID == nodeID) my_cells += 1;
+    message("node %i now has %zi parts and %zi gparts in %i cells.", nodeID,
+            nr_parts, nr_gparts, my_cells);
+  }
+
   if (e->verbose)
     message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
             clocks_getunit());
@@ -513,7 +709,7 @@ void engine_exchange_cells(struct engine *e) {

   /* Wait for each count to come in and start the recv. */
   for (int k = 0; k < nr_proxies; k++) {
-    int pid;
+    int pid = MPI_UNDEFINED;
     if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
         pid == MPI_UNDEFINED)
       error("MPI_Waitany failed.");
@@ -533,7 +729,7 @@ void engine_exchange_cells(struct engine *e) {

   /* Wait for each pcell array to come in from the proxies. */
   for (int k = 0; k < nr_proxies; k++) {
-    int pid;
+    int pid = MPI_UNDEFINED;
     if (MPI_Waitany(nr_proxies, reqs_in, &pid, &status) != MPI_SUCCESS ||
         pid == MPI_UNDEFINED)
       error("MPI_Waitany failed.");
@@ -549,31 +745,40 @@ void engine_exchange_cells(struct engine *e) {

   /* Count the number of particles we need to import and re-allocate
      the buffer if needed. */
-  int count_in = 0;
+  int count_parts_in = 0, count_gparts_in = 0;
   for (int k = 0; k < nr_proxies; k++)
-    for (int j = 0; j < e->proxies[k].nr_cells_in; j++)
-      count_in += e->proxies[k].cells_in[j]->count;
-  if (count_in > s->size_parts_foreign) {
+    for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
+      count_parts_in += e->proxies[k].cells_in[j]->count;
+      count_gparts_in += e->proxies[k].cells_in[j]->gcount;
+    }
+  if (count_parts_in > s->size_parts_foreign) {
     if (s->parts_foreign != NULL) free(s->parts_foreign);
-    s->size_parts_foreign = 1.1 * count_in;
+    s->size_parts_foreign = 1.1 * count_parts_in;
     if (posix_memalign((void **)&s->parts_foreign, part_align,
                        sizeof(struct part) * s->size_parts_foreign) != 0)
       error("Failed to allocate foreign part data.");
   }
+  if (count_gparts_in > s->size_gparts_foreign) {
+    if (s->gparts_foreign != NULL) free(s->gparts_foreign);
+    s->size_gparts_foreign = 1.1 * count_gparts_in;
+    if (posix_memalign((void **)&s->gparts_foreign, gpart_align,
+                       sizeof(struct gpart) * s->size_gparts_foreign) != 0)
+      error("Failed to allocate foreign gpart data.");
+  }

   /* Unpack the cells and link to the particle data. */
   struct part *parts = s->parts_foreign;
+  struct gpart *gparts = s->gparts_foreign;
   for (int k = 0; k < nr_proxies; k++) {
     for (int j = 0; j < e->proxies[k].nr_cells_in; j++) {
-      cell_link(e->proxies[k].cells_in[j], parts);
+      cell_link_parts(e->proxies[k].cells_in[j], parts);
+      cell_link_gparts(e->proxies[k].cells_in[j], gparts);
       parts = &parts[e->proxies[k].cells_in[j]->count];
+      gparts = &gparts[e->proxies[k].cells_in[j]->gcount];
     }
   }
   s->nr_parts_foreign = parts - s->parts_foreign;
-
-  /* Is the parts buffer large enough? */
-  if (s->nr_parts_foreign > s->size_parts_foreign)
-    error("Foreign parts buffer too small.");
+  s->nr_gparts_foreign = gparts - s->gparts_foreign;

   /* Free the pcell buffer. */
   free(pcells);
@@ -591,16 +796,24 @@ void engine_exchange_cells(struct engine *e) {
 * @brief Exchange straying parts with other nodes.
 *
 * @param e The #engine.
- * @param offset The index in the parts array as of which the foreign parts
- *reside.
- * @param ind The ID of the foreign #cell.
- * @param N The number of stray parts.
+ * @param offset_parts The index in the parts array as of which the foreign
+ *        parts reside.
+ * @param ind_part The foreign #cell ID of each part.
+ * @param Npart The number of stray parts; on return, contains the number of
+ *        parts received.
+ * @param offset_gparts The index in the gparts array as of which the foreign
+ *        parts reside.
+ * @param ind_gpart The foreign #cell ID of each gpart.
+ * @param Ngpart The number of stray gparts; on return, contains the number of
+ *        gparts received.
 *
- * @return The number of arrived parts copied to parts and xparts.
+ * Note that this function does not mess up the linkage between parts and
+ * gparts, i.e. the received particles have correct linkage.
 */
-int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
-                           size_t N) {
+void engine_exchange_strays(struct engine *e, size_t offset_parts,
+                            int *ind_part, size_t *Npart, size_t offset_gparts,
+                            int *ind_gpart, size_t *Ngpart) {

 #ifdef WITH_MPI

@@ -610,25 +823,49 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
   /* Re-set the proxies. */
   for (int k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0;

-  /* Put the parts into the corresponding proxies. */
-  for (size_t k = 0; k < N; k++) {
-    const int node_id = e->s->cells[ind[k]].nodeID;
+  /* Put the parts and gparts into the corresponding proxies. */
+  for (size_t k = 0; k < *Npart; k++) {
+    /* Get the target node and proxy ID. */
+    const int node_id = e->s->cells[ind_part[k]].nodeID;
     if (node_id < 0 || node_id >= e->nr_nodes)
       error("Bad node ID %i.", node_id);
     const int pid = e->proxy_ind[node_id];
-    if (pid < 0)
+    if (pid < 0) {
       error(
           "Do not have a proxy for the requested nodeID %i for part with "
           "id=%llu, x=[%e,%e,%e].",
-          node_id, s->parts[offset + k].id, s->parts[offset + k].x[0],
-          s->parts[offset + k].x[1], s->parts[offset + k].x[2]);
-    proxy_parts_load(&e->proxies[pid], &s->parts[offset + k],
-                     &s->xparts[offset + k], 1);
+          node_id, s->parts[offset_parts + k].id,
+          s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1],
+          s->parts[offset_parts + k].x[2]);
+    }
+
+    /* Re-link the associated gpart with the buffer offset of the part. */
+    if (s->parts[offset_parts + k].gpart != NULL) {
+      s->parts[offset_parts + k].gpart->id_or_neg_offset =
+          -e->proxies[pid].nr_parts_in;
+    }
+
+    /* Load the part and xpart into the proxy. */
+    proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k],
+                     &s->xparts[offset_parts + k], 1);
+  }
+  for (size_t k = 0; k < *Ngpart; k++) {
+    const int node_id = e->s->cells[ind_gpart[k]].nodeID;
+    if (node_id < 0 || node_id >= e->nr_nodes)
+      error("Bad node ID %i.", node_id);
+    const int pid = e->proxy_ind[node_id];
+    if (pid < 0)
+      error(
+          "Do not have a proxy for the requested nodeID %i for part with "
+          "id=%lli, x=[%e,%e,%e].",
+          node_id, s->gparts[offset_parts + k].id_or_neg_offset,
+          s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1],
+          s->gparts[offset_gparts + k].x[2]);
+    proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1);
   }

   /* Launch the proxies. */
-  MPI_Request reqs_in[2 * engine_maxproxies];
-  MPI_Request reqs_out[2 * engine_maxproxies];
+  MPI_Request reqs_in[3 * engine_maxproxies];
+  MPI_Request reqs_out[3 * engine_maxproxies];
   for (int k = 0; k < e->nr_proxies; k++) {
     proxy_parts_exch1(&e->proxies[k]);
     reqs_in[k] = e->proxies[k].req_parts_count_in;
@@ -637,7 +874,7 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,

   /* Wait for each count to come in and start the recv. */
   for (int k = 0; k < e->nr_proxies; k++) {
-    int pid;
+    int pid = MPI_UNDEFINED;
     if (MPI_Waitany(e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE) !=
             MPI_SUCCESS ||
         pid == MPI_UNDEFINED)
@@ -652,11 +889,18 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,

   /* Count the total number of incoming particles and make sure we have
      enough space to accommodate them. */
-  size_t count_in = 0;
-  for (int k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in;
-  if (e->verbose) message("sent out %zi particles, got %zi back.", N, count_in);
-  if (offset + count_in > s->size_parts) {
-    s->size_parts = (offset + count_in) * 1.05;
+  int count_parts_in = 0;
+  int count_gparts_in = 0;
+  for (int k = 0; k < e->nr_proxies; k++) {
+    count_parts_in += e->proxies[k].nr_parts_in;
+    count_gparts_in += e->proxies[k].nr_gparts_in;
+  }
+  if (e->verbose) {
+    message("sent out %zi/%zi parts/gparts, got %i/%i back.", *Npart, *Ngpart,
+            count_parts_in, count_gparts_in);
+  }
+  if (offset_parts + count_parts_in > s->size_parts) {
+    s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow;
     struct part *parts_new = NULL;
     struct xpart *xparts_new = NULL;
     if (posix_memalign((void **)&parts_new, part_align,
                        sizeof(struct part) * s->size_parts) != 0 ||
         posix_memalign((void **)&xparts_new, part_align,
                        sizeof(struct xpart) * s->size_parts) != 0)
       error("Failed to allocate new part data.");
-    memcpy(parts_new, s->parts, sizeof(struct part) * offset);
-    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset);
+    memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts);
+    memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts);
     free(s->parts);
     free(s->xparts);
     s->parts = parts_new;
     s->xparts = xparts_new;
   }
+  if (offset_gparts + count_gparts_in > s->size_gparts) {
+    s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow;
+    struct gpart *gparts_new = NULL;
+    if (posix_memalign((void **)&gparts_new, gpart_align,
+                       sizeof(struct gpart) * s->size_gparts) != 0)
+      error("Failed to allocate new gpart data.");
+    memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts);
+    free(s->gparts);
+    s->gparts = gparts_new;
+  }

   /* Collect the requests for the particle data from the proxies. */
   int nr_in = 0, nr_out = 0;
   for (int k = 0; k < e->nr_proxies; k++) {
     if (e->proxies[k].nr_parts_in > 0) {
-      reqs_in[2 * k] = e->proxies[k].req_parts_in;
-      reqs_in[2 * k + 1] = e->proxies[k].req_xparts_in;
+      reqs_in[3 * k] = e->proxies[k].req_parts_in;
+      reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in;
+      nr_in += 2;
+    } else {
+      reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_gparts_in > 0) {
+      reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in;
       nr_in += 1;
-    } else
-      reqs_in[2 * k] = reqs_in[2 * k + 1] = MPI_REQUEST_NULL;
+    } else {
+      reqs_in[3 * k + 2] = MPI_REQUEST_NULL;
+    }
     if (e->proxies[k].nr_parts_out > 0) {
-      reqs_out[2 * k] = e->proxies[k].req_parts_out;
-      reqs_out[2 * k + 1] = e->proxies[k].req_xparts_out;
+      reqs_out[3 * k] = e->proxies[k].req_parts_out;
+      reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out;
+      nr_out += 2;
+    } else {
+      reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL;
+    }
+    if (e->proxies[k].nr_gparts_out > 0) {
+      reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out;
       nr_out += 1;
-    } else
-      reqs_out[2 * k] = reqs_out[2 * k + 1] = MPI_REQUEST_NULL;
+    } else {
+      reqs_out[3 * k + 2] = MPI_REQUEST_NULL;
+    }
   }

   /* Wait for each part array to come in and collect the new
      parts from the proxies. */
-  size_t count = 0;
-  for (int k = 0; k < 2 * (nr_in + nr_out); k++) {
+  int count_parts = 0, count_gparts = 0;
+  for (int k = 0; k < nr_in; k++) {
     int err, pid;
-    if ((err = MPI_Waitany(2 * e->nr_proxies, reqs_in, &pid,
+    if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid,
                            MPI_STATUS_IGNORE)) != MPI_SUCCESS) {
       char buff[MPI_MAX_ERROR_STRING];
       int res;
@@ -702,26 +970,45 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
       error("MPI_Waitany failed (%s).", buff);
     }
     if (pid == MPI_UNDEFINED) break;
-    // message( "request from proxy %i has arrived." , pid );
-    if (reqs_in[pid & ~1] == MPI_REQUEST_NULL &&
-        reqs_in[pid | 1] == MPI_REQUEST_NULL) {
+    // message( "request from proxy %i has arrived." , pid / 3 );
+    pid = 3 * (pid / 3);
+
+    /* If all the requests for a given proxy have arrived... */
+    if (reqs_in[pid + 0] == MPI_REQUEST_NULL &&
+        reqs_in[pid + 1] == MPI_REQUEST_NULL &&
+        reqs_in[pid + 2] == MPI_REQUEST_NULL) {
+
+      /* Copy the particle data to the part/xpart/gpart arrays. */
       struct proxy *p = &e->proxies[pid >> 1];
-      memcpy(&s->parts[offset + count], p->parts_in,
+      memcpy(&s->parts[offset_parts + count_parts], p->parts_in,
              sizeof(struct part) * p->nr_parts_in);
-      memcpy(&s->xparts[offset + count], p->xparts_in,
+      memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in,
              sizeof(struct xpart) * p->nr_parts_in);
+      memcpy(&s->gparts[offset_gparts + count_gparts], p->gparts_in,
+             sizeof(struct gpart) * p->nr_gparts_in);
       /* for (int k = offset; k < offset + count; k++)
         message(
            "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.",
            s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1],
            s->parts[k].x[2], s->parts[k].h, p->nodeID); */
-      count += p->nr_parts_in;
+
+      /* Re-link the gparts. */
+      for (int k = 0; k < p->nr_gparts_in; k++) {
+        struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k];
+        if (gp->id_or_neg_offset <= 0) {
+          s->parts[offset_gparts + count_parts - gp->id_or_neg_offset].gpart =
+              gp;
+          gp->id_or_neg_offset =
+              -(offset_gparts + count_parts - gp->id_or_neg_offset);
+        }
+      }
+
+      /* Advance the counters. */
+      count_parts += p->nr_parts_in;
+      count_gparts += p->nr_gparts_in;
     }
   }

   /* Wait for all the sends to have finished too. */
   if (nr_out > 0)
-    if (MPI_Waitall(2 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
+    if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) !=
         MPI_SUCCESS)
       error("MPI_Waitall on sends failed.");

@@ -730,11 +1017,11 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
             clocks_getunit());

   /* Return the number of harvested parts. */
-  return count;
+  *Npart = count_parts;
+  *Ngpart = count_gparts;

 #else
   error("SWIFT was not compiled with MPI support.");
-  return 0;
 #endif
 }

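`MPI_Waitany` sets a completed request to `MPI_REQUEST_NULL` and returns its index, so rounding the index down to the start of its proxy's three-slot group and testing all three slots tells the loop above when a proxy's part, xpart and gpart messages have all landed. A sketch of that completion test as an illustrative helper, not part of the patch:

```c
#include <mpi.h>

/* Has every message (part, xpart, gpart) from the proxy owning slot `pid`
 * completed? Completed requests have been reset to MPI_REQUEST_NULL. */
int proxy_fully_arrived(const MPI_Request *reqs_in, int pid) {
  pid = 3 * (pid / 3); /* First slot of this proxy's group. */
  return reqs_in[pid + 0] == MPI_REQUEST_NULL &&
         reqs_in[pid + 1] == MPI_REQUEST_NULL &&
         reqs_in[pid + 2] == MPI_REQUEST_NULL;
}
```

Deferring the memcpy until all three requests are null is what keeps the part, xpart and gpart blocks of one proxy consistent with each other before the re-linking pass runs.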
@@ -743,7 +1030,7 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind,
  *neighbours
  *
  * Here we construct all the tasks for all possible neighbouring non-empty
- * local cells in the hierarchy. No dependencies are being added thus far.
+ * local cells in the hierarchy. No dependencies are being added thus far.
  * Additional loop over neighbours can later be added by simply duplicating
  * all the tasks created by this function.
  *
@@ -761,12 +1048,14 @@ void engine_make_hydroloop_tasks(struct engine *e) {
   for (int i = 0; i < cdim[0]; i++) {
     for (int j = 0; j < cdim[1]; j++) {
       for (int k = 0; k < cdim[2]; k++) {
-        int cid = cell_getid(cdim, i, j, k);

-        /* Skip cells without hydro particles */
-        if (cells[cid].count == 0) continue;
+        /* Get the cell */
+        const int cid = cell_getid(cdim, i, j, k);
         struct cell *ci = &cells[cid];

+        /* Skip cells without hydro particles */
+        if (ci->count == 0) continue;
+
         /* If the cells is local build a self-interaction */
         if (ci->nodeID == nodeID)
           scheduler_addtask(sched, task_type_self, task_subtype_density, 0, 0,
@@ -785,14 +1074,19 @@ void engine_make_hydroloop_tasks(struct engine *e) {
               int kkk = k + kk;
               if (!s->periodic && (kkk < 0 || kkk >= cdim[2])) continue;
               kkk = (kkk + cdim[2]) % cdim[2];
-              int cjd = cell_getid(cdim, iii, jjj, kkk);
+
+              /* Get the neighbouring cell */
+              const int cjd = cell_getid(cdim, iii, jjj, kkk);
               struct cell *cj = &cells[cjd];

               /* Is that neighbour local and does it have particles ? */
               if (cid >= cjd || cj->count == 0 ||
                   (ci->nodeID != nodeID && cj->nodeID != nodeID))
                 continue;
-              int sid = sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))];
+
+              /* Construct the pair task */
+              const int sid =
+                  sortlistID[(kk + 1) + 3 * ((jj + 1) + 3 * (ii + 1))];
               scheduler_addtask(sched, task_type_pair, task_subtype_density,
                                 sid, 0, ci, cj, 1);
             }
@@ -874,10 +1168,16 @@ void engine_count_and_link_tasks(struct engine *e) {
 }

 /**
- * @brief Duplicates the first hydro loop and creates the corresponding
- *dependencies using the ghost tasks.
+ * @brief Duplicates the first hydro loop and constructs all the
+ *        dependencies for the hydro part
+ *
+ * This is done by looping over all the previously constructed tasks
+ * and adding another task involving the same cells but this time
+ * corresponding to the second hydro loop over neighbours.
+ * With all the relevant tasks for a given cell available, we construct
+ * all the dependencies for that cell.
  *
- * @parma e The #engine.
+ * @param e The #engine.
  */
 void engine_make_extra_hydroloop_tasks(struct engine *e) {

@@ -895,20 +1195,39 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {

     /* Self-interaction? */
     if (t->type == task_type_self && t->subtype == task_subtype_density) {
-      scheduler_addunlock(sched, t->ci->super->init, t);
-      scheduler_addunlock(sched, t, t->ci->super->ghost);
+
+      /* Start by constructing the task for the second hydro loop */
       struct task *t2 = scheduler_addtask(
           sched, task_type_self, task_subtype_force, 0, 0, t->ci, NULL, 0);
-      scheduler_addunlock(sched, t->ci->super->ghost, t2);
-      scheduler_addunlock(sched, t2, t->ci->super->kick);
+
+      /* Add the link between the new loop and the cell */
       t->ci->force = engine_addlink(e, t->ci->force, t2);
       atomic_inc(&t->ci->nr_force);
+
+      /* Now, build all the dependencies for the hydro */
+      /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */
+      scheduler_addunlock(sched, t->ci->super->init, t);
+      scheduler_addunlock(sched, t, t->ci->super->ghost);
+      scheduler_addunlock(sched, t->ci->super->ghost, t2);
+      scheduler_addunlock(sched, t2, t->ci->super->kick);
     }

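Each `scheduler_addunlock(sched, a, b)` call above records a directed edge "a runs before b", yielding the per-cell chain init -> density -> ghost -> force -> kick that the new comments spell out. A toy rendering of those edges as a plain adjacency matrix, with stand-in names rather than SWIFT's task structures:

```c
#include <stdio.h>

/* Toy task graph: an "unlock" is just a directed edge. */
enum toy_task { INIT, DENSITY, GHOST, FORCE, KICK, NTASKS };

int main(void) {
  const char *names[NTASKS] = {"init", "density", "ghost", "force", "kick"};
  int edges[NTASKS][NTASKS] = {0}; /* edges[a][b]: a must run before b. */
  edges[INIT][DENSITY] = 1;  /* scheduler_addunlock(sched, init, t)   */
  edges[DENSITY][GHOST] = 1; /* scheduler_addunlock(sched, t, ghost)  */
  edges[GHOST][FORCE] = 1;   /* scheduler_addunlock(sched, ghost, t2) */
  edges[FORCE][KICK] = 1;    /* scheduler_addunlock(sched, t2, kick)  */

  for (int a = 0; a < NTASKS; a++)
    for (int b = 0; b < NTASKS; b++)
      if (edges[a][b]) printf("%s -> %s\n", names[a], names[b]);
  return 0;
}
```

For pair and sub tasks the same chain is built per cell, but only for cells that are local, which is what the nodeID checks below guard.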
     /* Otherwise, pair interaction? */
     else if (t->type == task_type_pair && t->subtype == task_subtype_density) {
+
+      /* Start by constructing the task for the second hydro loop */
       struct task *t2 = scheduler_addtask(
           sched, task_type_pair, task_subtype_force, 0, 0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
+      t->cj->force = engine_addlink(e, t->cj->force, t2);
+      atomic_inc(&t->cj->nr_force);
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */
       if (t->ci->nodeID == nodeID) {
         scheduler_addunlock(sched, t->ci->super->init, t);
         scheduler_addunlock(sched, t, t->ci->super->ghost);
@@ -921,17 +1240,27 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
         scheduler_addunlock(sched, t->cj->super->ghost, t2);
         scheduler_addunlock(sched, t2, t->cj->super->kick);
       }
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
-      atomic_inc(&t->ci->nr_force);
-      t->cj->force = engine_addlink(e, t->cj->force, t2);
-      atomic_inc(&t->cj->nr_force);
     }

     /* Otherwise, sub interaction? */
     else if (t->type == task_type_sub && t->subtype == task_subtype_density) {
+
+      /* Start by constructing the task for the second hydro loop */
       struct task *t2 =
           scheduler_addtask(sched, task_type_sub, task_subtype_force, t->flags,
                             0, t->ci, t->cj, 0);
+
+      /* Add the link between the new loop and both cells */
+      t->ci->force = engine_addlink(e, t->ci->force, t2);
+      atomic_inc(&t->ci->nr_force);
+      if (t->cj != NULL) {
+        t->cj->force = engine_addlink(e, t->cj->force, t2);
+        atomic_inc(&t->cj->nr_force);
+      }
+
+      /* Now, build all the dependencies for the hydro for the cells */
+      /* that are local and are not descendant of the same super-cells */
+      /* init --> t (density loop) --> ghost --> t2 (force loop) --> kick */
       if (t->ci->nodeID == nodeID) {
         scheduler_addunlock(sched, t, t->ci->super->ghost);
         scheduler_addunlock(sched, t->ci->super->ghost, t2);
@@ -943,12 +1272,6 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
         scheduler_addunlock(sched, t->cj->super->ghost, t2);
         scheduler_addunlock(sched, t2, t->cj->super->kick);
       }
-      t->ci->force = engine_addlink(e, t->ci->force, t2);
-      atomic_inc(&t->ci->nr_force);
-      if (t->cj != NULL) {
-        t->cj->force = engine_addlink(e, t->cj->force, t2);
-        atomic_inc(&t->cj->nr_force);
-      }
     }

     /* /\* Kick tasks should rely on the grav_down tasks of their cell. *\/ */
@@ -960,6 +1283,8 @@ void engine_make_extra_hydroloop_tasks(struct engine *e) {
 /**
  * @brief Constructs the top-level pair tasks for the gravity M-M interactions
  *
+ * Correct implementation is still lacking here.
+ *
  * @param e The #engine.
  */
 void engine_make_gravityinteraction_tasks(struct engine *e) {
@@ -994,6 +1319,8 @@ void engine_make_gravityinteraction_tasks(struct engine *e) {
  * @brief Constructs the gravity tasks building the multipoles and propagating
  *them to the children
  *
+ * Correct implementation is still lacking here.
+ *
  * @param e The #engine.
  */
 void engine_make_gravityrecursive_tasks(struct engine *e) {
@@ -1028,7 +1355,6 @@ void engine_make_gravityrecursive_tasks(struct engine *e) {
  *
  * @param e The #engine we are working with.
  */
-
 void engine_maketasks(struct engine *e) {

   struct space *s = e->s;
@@ -1130,9 +1456,10 @@ void engine_maketasks(struct engine *e) {
 int engine_marktasks(struct engine *e) {

   struct scheduler *s = &e->sched;
-  const int nr_tasks = s->nr_tasks, *ind = s->tasks_ind;
+  const int ti_end = e->ti_current;
+  const int nr_tasks = s->nr_tasks;
+  const int *const ind = s->tasks_ind;
   struct task *tasks = s->tasks;
-  const float ti_end = e->ti_current;
   const ticks tic = getticks();

   /* Much less to do here if we're on a fixed time-step. */
@@ -1232,6 +1559,7 @@ int engine_marktasks(struct engine *e) {
       else if (t->type == task_type_kick) {
         t->skip = (t->ci->ti_end_min > ti_end);
         t->ci->updated = 0;
+        t->ci->g_updated = 0;
       }

       /* Drift? */
@@ -1288,6 +1616,7 @@ void engine_print_task_counts(struct engine *e) {
   printf(" skipped=%i ]\n", counts[task_type_count]);
   fflush(stdout);
   message("nr_parts = %zi.", e->s->nr_parts);
+  message("nr_gparts = %zi.", e->s->nr_gparts);
 }

 /**
@@ -1298,7 +1627,7 @@ void engine_print_task_counts(struct engine *e) {

 void engine_rebuild(struct engine *e) {

-  ticks tic = getticks();
+  const ticks tic = getticks();

   /* Clear the forcerebuild flag, whatever it was. */
   e->forcerebuild = 0;
@@ -1341,7 +1670,7 @@ void engine_prepare(struct engine *e) {

 /* Collect the values of rebuild from all nodes. */
 #ifdef WITH_MPI
-  int buff;
+  int buff = 0;
   if (MPI_Allreduce(&rebuild, &buff, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD) !=
       MPI_SUCCESS)
     error("Failed to aggregate the rebuild flag across nodes.");
@@ -1417,7 +1746,7 @@ void engine_collect_kick(struct cell *c) {
   if (c->kick != NULL) return;

   /* Counters for the different quantities. */
-  int updated = 0;
+  int updated = 0, g_updated = 0;
   double e_kin = 0.0, e_int = 0.0, e_pot = 0.0;
   float mom[3] = {0.0f, 0.0f, 0.0f}, ang[3] = {0.0f, 0.0f, 0.0f};
   int ti_end_min = max_nr_timesteps, ti_end_max = 0;
@@ -1440,6 +1769,7 @@ void engine_collect_kick(struct cell *c) {
         ti_end_min = min(ti_end_min, cp->ti_end_min);
         ti_end_max = max(ti_end_max, cp->ti_end_max);
         updated += cp->updated;
+        g_updated += cp->g_updated;
         e_kin += cp->e_kin;
         e_int += cp->e_int;
         e_pot += cp->e_pot;
@@ -1457,6 +1787,7 @@ void engine_collect_kick(struct cell *c) {
   c->ti_end_min = ti_end_min;
   c->ti_end_max = ti_end_max;
   c->updated = updated;
+  c->g_updated = g_updated;
   c->e_kin = e_kin;
   c->e_int = e_int;
   c->e_pot = e_pot;
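`engine_collect_kick()` walks down the cell hierarchy when a cell has no kick task of its own, summing counters such as `updated` and the new `g_updated` over the progeny and min/max-reducing the time-step bounds. A simplified recursive sketch of that reduction, using stand-in types rather than SWIFT's #cell:

```c
/* Toy cell for the hierarchical reduction; fields mirror the counters
 * aggregated above but the structure is illustrative only. */
struct agg_cell {
  int updated, g_updated;
  int ti_end_min, ti_end_max;
  struct agg_cell *progeny[8];
};

void agg_collect(struct agg_cell *c) {
  for (int k = 0; k < 8; k++) {
    struct agg_cell *cp = c->progeny[k];
    if (cp == NULL) continue;
    agg_collect(cp); /* Depth-first: children are reduced first. */
    c->updated += cp->updated;
    c->g_updated += cp->g_updated;
    if (cp->ti_end_min < c->ti_end_min) c->ti_end_min = cp->ti_end_min;
    if (cp->ti_end_max > c->ti_end_max) c->ti_end_max = cp->ti_end_max;
  }
}
```

The per-node totals gathered this way are what `engine_step()` below then combines across ranks with `MPI_Allreduce`.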
clean-up any stupid state in the ICs */ - space_map_cells_pre(s, 1, cell_init_parts, NULL); + if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { + space_map_cells_pre(s, 1, cell_init_parts, NULL); + } + if (((e->policy & engine_policy_self_gravity) == + engine_policy_self_gravity) || + ((e->policy & engine_policy_external_gravity) == + engine_policy_external_gravity)) { + space_map_cells_pre(s, 1, cell_init_gparts, NULL); + } engine_prepare(e); @@ -1594,7 +1933,7 @@ void engine_init_particles(struct engine *e) { */ void engine_step(struct engine *e) { - int updates = 0; + int updates = 0, g_updates = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; double e_pot = 0.0, e_int = 0.0, e_kin = 0.0; float mom[3] = {0.0, 0.0, 0.0}; @@ -1621,6 +1960,7 @@ void engine_step(struct engine *e) { e_int += c->e_int; e_pot += c->e_pot; updates += c->updated; + g_updates += c->g_updated; mom[0] += c->mom[0]; mom[1] += c->mom[1]; mom[2] += c->mom[2]; @@ -1632,7 +1972,8 @@ void engine_step(struct engine *e) { /* Aggregate the data from the different nodes. */ #ifdef WITH_MPI { - int in_i[4], out_i[4]; + int in_i[1], out_i[1]; + in_i[0] = 0; out_i[0] = ti_end_min; if (MPI_Allreduce(out_i, in_i, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD) != MPI_SUCCESS) @@ -1645,18 +1986,20 @@ void engine_step(struct engine *e) { ti_end_max = in_i[0]; } { - double in_d[4], out_d[4]; + double in_d[5], out_d[5]; out_d[0] = updates; - out_d[1] = e_kin; - out_d[2] = e_int; - out_d[3] = e_pot; - if (MPI_Allreduce(out_d, in_d, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) != + out_d[1] = g_updates; + out_d[2] = e_kin; + out_d[3] = e_int; + out_d[4] = e_pot; + if (MPI_Allreduce(out_d, in_d, 5, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate energies."); updates = in_d[0]; - e_kin = in_d[1]; - e_int = in_d[2]; - e_pot = in_d[3]; + g_updates = in_d[1]; + e_kin = in_d[2]; + e_int = in_d[3]; + e_pot = in_d[4]; } #endif @@ -1681,8 +2024,8 @@ void engine_step(struct engine *e) { if (e->nodeID == 0) { /* Print some information to the screen */ - printf("%d %e %e %d %.3f\n", e->step, e->time, e->timeStep, updates, - e->wallclock_time); + printf("%d %e %e %d %d %.3f\n", e->step, e->time, e->timeStep, updates, + g_updates, e->wallclock_time); fflush(stdout); /* Write some energy statistics */ @@ -1885,7 +2228,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { engine_makeproxies(e); /* Re-allocate the local parts. */ - if (e->nodeID == 0) + if (e->verbose) message("Re-allocating parts array from %zi to %zi.", s->size_parts, (size_t)(s->nr_parts * 1.2)); s->size_parts = s->nr_parts * 1.2; @@ -1893,7 +2236,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { struct xpart *xparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, sizeof(struct part) * s->size_parts) != 0 || - posix_memalign((void **)&xparts_new, part_align, + posix_memalign((void **)&xparts_new, xpart_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts); @@ -1902,6 +2245,47 @@ void engine_split(struct engine *e, struct partition *initial_partition) { free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; + + /* Re-link the gparts. */ + part_relink_gparts(s->parts, s->nr_parts, 0); + + /* Re-allocate the local gparts. 
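*/

Earlier in this hunk, engine_init_particles gates the initialisation maps on the engine policy with expressions of the form (e->policy & engine_policy_hydro) == engine_policy_hydro. A small helper would make the intent of these bit-mask tests explicit; this is a hypothetical convenience, not a function in the tree:

    /* Non-zero iff all bits of `policy` are set in the engine's policy mask. */
    static inline int engine_has_policy(const struct engine *e, int policy) {
      return (e->policy & policy) == policy;
    }

    /* e.g.: if (engine_has_policy(e, engine_policy_hydro))
               space_map_cells_pre(s, 1, cell_init_parts, NULL); */

/*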
*/ + if (e->verbose) + message("Re-allocating gparts array from %zi to %zi.", s->size_gparts, + (size_t)(s->nr_gparts * 1.2)); + s->size_gparts = s->nr_gparts * 1.2; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts); + free(s->gparts); + s->gparts = gparts_new; + + /* Re-link the parts. */ + part_relink_parts(s->gparts, s->nr_gparts, s->parts); + + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < s->nr_gparts; ++k) { + + if (s->gparts[k].id_or_neg_offset <= 0) { + + struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset]; + + if (part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != part->x[0] || + s->gparts[k].x[1] != part->x[1] || + s->gparts[k].x[2] != part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < s->nr_parts; ++k) { + + if (s->parts[k].gpart != NULL && s->parts[k].gpart->id_or_neg_offset != -k) error("Linking problem !"); + } + #else error("SWIFT was not compiled with MPI support."); #endif @@ -1974,6 +2358,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, e->dt_max = dt_max; e->file_stats = NULL; e->verbose = verbose; + e->count_step = 0; e->wallclock_time = 0.f; engine_rank = nodeID; @@ -2084,10 +2469,12 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, "(t_beg = %e)", timeEnd, timeBegin); - /* Check we have sensible time step bounds */ + /* Check we have sensible time-step values */ if (e->dt_min > e->dt_max) error( - "Minimal time step size must be smaller than maximal time step size "); + "Minimal time-step size (%e) must be smaller than maximal time-step " + "size (%e)", + e->dt_min, e->dt_max); /* Deal with timestep */ e->timeBase = (timeEnd - timeBegin) / max_nr_timesteps; @@ -2133,8 +2520,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, /* Construct types for MPI communications */ #ifdef WITH_MPI - part_create_mpi_type(&e->part_mpi_type); - xpart_create_mpi_type(&e->xpart_mpi_type); + part_create_mpi_types(); #endif /* First of all, init the barrier and lock it. */ diff --git a/src/engine.h b/src/engine.h index 741ae1f553494e435394f529606b4cb794b0e3d2..4d1860b9eed0203bf9bf75711ec6e6549d837fe7 100644 --- a/src/engine.h +++ b/src/engine.h @@ -62,6 +62,8 @@ extern const char *engine_policy_names[]; #define engine_maxtaskspercell 96 #define engine_maxproxies 64 #define engine_tasksreweight 10 +#define engine_parts_size_grow 1.05 +#define engine_redistribute_alloc_margin 1.2 /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; @@ -160,12 +162,6 @@ struct engine { /* Are we talkative ? */ int verbose; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. 
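*/

The verification loop above relies on the sign convention of id_or_neg_offset: a gpart with a value <= 0 stores the negated index of its hydro twin in the parts array, while a positive value is the ID of a genuine DM particle. The invariant, restated as a hypothetical helper (not in the tree):

    /* Returns the hydro particle linked to gp, or NULL for a DM particle. */
    static inline struct part *gpart_twin(const struct space *s,
                                          const struct gpart *gp) {
      return (gp->id_or_neg_offset <= 0) ? &s->parts[-gp->id_or_neg_offset]
                                         : NULL;
    }

/*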
*/ @@ -182,7 +178,9 @@ void engine_init_particles(struct engine *e); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); -int engine_exchange_strays(struct engine *e, int offset, size_t *ind, size_t N); +void engine_exchange_strays(struct engine *e, size_t offset_parts, + int *ind_part, size_t *Npart, size_t offset_gparts, + int *ind_gpart, size_t *Ngpart); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); void engine_makeproxies(struct engine *e); diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index 82bc52ad3e05794c8c05896075edc463a69197ff..92a9f64c1f84a9e949f4c0e9485f892b5c808cdc 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -22,14 +22,61 @@ /** * @brief Computes the gravity time-step of a given particle * - * @param p Pointer to the particle data - * @param xp Pointer to the extended particle data + * @param gp Pointer to the g-particle data * */ -__attribute__((always_inline)) INLINE static float gravity_compute_timestep( - struct part* p, struct xpart* xp) { +__attribute__((always_inline)) + INLINE static float gravity_compute_timestep(struct gpart* gp) { /* Currently no limit is imposed */ return FLT_MAX; } + +/** + * @brief Initialises the g-particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_first_init_gpart(struct gpart* gp) {} + +/** + * @brief Prepares a g-particle for the gravity calculation + * + * Zeroes all the relevant arrays in preparation for the sums taking place in + * the various tasks + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_init_part(struct gpart* gp) { + + /* Zero the acceleration */ + gp->a_grav[0] = 0.f; + gp->a_grav[1] = 0.f; + gp->a_grav[2] = 0.f; +} + +/** + * @brief Finishes the gravity calculation.
+ * + * Multiplies the forces and accelerations by the appropriate constants + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_end_force(struct gpart* gp) {} + +/** + * @brief Kick the additional variables + * + * @param gp The particle to act upon + * @param dt The time-step for this kick + * @param half_dt The half time-step for this kick + */ +__attribute__((always_inline)) INLINE static void gravity_kick_extra( + struct gpart* gp, float dt, float half_dt) {} diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index 62f3cfd43edde2564e231ec272965bfda8ab59da..531afffa5c2958eea49fe49171cde81fa8350fcf 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -24,5 +24,5 @@ __attribute__((always_inline)) "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "mass=%.3e t_begin=%d, t_end=%d\n", p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2], - p->a[0], p->a[1], p->a[2], p->mass, p->ti_begin, p->ti_end); + p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, p->ti_end); } diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index e62be446e8263bf02e3fd73f902b28cb1c3b16cf..d0391aa7819475b46a44ab816c5e15c7bf74a440 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -25,16 +25,9 @@ #include "kernel.h" #include "vector.h" -/** - * @file runner_iact_grav.h - * @brief Gravity interaction functions. - * - */ - /** * @brief Gravity potential */ - __attribute__((always_inline)) INLINE static void runner_iact_grav( float r2, float *dx, struct gpart *pi, struct gpart *pj) { @@ -56,8 +49,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav( /* Aggregate the accelerations. */ for (k = 0; k < 3; k++) { w = acc * dx[k]; - pi->a[k] -= w * mj; - pj->a[k] += w * mi; + pi->a_grav[k] -= w * mj; + pj->a_grav[k] += w * mi; } } @@ -107,8 +100,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_grav( ai.v = w.v * mj.v; aj.v = w.v * mi.v; for (j = 0; j < VEC_SIZE; j++) { - pi[j]->a[k] -= ai.f[j]; - pj[j]->a[k] += aj.f[j]; + pi[j]->a_grav[k] -= ai.f[j]; + pj[j]->a_grav[k] += aj.f[j]; } } diff --git a/src/gravity/Default/gravity_io.h b/src/gravity/Default/gravity_io.h index bcda40c21935cc68a45af69688b7162aebd8ccc9..74f364dd97361f0513755bedec83fe7cb277c36b 100644 --- a/src/gravity/Default/gravity_io.h +++ b/src/gravity/Default/gravity_io.h @@ -48,6 +48,8 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unused in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in +*the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param Ndm The number of DM particles on that MPI rank.
* @param Ndm_total The total number of g-particles (only used in MPI mode) @@ -59,18 +61,20 @@ __attribute__((always_inline)) INLINE static void darkmatter_read_particles( * */ __attribute__((always_inline)) INLINE static void darkmatter_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int Ndm, long long Ndm_total, - int mpi_rank, long long offset, struct gpart* gparts, - struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, + int Ndm, long long Ndm_total, int mpi_rank, long long offset, + struct gpart* gparts, struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, Ndm, 3, gparts, - Ndm_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, Ndm, 1, gparts, - Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, Ndm, 3, gparts, - Ndm_total, mpi_rank, offset, v_full, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, Ndm, 1, gparts, - Ndm_total, mpi_rank, offset, id_or_neg_offset, us, - UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + Ndm, 3, gparts, Ndm_total, mpi_rank, offset, x, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, Ndm, + 1, gparts, Ndm_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + Ndm, 3, gparts, Ndm_total, mpi_rank, offset, v_full, us, + UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, Ndm, 1, gparts, Ndm_total, mpi_rank, offset, + id_or_neg_offset, us, UNIT_CONV_NO_UNITS); } diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index 634ee4ae8453292e272eb1b62720e8c74fca4497..d36ceea650a54e1fdd0ff1fcf162a830dc5ed7cb 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -29,7 +29,7 @@ struct gpart { float v_full[3]; /* Particle acceleration. */ - float a[3]; + float a_grav[3]; /* Particle mass. */ float mass; @@ -44,4 +44,4 @@ struct gpart { which this gpart is linked. */ long long id_or_neg_offset; -} __attribute__((aligned(part_align))); +} __attribute__((aligned(gpart_align))); diff --git a/src/hydro/Default/hydro_io.h b/src/hydro/Default/hydro_io.h index 958bf5a1869718b57678246ff3b1985e54145824..0e9ad46ddc1d4e8c8d3ffdbf3e81262ec49a7092 100644 --- a/src/hydro/Default/hydro_io.h +++ b/src/hydro/Default/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unused in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in +*the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank.
* @param N_total The total number of particles (only used in MPI mode) @@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, u, us, UNIT_CONV_ENERGY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us, + UNIT_CONV_ENERGY_PER_UNIT_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index d31b6be383b80a2698b63d27308f6fee9b23518f..09f796a8f37a9c015135f4aab3f821c2e862bdc9 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -93,8 +93,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( dv[2] = pi->v[2] - pj->v[2]; const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - pi->div_v += faci * dvdr; - pj->div_v += facj * dvdr; + pi->div_v -= faci * dvdr; + pj->div_v -= facj * dvdr; /* Compute dv cross r */ curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; @@ -211,10 +211,10 @@ __attribute__((always_inline)) 
INLINE static void runner_iact_force( /* Balsara term */ const float balsara_i = fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi); + (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); const float balsara_j = fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj); + (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); /* Are the particles moving towards each other? */ const float omega_ij = fminf(dvdr, 0.f); @@ -309,10 +309,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Balsara term */ const float balsara_i = fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi); + (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); const float balsara_j = fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj); + (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); /* Are the particles moving towards each other? */ const float omega_ij = fminf(dvdr, 0.f); diff --git a/src/hydro/Gadget2/hydro_io.h b/src/hydro/Gadget2/hydro_io.h index 17c3d3013644c3572f3c26fc3e270b1c1bc465ed..c1c59dfa4980a2843e7e13bee4c964c9b254cae6 100644 --- a/src/hydro/Gadget2/hydro_io.h +++ b/src/hydro/Gadget2/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unused in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in +*the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank. * @param N_total The total number of particles (only used in MPI mode) @@ -67,27 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, entropy, us, + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, +
UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, entropy, us, UNIT_CONV_ENTROPY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 6afb9d8d38a4fc7f1d38b7286720ddb7f3c51ab4..b3b81a9a0dfe41e7bfafe51050d6f7cf7157e31c 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -16,8 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_RUNNER_IACT_H -#define SWIFT_RUNNER_IACT_H +#ifndef SWIFT_RUNNER_IACT_MINIMAL_H +#define SWIFT_RUNNER_IACT_MINIMAL_H /* Includes. */ #include "const.h" @@ -38,33 +38,31 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2); - float xi, xj; - float h_inv; float wi, wj, wi_dx, wj_dx; - float mi, mj; + + const float r = sqrtf(r2); /* Get the masses. */ - mi = pi->mass; - mj = pj->mass; + const float mi = pi->mass; + const float mj = pj->mass; /* Compute density of pi. */ - h_inv = 1.0 / hi; - xi = r * h_inv; + const float hi_inv = 1.f / hi; + const float xi = r * hi_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; /* Compute density of pj. */ - h_inv = 1.f / hj; - xj = r * h_inv; + const float hj_inv = 1.f / hj; + const float xj = r * hj_inv; kernel_deval(xj, &wj, &wj_dx); pj->rho += mi * wj; - pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx); + pj->rho_dh -= mi * (3.f * wj + xj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= xj * wj_dx; } @@ -76,24 +74,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r; - float xi; - float h_inv; float wi, wi_dx; - float mj; /* Get the masses. */ - mj = pj->mass; + const float mj = pj->mass; /* Get r and r inverse. 
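*/

The Minimal-SPH density loops being tidied here accumulate, for each neighbour of mass m_j, the sums rho += m_j W(x) and rho_dh -= m_j (3 W + x dW/dx) with x = r/h, the factor 3 being the number of spatial dimensions. One such one-sided contribution, factored into a hypothetical helper that uses only names from the hunk:

    static inline void density_contribution(float r, float h, float mj,
                                            struct part *p) {
      float w, w_dx;
      const float x = r / h;      /* scaled separation */
      kernel_deval(x, &w, &w_dx); /* kernel value and derivative */
      p->rho += mj * w;
      p->rho_dh -= mj * (3.f * w + x * w_dx); /* d(rho)/dh term */
      p->density.wcount += w;
      p->density.wcount_dh -= x * w_dx;
    }

/*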
*/ - r = sqrtf(r2); + const float r = sqrtf(r2); - h_inv = 1.f / hi; - xi = r * h_inv; + const float h_inv = 1.f / hi; + const float xi = r * h_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; } @@ -148,7 +142,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Compute sound speeds */ const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); - float v_sig = ci + cj + 3.f * omega_ij; + const float v_sig = ci + cj + 3.f * omega_ij; /* SPH acceleration term */ const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; @@ -225,7 +219,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Compute sound speeds */ const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); - float v_sig = ci + cj + 3.f * omega_ij; + const float v_sig = ci + cj + 3.f * omega_ij; /* SPH acceleration term */ const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; @@ -245,4 +239,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); } -#endif /* SWIFT_RUNNER_IACT_H */ +#endif /* SWIFT_RUNNER_IACT_MINIMAL_H */ diff --git a/src/hydro/Minimal/hydro_io.h b/src/hydro/Minimal/hydro_io.h index 2c56fb489ab84ca7c30426b54cf95e26e3821084..afe5de83f423e43b4d2480cca1ac3e84d6c549de 100644 --- a/src/hydro/Minimal/hydro_io.h +++ b/src/hydro/Minimal/hydro_io.h @@ -56,6 +56,8 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * * @param h_grp The HDF5 group in which to write the arrays. * @param fileName The name of the file (unused in MPI mode). + * @param partTypeGroupName The name of the group containing the particles in +*the HDF5 file. * @param xmfFile The XMF file to write to (unused in MPI mode). * @param N The number of particles on that MPI rank.
* @param N_total The total number of particles (only used in MPI mode) @@ -67,26 +69,31 @@ __attribute__((always_inline)) INLINE static void hydro_read_particles( * */ __attribute__((always_inline)) INLINE static void hydro_write_particles( - hid_t h_grp, char* fileName, FILE* xmfFile, int N, long long N_total, - int mpi_rank, long long offset, struct part* parts, struct UnitSystem* us) { + hid_t h_grp, char* fileName, char* partTypeGroupName, FILE* xmfFile, int N, + long long N_total, int mpi_rank, long long offset, struct part* parts, + struct UnitSystem* us) { /* Write arrays */ - writeArray(h_grp, fileName, xmfFile, "Coordinates", DOUBLE, N, 3, parts, - N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "Velocities", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); - writeArray(h_grp, fileName, xmfFile, "Masses", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, mass, us, UNIT_CONV_MASS); - writeArray(h_grp, fileName, xmfFile, "SmoothingLength", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, h, us, UNIT_CONV_LENGTH); - writeArray(h_grp, fileName, xmfFile, "InternalEnergy", FLOAT, N, 1, parts, - N_total, mpi_rank, offset, u, us, UNIT_CONV_ENERGY_PER_UNIT_MASS); - writeArray(h_grp, fileName, xmfFile, "ParticleIDs", ULONGLONG, N, 1, parts, - N_total, mpi_rank, offset, id, us, UNIT_CONV_NO_UNITS); - writeArray(h_grp, fileName, xmfFile, "Acceleration", FLOAT, N, 3, parts, - N_total, mpi_rank, offset, a_hydro, us, UNIT_CONV_ACCELERATION); - writeArray(h_grp, fileName, xmfFile, "Density", FLOAT, N, 1, parts, N_total, - mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Coordinates", DOUBLE, + N, 3, parts, N_total, mpi_rank, offset, x, us, UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Velocities", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, v, us, UNIT_CONV_SPEED); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Masses", FLOAT, N, 1, + parts, N_total, mpi_rank, offset, mass, us, UNIT_CONV_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "SmoothingLength", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, h, us, + UNIT_CONV_LENGTH); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "InternalEnergy", + FLOAT, N, 1, parts, N_total, mpi_rank, offset, u, us, + UNIT_CONV_ENERGY_PER_UNIT_MASS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "ParticleIDs", + ULONGLONG, N, 1, parts, N_total, mpi_rank, offset, id, us, + UNIT_CONV_NO_UNITS); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Acceleration", FLOAT, + N, 3, parts, N_total, mpi_rank, offset, a_hydro, us, + UNIT_CONV_ACCELERATION); + writeArray(h_grp, fileName, xmfFile, partTypeGroupName, "Density", FLOAT, N, + 1, parts, N_total, mpi_rank, offset, rho, us, UNIT_CONV_DENSITY); } /** diff --git a/src/multipole.h b/src/multipole.h index 91ba6df965ce9d3b088d538411b7f0a8555ba0e4..b7c20ddff5c3f1afc00af501a53b9659c8728ce8 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -127,7 +127,7 @@ __attribute__((always_inline)) INLINE static void multipole_iact_mp( /* Compute the forces on both multipoles. */ #if multipole_order == 1 - for (k = 0; k < 3; k++) p->a[k] += dx[k] * acc; + for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc; #else #error( "Multipoles of order %i not yet implemented." 
, multipole_order ) #endif diff --git a/src/parallel_io.c b/src/parallel_io.c index cffa99a0fd75566ec3e850076d15e104504eeb40..0076c225e1c5361287280f8a567c8062aefd914e 100644 --- a/src/parallel_io.c +++ b/src/parallel_io.c @@ -178,9 +178,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * * Calls #error() if an error occurs. */ -void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, int N, int dim, long long N_total, - int mpi_rank, long long offset, char* part_c, +void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + int N, int dim, long long N_total, int mpi_rank, + long long offset, char* part_c, size_t partSize, struct UnitSystem* us, enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_memspace = 0, h_filespace = 0, h_plist_id = 0; @@ -189,7 +190,6 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; char buffer[150]; @@ -269,7 +269,9 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - if (mpi_rank == 0) writeXMFline(xmfFile, fileName, name, N_total, dim, type); + if (mpi_rank == 0) + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim, + type); /* Write unit conversion factors for this data set */ conversionString(buffer, us, convFactor); @@ -328,14 +330,16 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param convFactor The UnitConversionFactor for this array * */ -#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \ - mpi_rank, offset, field, us, convFactor) \ - writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total, \ - mpi_rank, offset, (char*)(&(part[0]).field), us, \ - convFactor) +#define writeArray(grp, fileName, xmfFile, pTypeGroupName, name, type, N, dim, \ + part, N_total, mpi_rank, offset, field, us, convFactor) \ + writeArrayBackEnd(grp, fileName, xmfFile, pTypeGroupName, name, type, N, \ + dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field), \ + sizeof(part[0]), us, convFactor) /* Import the right hydro definition */ #include "hydro_io.h" +/* Import the right gravity definition */ +#include "gravity_io.h" /** * @brief Reads an HDF5 initial condition file (GADGET-3 type) in parallel @@ -357,16 +361,17 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * */ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info) { + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info) { hid_t h_file = 0, h_grp = 0; - double boxSize[3] = { - 0.0, -1.0, -1.0}; /* GADGET has only cubic boxes (in cosmological mode) */ - int numParticles[6] = { - 0}; /* GADGET has 6 particle types. 
We only keep the type 0*/ - int numParticles_highWord[6] = {0}; - long long offset = 0; - long long N_total = 0; + /* GADGET has only cubic boxes (in cosmological mode) */ + double boxSize[3] = {0.0, -1.0, -1.0}; + int numParticles[NUM_PARTICLE_TYPES] = {0}; + int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + size_t N[NUM_PARTICLE_TYPES] = {0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; /* Open file */ /* message("Opening file '%s' as IC.", fileName); */ @@ -398,58 +403,116 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); - N_total = ((long long)numParticles[0]) + - ((long long)numParticles_highWord[0] << 32); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = ((long long)numParticles[ptype]) + + ((long long)numParticles_highWord[ptype] << 32); + dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; - /* message("Found %d particles in a %speriodic box of size [%f %f %f].", */ - /* N_total, (periodic ? "": "non-"), dim[0], dim[1], dim[2]); */ + /* message("Found %d particles in a %speriodic box of size + * [%f %f %f].", */ + /* N_total, (periodic ? "": "non-"), dim[0], + * dim[1], dim[2]); */ /* Divide the particles among the tasks. */ - offset = mpi_rank * N_total / mpi_size; - *N = (mpi_rank + 1) * N_total / mpi_size - offset; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + offset[ptype] = mpi_rank * N_total[ptype] / mpi_size; + N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype]; + } /* Close header */ H5Gclose(h_grp); - /* Allocate memory to store particles */ - if (posix_memalign((void*)parts, part_align, *N * sizeof(struct part)) != 0) + /* Allocate memory to store SPH particles */ + *Ngas = N[0]; + if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) != + 0) error("Error while allocating memory for particles"); - bzero(*parts, *N * sizeof(struct part)); + bzero(*parts, *Ngas * sizeof(struct part)); - /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / + /* Allocate memory to store all particles */ + const size_t Ndm = N[1]; + *Ngparts = N[1] + N[0]; + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error( + "Error while allocating memory for gravity " + "particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + + /* message("Allocated %8.2f MB for particles.", *N * + * sizeof(struct part) / * (1024.*1024.)); */ - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT); - if (h_grp < 0) error("Error while opening particle group.\n"); + /* message("BoxSize = %lf", dim[0]); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, + * *Ngparts); */ - /* Read particle fields into the particle structure */ - hydro_read_particles(h_grp, *N, N_total, offset, *parts); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Close particle group */ - H5Gclose(h_grp); + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + 
ptype); + h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } + + /* Read particle fields into the particle structure */ + switch (ptype) { + + case GAS: + hydro_read_particles(h_grp, N[ptype], N_total[ptype], offset[ptype], + *parts); + break; + + case DM: + darkmatter_read_particles(h_grp, N[ptype], N_total[ptype], + offset[ptype], *gparts); + break; + + default: + error("Particle Type %d not yet supported. Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + } + + /* Prepare the DM particles */ + prepare_dm_gparts(*gparts, Ndm); + + /* Now duplicate the hydro particle into gparts */ + duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + + /* message("Done Reading particles..."); */ /* Close property handler */ H5Pclose(h_plist_id); /* Close file */ H5Fclose(h_file); - - /* message("Done Reading particles..."); */ } /** - * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor + * @brief Writes an HDF5 output file (GADGET-3 type) with + *its XMF descriptor * * @param e The engine containing all the system. - * @param us The UnitSystem used for the conversion of units in the output + * @param us The UnitSystem used for the conversion of units + *in the output * - * Creates an HDF5 output file and writes the particles contained - * in the engine. If such a file already exists, it is erased and replaced + * Creates an HDF5 output file and writes the particles + *contained + * in the engine. If such a file already exists, it is + *erased and replaced * by the new one. * The companion XMF file is also updated accordingly. * @@ -459,23 +522,27 @@ void read_ic_parallel(char* fileName, double dim[3], struct part** parts, void write_output_parallel(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info) { - hid_t h_file = 0, h_grp = 0, h_grpsph = 0; - int N = e->s->nr_parts; + const size_t Ngas = e->s->nr_parts; + const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; - unsigned int numParticles[6] = {N, 0}; - unsigned int numParticlesHighWord[6] = {0}; - unsigned int flagEntropy[6] = {0}; - long long N_total = 0, offset = 0; - double offset_d = 0., N_d = 0., N_total_d = 0.; int numFiles = 1; struct part* parts = e->s->parts; - FILE* xmfFile = 0; + struct gpart* gparts = e->s->gparts; + struct gpart* dmparts = NULL; static int outputCount = 0; + FILE* xmfFile = 0; + + /* Number of particles of each type */ + // const size_t Ndm = Ntot - Ngas; + + /* MATTHIEU: Temporary fix to preserve master */ + const size_t Ndm = Ntot > 0 ? 
Ntot - Ngas : 0; + /* MATTHIEU: End temporary fix */ /* File name */ - char fileName[200]; - sprintf(fileName, "output_%03i.hdf5", outputCount); + char fileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); /* First time, we need to create the XMF file */ if (outputCount == 0 && mpi_rank == 0) createXMFfile(); @@ -491,21 +558,26 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, error("Error while opening file '%s'.", fileName); } - /* Compute offset in the file and total number of particles */ - /* Done using double to allow for up to 2^50=10^15 particles */ - N_d = (double)N; - MPI_Exscan(&N_d, &offset_d, 1, MPI_DOUBLE, MPI_SUM, comm); - N_total_d = offset_d + N_d; - MPI_Bcast(&N_total_d, 1, MPI_DOUBLE, mpi_size - 1, comm); - if (N_total_d > 1.e15) - error( - "Error while computing the offset for parallel output: Simulation has " - "more than 10^15 particles.\n"); - N_total = (long long)N_total_d; - offset = (long long)offset_d; + /* Compute offset in the file and total number of + * particles */ + size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; + MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = offset[ptype] + N[ptype]; + + /* The last rank now has the correct N_total. Let's + * broadcast from there */ + MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm); - /* Write the part of the XMF file corresponding to this specific output */ - if (mpi_rank == 0) writeXMFheader(xmfFile, N_total, fileName, e->time); + /* Now everybody knows its offset and the total number of + * particles of each + * type */ + + /* Write the part of the XMF file corresponding to this + * specific output */ + if (mpi_rank == 0) writeXMFoutputheader(xmfFile, fileName, e->time); /* Open header to write simulation properties */ /* message("Writing runtime parameters..."); */ @@ -526,19 +598,28 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, /* Print the relevant information and print status */ writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, 6); double dblTime = e->time; writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1); /* GADGET-2 legacy values */ - numParticles[0] = (unsigned int)N_total; - writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, 6); - numParticlesHighWord[0] = (unsigned int)(N_total >> 32); + /* Number of particles of each type */ + unsigned int numParticles[NUM_PARTICLE_TYPES] = {0}; + unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + numParticles[ptype] = (unsigned int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + NUM_PARTICLE_TYPES); + writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord, - 6); + NUM_PARTICLE_TYPES); double MassTable[6] = {0., 0., 0., 0., 0., 0.}; - writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 6); - writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 6); + writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); + unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + writeAttribute(h_grp,
"Flag_Entropy_ICs", UINT, flagEntropy, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); /* Close header */ @@ -556,21 +637,71 @@ void write_output_parallel(struct engine* e, struct UnitSystem* us, /* Print the system of Units */ writeUnitSystem(h_file, us); - /* Create SPH particles group */ - /* message("Writing particle arrays..."); */ - h_grp = - H5Gcreate(h_file, "/PartType0", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (h_grp < 0) error("Error while creating particle group.\n"); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { + + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Add the global information for that particle type to + * the XMF meta-file */ + if (mpi_rank == 0) + writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype); + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } - /* Write particle fields from the particle structure */ - hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank, offset, - parts, us); + /* Read particle fields into the particle structure */ + switch (ptype) { - /* Close particle group */ - H5Gclose(h_grp); + case GAS: + hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, offset[ptype], + parts, us); + + break; + + case DM: + /* Allocate temporary array */ + if (posix_memalign((void*)&dmparts, gpart_align, + Ndm * sizeof(struct gpart)) != 0) + error( + "Error while allocating temporart memory for " + "DM particles"); + bzero(dmparts, Ndm * sizeof(struct gpart)); + + /* Collect the DM particles from gpart */ + collect_dm_gparts(gparts, Ntot, dmparts, Ndm); + + /* Write DM particles */ + darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, + offset[ptype], dmparts, us); + + /* Free temporary array */ + free(dmparts); + break; + + default: + error("Particle Type %d not yet supported. 
Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + + /* Close this particle group in the XMF file as well */ + if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype); + } /* Write XMF file descriptor */ - if (mpi_rank == 0) writeXMFfooter(xmfFile); + if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time); /* message("Done writing particles..."); */ diff --git a/src/parallel_io.h b/src/parallel_io.h index a0589944ec845c712abde1e64e305980748db0e7..663f0aabac44c08682b964512839b925673ea5c5 100644 --- a/src/parallel_io.h +++ b/src/parallel_io.h @@ -32,8 +32,9 @@ #if defined(HAVE_HDF5) && defined(WITH_MPI) && defined(HAVE_PARALLEL_HDF5) void read_ic_parallel(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info); + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info); void write_output_parallel(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..06dc819842d54d952704e4e0c40ebec5b561f691 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,265 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +/* Needs to be included so that strtok returns char * instead of an int *. */ +#include <string.h> +#include <stdlib.h> + +/* This object's header. */ +#include "parser.h" + +/* Local headers. */ +#include "error.h" + +/* Private functions. */ +static int count_char(char *str, char val); +static void parse_line(FILE *fp, struct swift_params *params); + +/** + * @brief Reads an input file and stores each parameter in a structure. + * + * @param file_name Name of file to be read + * @param params Structure to be populated from file + */ + +void parser_read_file(const char *file_name, struct swift_params *params) { + + FILE *fp; + + params->count = 0; + + /* Open file for reading */ + fp = fopen(file_name, "r"); + + if (fp == NULL) { + error("Error opening parameter file: %s", file_name); + } + + /* Read until the end of the file is reached.*/ + while (!feof(fp)) { + parse_line(fp, params); + } + + fclose(fp); } + +/** + * @brief Counts the number of times a specific character appears in a string.
+ * + * @param str String to be checked + * @param val Character to be counted + */ + +static int count_char(char *str, char val) { + + int count = 0; + + /* Check if the line contains the character */ + while (*str) { + if (*str++ == val) ++count; + } + + return count; +} + +/** + * @brief Parses a line from a file and stores any parameters in a structure. + * + * @param fp File pointer to file to be read + * @param params Structure to be populated from file + * + */ + +static void parse_line(FILE *fp, struct swift_params *params) { + + char line[PARSER_MAX_LINE_SIZE]; + char trim_line[PARSER_MAX_LINE_SIZE]; + + /* Read a line of the file */ + if (fgets(line, PARSER_MAX_LINE_SIZE, fp) != NULL) { + + char *token; + /* Remove comments */ + token = strtok(line, PARSER_COMMENT_CHAR); + strcpy(trim_line, token); + + /* Check if the line contains a value */ + if (strchr(trim_line, PARSER_VALUE_CHAR)) { + /* Check for more than one parameter on the same line. */ + if (count_char(trim_line, PARSER_VALUE_CHAR) > 1) { + error("Found more than one parameter in '%s', only one allowed.", line); + } else { + /* Take first token as the parameter name. */ + token = strtok(trim_line, PARSER_VALUE_STRING); + strcpy(params->data[params->count].name, token); + + /* Take second token as the parameter value. */ + token = strtok(NULL, " #\n"); + strcpy(params->data[params->count++].value, token); + } + } + } +} + +/** + * @brief Retrieve integer parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_int(struct swift_params *params, char *name, + int *retParam) { + + char str[128]; + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%d%s", retParam, str) != 1) { + error( + "Tried parsing int '%s' but found '%s' with illegal integer " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return; + } + } + + message("Cannot find '%s' in the structure.", name); +} + +/** + * @brief Retrieve float parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_float(struct swift_params *params, char *name, + float *retParam) { + + char str[128]; + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%f%s", retParam, str) != 1) { + error( + "Tried parsing float '%s' but found '%s' with illegal float " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return; + } + } + + message("Cannot find '%s' in the structure.", name); +} + +/** + * @brief Retrieve double parameter from structure. 
+ * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_double(struct swift_params *params, char *name, + double *retParam) { + + char str[128]; + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%lf%s", retParam, str) != 1) { + error( + "Tried parsing double '%s' but found '%s' with illegal double " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return; + } + } + + message("Cannot find '%s' in the structure.", name); +} + +/** + * @brief Retrieve string parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_string(struct swift_params *params, char *name, + char *retParam) { + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + strcpy(retParam, params->data[i].value); + return; + } + } +} + +/** + * @brief Prints the contents of the parameter structure. + * + * @param params Structure that holds the parameters + * + */ + +void parser_print_params(struct swift_params *params) { + + printf("\n--------------------------\n"); + printf("| SWIFT Parameter File |\n"); + printf("--------------------------\n"); + + for (int i = 0; i < params->count; i++) { + printf("Parameter name: %s\n", params->data[i].name); + printf("Parameter value: %s\n", params->data[i].value); + } +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..2fb4148944cd423da016341744cb6d58e222182e --- /dev/null +++ b/src/parser.h @@ -0,0 +1,54 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PARSER_H +#define SWIFT_PARSER_H + +#include <stdio.h> + +#define PARSER_MAX_LINE_SIZE 128 +#define PARSER_MAX_NO_OF_PARAMS 512 + +#define PARSER_COMMENT_CHAR "#" +#define PARSER_VALUE_CHAR ':' +#define PARSER_VALUE_STRING ":" +#define PARSER_END_OF_FILE "..." + +struct parameter { + char name[PARSER_MAX_LINE_SIZE]; + char value[PARSER_MAX_LINE_SIZE]; +}; + +struct swift_params { + struct parameter data[PARSER_MAX_NO_OF_PARAMS]; + int count; +}; + +/* Public API.
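*/

A minimal usage sketch for the API declared just below, assuming a hypothetical parameter file "params.yml" that contains a line such as "num_threads: 16":

    #include <stdio.h>
    #include "parser.h"

    int main(void) {
      struct swift_params params;
      int num_threads = 0;

      parser_read_file("params.yml", &params); /* parse the whole file */
      parser_get_param_int(&params, "num_threads", &num_threads);
      parser_print_params(&params);            /* dump what was read */

      printf("num_threads = %d\n", num_threads);
      return 0;
    }

/*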
*/ +void parser_read_file(const char *file_name, struct swift_params *params); +void parser_print_params(struct swift_params *params); +void parser_get_param_int(struct swift_params *params, char *name, + int *retParam); +void parser_get_param_float(struct swift_params *params, char *name, + float *retParam); +void parser_get_param_double(struct swift_params *params, char *name, + double *retParam); +void parser_get_param_string(struct swift_params *params, char *name, + char *retParam); + +#endif /* SWIFT_PARSER_H */ diff --git a/src/part.c b/src/part.c index fa87a50d0c4407ac7e20963ca99a8419187f0eee..d5a2bc0ec82c44219509d338f9a5108b3821e11e 100644 --- a/src/part.c +++ b/src/part.c @@ -26,6 +26,7 @@ #endif /* This object's header. */ +#include "error.h" #include "part.h" /** @@ -57,30 +58,17 @@ void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts) { } #ifdef WITH_MPI -/** - * @brief Registers and returns an MPI type for the particles - * - * @param part_type The type container - */ -void part_create_mpi_type(MPI_Datatype* part_type) { - - /* This is not the recommended way of doing this. - One should define the structure field by field - But as long as we don't do serialization via MPI-IO - we don't really care. - Also we would have to modify this function everytime something - is added to the part structure. */ - MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, - part_type); - MPI_Type_commit(part_type); -} +/* MPI data type for the particle transfers */ +MPI_Datatype part_mpi_type; +MPI_Datatype xpart_mpi_type; +MPI_Datatype gpart_mpi_type; +#endif +#ifdef WITH_MPI /** - * @brief Registers and returns an MPI type for the xparticles - * - * @param xpart_type The type container + * @brief Registers MPI particle types. */ -void xpart_create_mpi_type(MPI_Datatype* xpart_type) { +void part_create_mpi_types() { /* This is not the recommended way of doing this. One should define the structure field by field @@ -88,9 +76,20 @@ void xpart_create_mpi_type(MPI_Datatype* xpart_type) { we don't really care. Also we would have to modify this function every time something is added to the part structure. */ - MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), MPI_BYTE, - xpart_type); - MPI_Type_commit(xpart_type); + if (MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, + &part_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&part_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for parts."); + } + if (MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), + MPI_BYTE, &xpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&xpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for xparts."); + } + if (MPI_Type_contiguous(sizeof(struct gpart) / sizeof(unsigned char), + MPI_BYTE, &gpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for gparts."); + } } - #endif diff --git a/src/part.h b/src/part.h index fa52cefc0d2561a8daa83b4c507e361f1e281f58..1fba171a46cecb7df6ea20ff28ba3bbaefecc7d1 100644 --- a/src/part.h +++ b/src/part.h @@ -36,8 +36,8 @@ /* Some constants.
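*/

The comment kept in part.c above concedes that these byte-blob types (MPI_Type_contiguous over MPI_BYTE) are not the recommended approach. For reference, a hedged sketch of the field-by-field alternative it alludes to, shown for a two-member toy struct rather than the real part struct:

    #include <stddef.h> /* offsetof */
    #include <mpi.h>

    struct demo { double x[3]; float mass; }; /* stand-in for struct part */

    static MPI_Datatype make_demo_type(void) {
      int lens[2] = {3, 1};
      MPI_Aint disps[2] = {offsetof(struct demo, x), offsetof(struct demo, mass)};
      MPI_Datatype types[2] = {MPI_DOUBLE, MPI_FLOAT};
      MPI_Datatype demo_type;
      MPI_Type_create_struct(2, lens, disps, types, &demo_type);
      MPI_Type_commit(&demo_type);
      return demo_type; /* must be kept in sync with the struct layout */
    }

/*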
*/ #define part_align 64 -#define gpart_align 32 #define xpart_align 32 +#define gpart_align 32 /* Import the right particle definition */ #if defined(MINIMAL_SPH) @@ -55,8 +55,12 @@ void part_relink_gparts(struct part *parts, size_t N, ptrdiff_t offset); void part_relink_parts(struct gpart *gparts, size_t N, struct part *parts); #ifdef WITH_MPI -void part_create_mpi_type(MPI_Datatype* part_type); -void xpart_create_mpi_type(MPI_Datatype* xpart_type); +/* MPI data type for the particle transfers */ +extern MPI_Datatype part_mpi_type; +extern MPI_Datatype xpart_mpi_type; +extern MPI_Datatype gpart_mpi_type; + +void part_create_mpi_types(); #endif #endif /* SWIFT_PART_H */ diff --git a/src/partition.c b/src/partition.c index 0f8eb3ebe334d71228510307dd9ccc4e56e234b3..ea25bc132dacf19b7a5c12765d2a39313fc01486 100644 --- a/src/partition.c +++ b/src/partition.c @@ -424,7 +424,7 @@ static void repart_edge_metis(int partweights, int bothweights, int nodeID, * assume the same graph structure as used in the part_ calls). */ int nr_cells = s->nr_cells; struct cell *cells = s->cells; - float wscale = 1e-3, vscale = 1e-3, wscale_buff; + float wscale = 1e-3, vscale = 1e-3, wscale_buff = 0.0; int wtot = 0; int wmax = 1e9 / nr_nodes; int wmin; diff --git a/src/proxy.c b/src/proxy.c index 7d2e546bf945ca18c2195ea2801d1b2058cb2f58..02263a5653bdcdd2d1bf0a86523ed1a599d4bf21 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -50,11 +50,9 @@ void proxy_cells_exch1(struct proxy *p) { #ifdef WITH_MPI - int k, ind; - /* Get the number of pcells we will need to send. */ p->size_pcells_out = 0; - for (k = 0; k < p->nr_cells_out; k++) + for (int k = 0; k < p->nr_cells_out; k++) p->size_pcells_out += p->cells_out[k]->pcell_size; /* Send the number of pcells. */ @@ -70,7 +68,7 @@ void proxy_cells_exch1(struct proxy *p) { if ((p->pcells_out = malloc(sizeof(struct pcell) * p->size_pcells_out)) == NULL) error("Failed to allocate pcell_out buffer."); - for (ind = 0, k = 0; k < p->nr_cells_out; k++) { + for (int ind = 0, k = 0; k < p->nr_cells_out; k++) { memcpy(&p->pcells_out[ind], p->cells_out[k]->pcell, sizeof(struct pcell) * p->cells_out[k]->pcell_size); ind += p->cells_out[k]->pcell_size; @@ -131,16 +129,14 @@ void proxy_cells_exch2(struct proxy *p) { void proxy_addcell_in(struct proxy *p, struct cell *c) { - int k; - struct cell **temp; - /* Check if the cell is already registered with the proxy. */ - for (k = 0; k < p->nr_cells_in; k++) + for (int k = 0; k < p->nr_cells_in; k++) if (p->cells_in[k] == c) return; /* Do we need to grow the number of in cells? */ if (p->nr_cells_in == p->size_cells_in) { p->size_cells_in *= proxy_buffgrow; + struct cell **temp; if ((temp = malloc(sizeof(struct cell *) * p->size_cells_in)) == NULL) error("Failed to allocate incoming cell list."); memcpy(temp, p->cells_in, sizeof(struct cell *) * p->nr_cells_in); @@ -162,16 +158,14 @@ void proxy_addcell_in(struct proxy *p, struct cell *c) { void proxy_addcell_out(struct proxy *p, struct cell *c) { - int k; - struct cell **temp; - /* Check if the cell is already registered with the proxy. */ - for (k = 0; k < p->nr_cells_out; k++) + for (int k = 0; k < p->nr_cells_out; k++) if (p->cells_out[k] == c) return; /* Do we need to grow the number of out cells? 
*/ if (p->nr_cells_out == p->size_cells_out) { p->size_cells_out *= proxy_buffgrow; + struct cell **temp; if ((temp = malloc(sizeof(struct cell *) * p->size_cells_out)) == NULL) error("Failed to allocate outgoing cell list."); memcpy(temp, p->cells_out, sizeof(struct cell *) * p->nr_cells_out); @@ -195,20 +189,21 @@ void proxy_parts_exch1(struct proxy *p) { #ifdef WITH_MPI /* Send the number of particles. */ - if (MPI_Isend(&p->nr_parts_out, 1, MPI_INT, p->nodeID, + p->buff_out[0] = p->nr_parts_out; + p->buff_out[1] = p->nr_gparts_out; + if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_out) != MPI_SUCCESS) error("Failed to isend nr of parts."); - // message( "isent particle count (%i) from node %i to node %i." , - // p->nr_parts_out , p->mynodeID , p->nodeID ); fflush(stdout); + /* message( "isent particle counts [%i, %i] from node %i to node %i." , + p->buff_out[0], p->buff_out[1], p->mynodeID , p->nodeID ); fflush(stdout); */ /* Send the particle buffers. */ if (p->nr_parts_out > 0) { - if (MPI_Isend(p->parts_out, sizeof(struct part) * p->nr_parts_out, MPI_BYTE, - p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_parts, + if (MPI_Isend(p->parts_out, p->nr_parts_out, part_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, &p->req_parts_out) != MPI_SUCCESS || - MPI_Isend(p->xparts_out, sizeof(struct xpart) * p->nr_parts_out, - MPI_BYTE, p->nodeID, + MPI_Isend(p->xparts_out, p->nr_parts_out, xpart_mpi_type, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_out) != MPI_SUCCESS) error("Failed to isend part data."); @@ -219,14 +214,20 @@ void proxy_parts_exch1(struct proxy *p) { p->parts_out[k].id, p->parts_out[k].x[0], p->parts_out[k].x[1], p->parts_out[k].x[2], p->parts_out[k].h, p->nodeID);*/ } + if (p->nr_gparts_out > 0) { + if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS) + error("Failed to isend part data."); + // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , + // p->nodeID ); fflush(stdout); + } /* Receive the number of particles. */ - if (MPI_Irecv(&p->nr_parts_in, 1, MPI_INT, p->nodeID, + if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_in) != MPI_SUCCESS) error("Failed to irecv nr of parts."); -// message( "irecv particle count on node %i from node %i." , p->mynodeID , -// p->nodeID ); fflush(stdout); #else error("SWIFT was not compiled with MPI support."); @@ -237,6 +238,10 @@ void proxy_parts_exch2(struct proxy *p) { #ifdef WITH_MPI + /* Unpack the incomming parts counts. */ + p->nr_parts_in = p->buff_in[0]; + p->nr_gparts_in = p->buff_in[1]; + /* Is there enough space in the buffer? */ if (p->nr_parts_in > p->size_parts_in) { do { @@ -250,19 +255,36 @@ void proxy_parts_exch2(struct proxy *p) { p->size_parts_in)) == NULL) error("Failed to re-allocate parts_in buffers."); } + if (p->nr_gparts_in > p->size_gparts_in) { + do { + p->size_gparts_in *= proxy_buffgrow; + } while (p->nr_gparts_in > p->size_gparts_in); + free(p->gparts_in); + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to re-allocate gparts_in buffers."); + } /* Receive the particle buffers. 
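
Packing the part and gpart counters into buff_out/buff_in halves the number of count messages per proxy pair. Stripped of the proxy machinery, the exchange reduces to the following sketch; partner and tag are placeholders, not names from the patch.

    #include <mpi.h>

    /* Sketch: send and receive two counters in a single message each way. */
    void exchange_counts(int partner, int tag, const int counts_out[2],
                         int counts_in[2]) {
      MPI_Request reqs[2];
      MPI_Isend((void *)counts_out, 2, MPI_INT, partner, tag, MPI_COMM_WORLD,
                &reqs[0]);
      MPI_Irecv(counts_in, 2, MPI_INT, partner, tag, MPI_COMM_WORLD, &reqs[1]);
      MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
    }
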
*/ if (p->nr_parts_in > 0) { - if (MPI_Irecv(p->parts_in, sizeof(struct part) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_parts, - MPI_COMM_WORLD, &p->req_parts_in) != MPI_SUCCESS || - MPI_Irecv(p->xparts_in, sizeof(struct xpart) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_xparts, + if (MPI_Irecv(p->parts_in, p->nr_parts_in, part_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, + &p->req_parts_in) != MPI_SUCCESS || + MPI_Irecv(p->xparts_in, p->nr_parts_in, xpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_in) != MPI_SUCCESS) error("Failed to irecv part data."); // message( "irecv particle data (%i) from node %i." , p->nr_parts_in , // p->nodeID ); fflush(stdout); } + if (p->nr_gparts_in > 0) { + if (MPI_Irecv(p->gparts_in, p->nr_gparts_in, gpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_in) != MPI_SUCCESS) + error("Failed to irecv gpart data."); + // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in , + // p->nodeID ); fflush(stdout); + } #else error("SWIFT was not compiled with MPI support."); @@ -278,8 +300,8 @@ void proxy_parts_exch2(struct proxy *p) { * @param N The number of parts. */ -void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, - int N) { +void proxy_parts_load(struct proxy *p, const struct part *parts, + const struct xpart *xparts, int N) { /* Is there enough space in the buffer? */ if (p->nr_parts_out + N > p->size_parts_out) { @@ -309,6 +331,37 @@ void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, p->nr_parts_out += N; } +/** + * @brief Load parts onto a proxy for exchange. + * + * @param p The #proxy. + * @param gparts Pointer to an array of #gpart to send. + * @param N The number of parts. + */ + +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { + + /* Is there enough space in the buffer? */ + if (p->nr_gparts_out + N > p->size_gparts_out) { + do { + p->size_gparts_out *= proxy_buffgrow; + } while (p->nr_gparts_out + N > p->size_gparts_out); + struct gpart *tp; + if ((tp = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to re-allocate gparts_out buffers."); + memcpy(tp, p->gparts_out, sizeof(struct gpart) * p->nr_gparts_out); + free(p->gparts_out); + p->gparts_out = tp; + } + + /* Copy the parts and xparts data to the buffer. */ + memcpy(&p->gparts_out[p->nr_gparts_out], gparts, sizeof(struct gpart) * N); + + /* Increase the counters. */ + p->nr_gparts_out += N; +} + /** * @brief Initialize the given proxy. * @@ -358,4 +411,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { error("Failed to allocate parts_out buffers."); } p->nr_parts_out = 0; + + /* Allocate the gpart send and receive buffers, if needed. 
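
All of these proxy buffers follow the same geometric-growth policy driven by proxy_buffgrow. In isolation the pattern looks like this sketch, written for a generic int buffer with a stand-in growth factor.

    #include <stdlib.h>
    #include <string.h>

    #define GROW_FACTOR 2 /* stand-in for proxy_buffgrow */

    /* Sketch: grow a buffer geometrically until it holds 'needed' items,
       preserving the 'used' items already stored; amortised O(1) appends. */
    int *ensure_capacity(int *buf, int used, int *size, int needed) {
      if (needed <= *size) return buf;
      do {
        *size *= GROW_FACTOR;
      } while (needed > *size);
      int *tmp = malloc(sizeof(int) * *size);
      if (tmp == NULL) return NULL; /* the proxy treats this as fatal */
      memcpy(tmp, buf, sizeof(int) * used);
      free(buf);
      return tmp;
    }
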
*/ + if (p->gparts_in == NULL) { + p->size_gparts_in = proxy_buffinit; + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to allocate gparts_in buffers."); + } + p->nr_gparts_in = 0; + if (p->gparts_out == NULL) { + p->size_gparts_out = proxy_buffinit; + if ((p->gparts_out = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to allocate gparts_out buffers."); + } + p->nr_gparts_out = 0; } diff --git a/src/proxy.h b/src/proxy.h index 3cd33e0f0819ee1ecac53213630445b39c809dea..5a747187e05a78a109ce4523ebb3c9d5fe2ad717 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -32,7 +32,8 @@ #define proxy_tag_count 0 #define proxy_tag_parts 1 #define proxy_tag_xparts 2 -#define proxy_tag_cells 3 +#define proxy_tag_gparts 3 +#define proxy_tag_cells 4 /* Data structure for the proxy. */ struct proxy { @@ -53,14 +54,21 @@ struct proxy { /* The parts and xparts buffers for input and output. */ struct part *parts_in, *parts_out; struct xpart *xparts_in, *xparts_out; + struct gpart *gparts_in, *gparts_out; int size_parts_in, size_parts_out; int nr_parts_in, nr_parts_out; + int size_gparts_in, size_gparts_out; + int nr_gparts_in, nr_gparts_out; + + /* Buffer to hold the incomming/outgoing particle counts. */ + int buff_out[2], buff_in[2]; /* MPI request handles. */ #ifdef WITH_MPI MPI_Request req_parts_count_out, req_parts_count_in; MPI_Request req_parts_out, req_parts_in; MPI_Request req_xparts_out, req_xparts_in; + MPI_Request req_gparts_out, req_gparts_in; MPI_Request req_cells_count_out, req_cells_count_in; MPI_Request req_cells_out, req_cells_in; #endif @@ -68,8 +76,9 @@ struct proxy { /* Function prototypes. */ void proxy_init(struct proxy *p, int mynodeID, int nodeID); -void proxy_parts_load(struct proxy *p, struct part *parts, struct xpart *xparts, - int N); +void proxy_parts_load(struct proxy *p, const struct part *parts, + const struct xpart *xparts, int N); +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); void proxy_parts_exch1(struct proxy *p); void proxy_parts_exch2(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c); diff --git a/src/queue.c b/src/queue.c index a7321155100df9225526c2f19fac2b99531307e4..6b788d7376ba4bdc95f1b1d918ab52a9514e7b4a 100644 --- a/src/queue.c +++ b/src/queue.c @@ -136,9 +136,6 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, lock_type *qlock = &q->lock; struct task *res = NULL; - /* If there are no tasks, leave immediately. */ - if (q->count == 0) return NULL; - /* Grab the task lock. */ if (blocking) { if (lock_lock(qlock) != 0) error("Locking the qlock failed.\n"); @@ -146,6 +143,12 @@ struct task *queue_gettask(struct queue *q, const struct task *prev, if (lock_trylock(qlock) != 0) return NULL; } + /* If there are no tasks, leave immediately. */ + if (q->count == 0) { + lock_unlock_blind(qlock); + return NULL; + } + /* Set some pointers we will use often. 
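
The queue.c hunk above closes a check-then-act race: the emptiness test on q->count now happens after the lock is taken, so a task enqueued between the check and the pop can no longer be missed. The general shape of the fix, sketched with plain pthreads and invented names:

    #include <pthread.h>

    struct toy_queue {
      pthread_mutex_t lock;
      int count;
      int items[64];
    };

    /* Sketch: the emptiness check and the removal are atomic with respect
       to other threads because both happen under the lock. */
    int toy_queue_pop(struct toy_queue *q, int *item) {
      pthread_mutex_lock(&q->lock);
      if (q->count == 0) { /* checked while holding the lock */
        pthread_mutex_unlock(&q->lock);
        return 0;
      }
      *item = q->items[--q->count];
      pthread_mutex_unlock(&q->lock);
      return 1;
    }
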
*/ int *qtid = q->tid; struct task *qtasks = q->tasks; diff --git a/src/runner.c b/src/runner.c index 7eedb6adc72755ba12faed5429edad43d3849451..fefbb04a4fb0ae55628f2bc5d42f2d140226c5c5 100644 --- a/src/runner.c +++ b/src/runner.c @@ -469,8 +469,10 @@ void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) { void runner_doinit(struct runner *r, struct cell *c, int timer) { - struct part *p, *parts = c->parts; + struct part *const parts = c->parts; + struct gpart *const gparts = c->gparts; const int count = c->count; + const int gcount = c->gcount; const int ti_current = r->e->ti_current; TIMER_TIC; @@ -486,7 +488,7 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { for (int i = 0; i < count; i++) { /* Get a direct pointer on the part. */ - p = &parts[i]; + struct part *const p = &parts[i]; if (p->ti_end <= ti_current) { @@ -494,6 +496,19 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { hydro_init_part(p); } } + + /* Loop over the gparts in this cell. */ + for (int i = 0; i < gcount; i++) { + + /* Get a direct pointer on the part. */ + struct gpart *const gp = &gparts[i]; + + if (gp->ti_end <= ti_current) { + + /* Get ready for a density calculation */ + gravity_init_part(gp); + } + } } if (timer) TIMER_TOC(timer_init); @@ -649,7 +664,7 @@ void runner_doghost(struct runner *r, struct cell *c) { } /** - * @brief Drift particles forward in time + * @brief Drift particles and g-particles forward in time * * @param r The runner thread. * @param c The cell. @@ -658,26 +673,39 @@ void runner_doghost(struct runner *r, struct cell *c) { void runner_dodrift(struct runner *r, struct cell *c, int timer) { const int nr_parts = c->count; + const int nr_gparts = c->gcount; const double timeBase = r->e->timeBase; const double dt = (r->e->ti_current - r->e->ti_old) * timeBase; - const float ti_old = r->e->ti_old; - const float ti_current = r->e->ti_current; - struct part *restrict p, *restrict parts = c->parts; - struct xpart *restrict xp, *restrict xparts = c->xparts; - float dx_max = 0.f, h_max = 0.f; - float w; + const int ti_old = r->e->ti_old; + const int ti_current = r->e->ti_current; + struct part *const parts = c->parts; + struct xpart *const xparts = c->xparts; + struct gpart *const gparts = c->gparts; + float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; TIMER_TIC /* No children? */ if (!c->split) { - /* Loop over all the particles in the cell */ + /* Loop over all the g-particles in the cell */ + for (int k = 0; k < nr_gparts; ++k) { + + /* Get a handle on the gpart. */ + struct gpart *const gp = &gparts[k]; + + /* Drift... */ + gp->x[0] += gp->v_full[0] * dt; + gp->x[1] += gp->v_full[1] * dt; + gp->x[2] += gp->v_full[2] * dt; + } + + /* Loop over all the particles in the cell (more work for these !) */ for (int k = 0; k < nr_parts; k++) { /* Get a handle on the part. 
*/ - p = &parts[k]; - xp = &xparts[k]; + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; /* Useful quantity */ const float h_inv = 1.0f / p->h; @@ -693,32 +721,34 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { p->v[2] += p->a_hydro[2] * dt; /* Predict smoothing length */ - w = p->h_dt * h_inv * dt; - if (fabsf(w) < 0.2f) - p->h *= approx_expf(w); /* 4th order expansion of exp(w) */ + const float w1 = p->h_dt * h_inv * dt; + if (fabsf(w1) < 0.2f) + p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */ else - p->h *= expf(w); + p->h *= expf(w1); /* Predict density */ - w = -3.0f * p->h_dt * h_inv * dt; - if (fabsf(w) < 0.2f) - p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */ + const float w2 = -3.0f * p->h_dt * h_inv * dt; + if (fabsf(w2) < 0.2f) + p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */ else - p->rho *= expf(w); + p->rho *= expf(w2); /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, ti_old, ti_current, timeBase); - /* Compute motion since last cell construction */ - const float dx = - sqrtf((p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) + - (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) + - (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2])); - dx_max = fmaxf(dx_max, dx); + /* Compute (square of) motion since last cell construction */ + const float dx2 = (p->x[0] - xp->x_old[0]) * (p->x[0] - xp->x_old[0]) + + (p->x[1] - xp->x_old[1]) * (p->x[1] - xp->x_old[1]) + + (p->x[2] - xp->x_old[2]) * (p->x[2] - xp->x_old[2]); + dx2_max = fmaxf(dx2_max, dx2); /* Maximal smoothing length */ h_max = fmaxf(p->h, h_max); } + + /* Now, get the maximal particle motion from its square */ + dx_max = sqrtf(dx2_max); } /* Otherwise, aggregate data from children. */ @@ -758,37 +788,97 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { const double timeBase = r->e->timeBase; const double timeBase_inv = 1.0 / r->e->timeBase; const int count = c->count; + const int gcount = c->gcount; + struct part *const parts = c->parts; + struct xpart *const xparts = c->xparts; + struct gpart *const gparts = c->gparts; const int is_fixdt = (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt; - int new_dti; - int dti_timeline; - - int updated = 0; + int updated = 0, g_updated = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0; float mom[3] = {0.0f, 0.0f, 0.0f}; float ang[3] = {0.0f, 0.0f, 0.0f}; - float x[3], v_full[3]; - struct part *restrict p, *restrict parts = c->parts; - struct xpart *restrict xp, *restrict xparts = c->xparts; TIMER_TIC /* No children? */ if (!c->split) { + /* Loop over the g-particles and kick the active ones. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. 
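*/

The drift loop above now accumulates the maximum squared displacement and takes a single square root per cell, which is valid because sqrtf is monotonic on non-negative values. The idea in isolation, as a sketch:

    #include <math.h>

    /* Sketch: one sqrtf per cell instead of one per particle. */
    float max_displacement(const float *dx, const float *dy, const float *dz,
                           int n) {
      float dx2_max = 0.f;
      for (int i = 0; i < n; i++) {
        const float dx2 = dx[i] * dx[i] + dy[i] * dy[i] + dz[i] * dz[i];
        if (dx2 > dx2_max) dx2_max = dx2;
      }
      return sqrtf(dx2_max);
    }
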
*/ + struct gpart *const gp = &gparts[k]; + + /* If the g-particle has no counterpart and needs to be kicked */ + if (gp->id_or_neg_offset > 0 && (is_fixdt || gp->ti_end <= ti_current)) { + + /* First, finish the force calculation */ + gravity_end_force(gp); + + /* Now we are ready to compute the next time-step size */ + int new_dti; + + if (is_fixdt) { + + /* Now we have a time step, proceed with the kick */ + new_dti = global_dt_max * timeBase_inv; + + } else { + + /* Compute the next timestep (gravity condition) */ + float new_dt = gravity_compute_timestep(gp); + + /* Limit timestep within the allowed range */ + new_dt = fminf(new_dt, global_dt_max); + new_dt = fmaxf(new_dt, global_dt_min); + + /* Convert to integer time */ + new_dti = new_dt * timeBase_inv; + + /* Recover the current timestep */ + const int current_dti = gp->ti_end - gp->ti_begin; + + /* Limit timestep increase */ + if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); + + /* Put this timestep on the time line */ + int dti_timeline = max_nr_timesteps; + while (new_dti < dti_timeline) dti_timeline /= 2; + + /* Now we have a time step, proceed with the kick */ + new_dti = dti_timeline; + } + + /* Compute the time step for this kick */ + const int ti_start = (gp->ti_begin + gp->ti_end) / 2; + const int ti_end = gp->ti_end + new_dti / 2; + const double dt = (ti_end - ti_start) * timeBase; + const double half_dt = (ti_end - gp->ti_end) * timeBase; + + /* Kick particles in momentum space */ + gp->v_full[0] += gp->a_grav[0] * dt; + gp->v_full[1] += gp->a_grav[1] * dt; + gp->v_full[2] += gp->a_grav[2] * dt; + + /* Extra kick work */ + gravity_kick_extra(gp, dt, half_dt); + + /* Number of updated g-particles */ + g_updated++; + } + } + + /* Now do the hydro ones... */ + /* Loop over the particles and kick the active ones. */ for (int k = 0; k < count; k++) { /* Get a handle on the part. 
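*/

The integer time-line logic above rounds a candidate step down to the next power-of-two subdivision of max_nr_timesteps, so every particle step nests inside the global time-line. Extracted as a self-contained helper (a sketch; max_nr_timesteps is defined elsewhere in SWIFT):

    /* Sketch: snap a candidate integer time-step down to a power-of-two
       subdivision of the time-line, as runner_dokick does. */
    int timeline_snap(int new_dti, int max_nr_timesteps) {
      int dti_timeline = max_nr_timesteps;
      while (new_dti < dti_timeline) dti_timeline /= 2;
      return dti_timeline;
    }

With max_nr_timesteps = 1024, a candidate new_dti = 300 snaps down to 256.
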
-      p = &parts[k];
-      xp = &xparts[k];
-
-      const float m = p->mass;
-      x[0] = p->x[0];
-      x[1] = p->x[1];
-      x[2] = p->x[2];
+      struct part *const p = &parts[k];
+      struct xpart *const xp = &xparts[k];

       /* If particle needs to be kicked */
       if (is_fixdt || p->ti_end <= ti_current) {
@@ -798,8 +888,10 @@

         /* And do the same of the extra variable */
         hydro_end_force(p);
+        if (p->gpart != NULL) gravity_end_force(p->gpart);

         /* Now we are ready to compute the next time-step size */
+        int new_dti;

         if (is_fixdt) {
@@ -808,9 +900,13 @@

         } else {

-          /* Compute the next timestep */
+          /* Compute the next timestep (hydro condition) */
           const float new_dt_hydro = hydro_compute_timestep(p, xp);
-          const float new_dt_grav = gravity_compute_timestep(p, xp);
+
+          /* Compute the next timestep (gravity condition) */
+          float new_dt_grav = FLT_MAX;
+          if (p->gpart != NULL)
+            new_dt_grav = gravity_compute_timestep(p->gpart);

           float new_dt = fminf(new_dt_hydro, new_dt_grav);
@@ -835,7 +931,7 @@
           if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti);

           /* Put this timestep on the time line */
-          dti_timeline = max_nr_timesteps;
+          int dti_timeline = max_nr_timesteps;
           while (new_dti < dti_timeline) dti_timeline /= 2;

           /* Now we have a time step, proceed with the kick */
@@ -845,34 +941,51 @@

         /* Compute the time step for this kick */
         const int ti_start = (p->ti_begin + p->ti_end) / 2;
         const int ti_end = p->ti_end + new_dti / 2;
-        const float dt = (ti_end - ti_start) * timeBase;
-        const float half_dt = (ti_end - p->ti_end) * timeBase;
+        const double dt = (ti_end - ti_start) * timeBase;
+        const double half_dt = (ti_end - p->ti_end) * timeBase;

         /* Move particle forward in time */
         p->ti_begin = p->ti_end;
         p->ti_end = p->ti_begin + new_dti;

+        /* Get the acceleration */
+        float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]};
+        if (p->gpart != NULL) {
+          a_tot[0] += p->gpart->a_grav[0];
+          a_tot[1] += p->gpart->a_grav[1];
+          a_tot[2] += p->gpart->a_grav[2];
+        }
+
         /* Kick particles in momentum space */
-        xp->v_full[0] += p->a_hydro[0] * dt;
-        xp->v_full[1] += p->a_hydro[1] * dt;
-        xp->v_full[2] += p->a_hydro[2] * dt;
+        xp->v_full[0] += a_tot[0] * dt;
+        xp->v_full[1] += a_tot[1] * dt;
+        xp->v_full[2] += a_tot[2] * dt;
+
+        if (p->gpart != NULL) {
+          p->gpart->v_full[0] = xp->v_full[0];
+          p->gpart->v_full[1] = xp->v_full[1];
+          p->gpart->v_full[2] = xp->v_full[2];
+        }

-        p->v[0] = xp->v_full[0] - half_dt * p->a_hydro[0];
-        p->v[1] = xp->v_full[1] - half_dt * p->a_hydro[1];
-        p->v[2] = xp->v_full[2] - half_dt * p->a_hydro[2];
+        /* Go back by half-step for the hydro velocity */
+        p->v[0] = xp->v_full[0] - half_dt * a_tot[0];
+        p->v[1] = xp->v_full[1] - half_dt * a_tot[1];
+        p->v[2] = xp->v_full[2] - half_dt * a_tot[2];

         /* Extra kick work */
         hydro_kick_extra(p, xp, dt, half_dt);
+        if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt, half_dt);

         /* Number of updated particles */
         updated++;
+        if (p->gpart != NULL) g_updated++;
       }

       /* Now collect quantities for statistics */
-      v_full[0] = xp->v_full[0];
-      v_full[1] = xp->v_full[1];
-      v_full[2] = xp->v_full[2];
+      const double x[3] = {p->x[0], p->x[1], p->x[2]};
+      const float v_full[3] = {xp->v_full[0], xp->v_full[1], xp->v_full[2]};
+      const float m = p->mass;

       /* Collect mass */
       mass += m;
@@ -906,13 +1019,14 @@ void runner_dokick(struct 
runner *r, struct cell *c, int timer) { /* Loop over the progeny. */ for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; + struct cell *const cp = c->progeny[k]; /* Recurse */ runner_dokick(r, cp, 0); /* And aggregate */ updated += cp->updated; + g_updated += cp->g_updated; e_kin += cp->e_kin; e_int += cp->e_int; e_pot += cp->e_pot; @@ -930,6 +1044,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Store the values. */ c->updated = updated; + c->g_updated = g_updated; c->e_kin = e_kin; c->e_int = e_int; c->e_pot = e_pot; diff --git a/src/runner_doiact.h b/src/runner_doiact.h index cf5d56e94169b44e6cd2974a3422a0bc5e4610ac..de339db6133fcc829bdc6ee0ce9e537b68982422 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1235,7 +1235,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { #else /* Does pi need to be updated too? */ - if (pi->dt <= dt_step) { + if (pi->ti_end <= ti_current) { /* Add this interaction to the symmetric queue. */ r2q2[icount2] = r2; diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index f374339da75e31b39a5295fcd8bbc23c34d8d67d..02626295a49f314fef840bc044a476f5c9cf332d 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -267,9 +267,9 @@ void runner_dograv_down(struct runner *r, struct cell *c) { /* Apply the multipole acceleration to all gparts. */ for (int k = 0; k < c->gcount; k++) { struct gpart *p = &c->gparts[k]; - p->a[0] += m->a[0]; - p->a[1] += m->a[1]; - p->a[2] += m->a[2]; + p->a_grav[0] += m->a[0]; + p->a_grav[1] += m->a[1]; + p->a_grav[2] += m->a[2]; } } } @@ -594,5 +594,4 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, if (gettimer) TIMER_TOC(timer_dosub_grav); } - #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */ diff --git a/src/scheduler.c b/src/scheduler.c index 722e344b5a86b5fbdc42c7038fd3cb00e44b2ee8..38a1cd8c663307e0c0378d8bec2e0cd3d8f37fa8 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -95,32 +95,29 @@ void scheduler_addunlock(struct scheduler *s, struct task *ta, void scheduler_splittasks(struct scheduler *s) { - int j, k, ind, sid, tid = 0, redo; - struct cell *ci, *cj; - double hi, hj, shift[3]; - struct task *t, *t_old; - // float dt_step = s->dt_step; - int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, - {-1, -1, 11, 10, 5, 4, 2, 1}, - {-1, -1, -1, 12, 7, 6, 4, 3}, - {-1, -1, -1, -1, 8, 7, 5, 4}, - {-1, -1, -1, -1, -1, 12, 10, 9}, - {-1, -1, -1, -1, -1, -1, 11, 10}, - {-1, -1, -1, -1, -1, -1, -1, 12}}; - float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, - 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; + const int pts[7][8] = {{-1, 12, 10, 9, 4, 3, 1, 0}, + {-1, -1, 11, 10, 5, 4, 2, 1}, + {-1, -1, -1, 12, 7, 6, 4, 3}, + {-1, -1, -1, -1, 8, 7, 5, 4}, + {-1, -1, -1, -1, -1, 12, 10, 9}, + {-1, -1, -1, -1, -1, -1, 11, 10}, + {-1, -1, -1, -1, -1, -1, -1, 12}}; + const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, + 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, + 0.5788, 0.4025, 0.5788}; /* Loop through the tasks... */ - redo = 0; - t_old = t = NULL; + int tid = 0, redo = 0; + struct task *t_old = NULL; while (1) { /* Get a pointer on the task. 
*/ + struct task *t = t_old; if (redo) { redo = 0; - t = t_old; } else { - if ((ind = atomic_inc(&tid)) < s->nr_tasks) + const int ind = atomic_inc(&tid); + if (ind < s->nr_tasks) t_old = t = &s->tasks[s->tasks_ind[ind]]; else break; @@ -161,7 +158,7 @@ void scheduler_splittasks(struct scheduler *s) { if (t->type == task_type_self) { /* Get a handle on the cell involved. */ - ci = t->ci; + struct cell *ci = t->ci; /* Foreign task? */ if (ci->nodeID != s->nodeID) { @@ -187,18 +184,18 @@ void scheduler_splittasks(struct scheduler *s) { redo = 1; /* Add the self task. */ - for (k = 0; ci->progeny[k] == NULL; k++) - ; - t->ci = ci->progeny[k]; - for (k += 1; k < 8; k++) + int first_child = 0; + while (ci->progeny[first_child] == NULL) first_child++; + t->ci = ci->progeny[first_child]; + for (int k = first_child + 1; k < 8; k++) if (ci->progeny[k] != NULL) scheduler_addtask(s, task_type_self, t->subtype, 0, 0, ci->progeny[k], NULL, 0); /* Make a task for each pair of progeny. */ - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL) - for (k = j + 1; k < 8; k++) + for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL) scheduler_addtask(s, task_type_pair, t->subtype, pts[j][k], 0, ci->progeny[j], ci->progeny[k], 0); @@ -211,10 +208,10 @@ void scheduler_splittasks(struct scheduler *s) { else if (t->type == task_type_pair) { /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; - hi = ci->dmin; - hj = cj->dmin; + struct cell *ci = t->ci; + struct cell *cj = t->cj; + const double hi = ci->dmin; + const double hj = cj->dmin; /* Foreign task? */ if (ci->nodeID != s->nodeID && cj->nodeID != s->nodeID) { @@ -224,7 +221,8 @@ void scheduler_splittasks(struct scheduler *s) { /* Get the sort ID, use space_getsid and not t->flags to make sure we get ci and cj swapped if needed. */ - sid = space_getsid(s->space, &ci, &cj, shift); + double shift[3]; + int sid = space_getsid(s->space, &ci, &cj, shift); /* Should this task be split-up? */ if (ci->split && cj->split && @@ -480,9 +478,9 @@ void scheduler_splittasks(struct scheduler *s) { /* Replace the current task. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (cj->progeny[k] != NULL) { t = scheduler_addtask(s, task_type_pair, t->subtype, 0, 0, ci->progeny[j], cj->progeny[k], 0); @@ -521,8 +519,8 @@ void scheduler_splittasks(struct scheduler *s) { else if (t->type == task_type_grav_mm) { /* Get a handle on the cells involved. */ - ci = t->ci; - cj = t->cj; + struct cell *ci = t->ci; + struct cell *cj = t->cj; /* Self-interaction? */ if (cj == NULL) { @@ -546,7 +544,7 @@ void scheduler_splittasks(struct scheduler *s) { /* Split this task into tasks on its progeny. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -555,7 +553,7 @@ void scheduler_splittasks(struct scheduler *s) { } else t = scheduler_addtask(s, task_type_grav_mm, task_subtype_none, 0, 0, ci->progeny[j], NULL, 0); - for (k = j + 1; k < 8; k++) + for (int k = j + 1; k < 8; k++) if (ci->progeny[k] != NULL && ci->progeny[k]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -594,7 +592,7 @@ void scheduler_splittasks(struct scheduler *s) { /* Get the opening angle theta. 
*/ float dx[3], theta; - for (k = 0; k < 3; k++) { + for (int k = 0; k < 3; k++) { dx[k] = fabs(ci->loc[k] - cj->loc[k]); if (s->space->periodic && dx[k] > 0.5 * s->space->dim[k]) dx[k] = -dx[k] + s->space->dim[k]; @@ -615,9 +613,9 @@ void scheduler_splittasks(struct scheduler *s) { /* Split this task into tasks on its progeny. */ t->type = task_type_none; - for (j = 0; j < 8; j++) + for (int j = 0; j < 8; j++) if (ci->progeny[j] != NULL && ci->progeny[j]->gcount > 0) { - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (cj->progeny[k] != NULL && cj->progeny[k]->gcount > 0) { if (t->type == task_type_none) { t->type = task_type_grav_mm; @@ -663,17 +661,14 @@ struct task *scheduler_addtask(struct scheduler *s, int type, int subtype, int flags, int wait, struct cell *ci, struct cell *cj, int tight) { - int ind; - struct task *t; - /* Get the next free task. */ - ind = atomic_inc(&s->tasks_next); + const int ind = atomic_inc(&s->tasks_next); /* Overflow? */ if (ind >= s->size) error("Task list overflow."); /* Get a pointer to the new task. */ - t = &s->tasks[ind]; + struct task *t = &s->tasks[ind]; /* Copy the data. */ t->type = type; @@ -768,24 +763,24 @@ void scheduler_set_unlocks(struct scheduler *s) { void scheduler_ranktasks(struct scheduler *s) { - int i, j = 0, k, temp, left = 0, rank; - struct task *t, *tasks = s->tasks; - int *tid = s->tasks_ind, nr_tasks = s->nr_tasks; + struct task *tasks = s->tasks; + int *tid = s->tasks_ind; + const int nr_tasks = s->nr_tasks; /* Run through the tasks and get all the waits right. */ - for (i = 0, k = 0; k < nr_tasks; k++) { + for (int k = 0; k < nr_tasks; k++) { tid[k] = k; - for (j = 0; j < tasks[k].nr_unlock_tasks; j++) + for (int j = 0; j < tasks[k].nr_unlock_tasks; j++) tasks[k].unlock_tasks[j]->wait += 1; } /* Main loop. */ - for (j = 0, rank = 0; left < nr_tasks; rank++) { + for (int j = 0, rank = 0, left = 0; left < nr_tasks; rank++) { /* Load the tids of tasks with no waits. */ - for (k = left; k < nr_tasks; k++) + for (int k = left; k < nr_tasks; k++) if (tasks[tid[k]].wait == 0) { - temp = tid[j]; + int temp = tid[j]; tid[j] = tid[k]; tid[k] = temp; j += 1; @@ -795,15 +790,16 @@ void scheduler_ranktasks(struct scheduler *s) { if (j == left) error("Unsatisfiable task dependencies detected."); /* Unlock the next layer of tasks. */ - for (i = left; i < j; i++) { - t = &tasks[tid[i]]; + for (int i = left; i < j; i++) { + struct task *t = &tasks[tid[i]]; t->rank = rank; tid[i] = t - tasks; if (tid[i] >= nr_tasks) error("Task index overshoot."); /* message( "task %i of type %s has rank %i." , i , (t->type == task_type_self) ? "self" : (t->type == task_type_pair) ? "pair" : "sort" , rank ); */ - for (k = 0; k < t->nr_unlock_tasks; k++) t->unlock_tasks[k]->wait -= 1; + for (int k = 0; k < t->nr_unlock_tasks; k++) + t->unlock_tasks[k]->wait -= 1; } /* The new left (no, not tony). */ @@ -825,8 +821,6 @@ void scheduler_ranktasks(struct scheduler *s) { void scheduler_reset(struct scheduler *s, int size) { - int k; - /* Do we need to re-allocate? */ if (size > s->size) { @@ -853,7 +847,7 @@ void scheduler_reset(struct scheduler *s, int size) { s->nr_unlocks = 0; /* Set the task pointers in the queues. 
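
scheduler_ranktasks above is, in effect, Kahn's algorithm on the task graph: each round ranks the tasks whose wait count has dropped to zero, then releases their dependants. A minimal sketch of the same idea with toy types; the caller is assumed to pre-fill wait with each task's in-degree and rank with -1.

    struct toy_task {
      int wait;  /* unresolved dependencies */
      int rank;  /* -1 until assigned */
      int nr_unlocks;
      struct toy_task **unlocks;
    };

    void rank_tasks(struct toy_task *tasks, int n) {
      for (int left = 0, rank = 0; left < n; rank++) {
        int advanced = 0;
        /* Rank every task whose dependencies are all satisfied. */
        for (int k = 0; k < n; k++)
          if (tasks[k].rank < 0 && tasks[k].wait == 0) {
            tasks[k].rank = rank;
            advanced++;
          }
        if (advanced == 0) return; /* cycle: unsatisfiable dependencies */
        /* Release everything the newly ranked tasks unlock. */
        for (int k = 0; k < n; k++)
          if (tasks[k].rank == rank)
            for (int j = 0; j < tasks[k].nr_unlocks; j++)
              tasks[k].unlocks[j]->wait--;
        left += advanced;
      }
    }
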
*/ - for (k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; + for (int k = 0; k < s->nr_queues; k++) s->queues[k].tasks = s->tasks; } /** @@ -864,21 +858,23 @@ void scheduler_reset(struct scheduler *s, int size) { void scheduler_reweight(struct scheduler *s) { - int k, j, nr_tasks = s->nr_tasks, *tid = s->tasks_ind; - struct task *t, *tasks = s->tasks; - int nodeID = s->nodeID; - float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.1897, - 0.4025, 0.1897, 0.4025, 0.5788, 0.4025, 0.5788}; - float wscale = 0.001; + const int nr_tasks = s->nr_tasks; + int *tid = s->tasks_ind; + struct task *tasks = s->tasks; + const int nodeID = s->nodeID; + const float sid_scale[13] = {0.1897, 0.4025, 0.1897, 0.4025, 0.5788, + 0.4025, 0.1897, 0.4025, 0.1897, 0.4025, + 0.5788, 0.4025, 0.5788}; + const float wscale = 0.001; // ticks tic; /* Run through the tasks backwards and set their waits and weights. */ // tic = getticks(); - for (k = nr_tasks - 1; k >= 0; k--) { - t = &tasks[tid[k]]; + for (int k = nr_tasks - 1; k >= 0; k--) { + struct task *t = &tasks[tid[k]]; t->weight = 0; - for (j = 0; j < t->nr_unlock_tasks; j++) + for (int j = 0; j < t->nr_unlock_tasks; j++) if (t->unlock_tasks[j]->weight > t->weight) t->weight = t->unlock_tasks[j]->weight; if (!t->implicit && t->tic > 0) @@ -959,8 +955,9 @@ void scheduler_reweight(struct scheduler *s) { void scheduler_start(struct scheduler *s, unsigned int mask, unsigned int submask) { - int nr_tasks = s->nr_tasks, *tid = s->tasks_ind; - struct task *t, *tasks = s->tasks; + const int nr_tasks = s->nr_tasks; + int *tid = s->tasks_ind; + struct task *tasks = s->tasks; // ticks tic; /* Store the masks */ @@ -986,8 +983,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, const int waiting_old = s->waiting; /* We are going to use the task structure in a modified way to pass - information - to the task. Don't do this at home ! + information to the task. Don't do this at home ! - ci and cj will give the range of tasks to which the waits will be applied - the flags will be used to transfer the mask - the rank will be used to transfer the submask @@ -1012,6 +1008,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, /* Wait for the rewait tasks to have executed. */ pthread_mutex_lock(&s->sleep_mutex); + pthread_cond_broadcast(&s->sleep_cond); while (s->waiting > waiting_old) { pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); } @@ -1025,7 +1022,7 @@ void scheduler_start(struct scheduler *s, unsigned int mask, /* Loop over the tasks and enqueue whoever is ready. */ // tic = getticks(); for (int k = 0; k < s->nr_tasks; k++) { - t = &tasks[tid[k]]; + struct task *t = &tasks[tid[k]]; if (atomic_dec(&t->wait) == 1 && ((1 << t->type) & s->mask) && ((1 << t->subtype) & s->submask) && !t->skip) { scheduler_enqueue(s, t); @@ -1033,6 +1030,11 @@ void scheduler_start(struct scheduler *s, unsigned int mask, } } + /* To be safe, fire of one last sleep_cond in a safe way. */ + pthread_mutex_lock(&s->sleep_mutex); + pthread_cond_broadcast(&s->sleep_cond); + pthread_mutex_unlock(&s->sleep_mutex); + // message( "enqueueing tasks took %.3f %s." , // clocks_from_ticks( getticks() - tic ), clocks_getunit()); } @@ -1046,10 +1048,8 @@ void scheduler_start(struct scheduler *s, unsigned int mask, void scheduler_enqueue(struct scheduler *s, struct task *t) { + /* The target queue for this task. */ int qid = -1; -#ifdef WITH_MPI - int err; -#endif /* Fail if this task has already been enqueued before. 
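
The broadcasts added above follow the standard recipe against lost wake-ups: update the shared predicate and signal while holding the same mutex the waiters use, and have the waiters re-check the predicate in a loop. Schematically, with invented names mirroring the sleep_mutex/sleep_cond pair:

    #include <pthread.h>

    pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
    pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
    int waiting = 1; /* the shared predicate */

    /* Waiter: sleep until the predicate clears. */
    void wait_for_work(void) {
      pthread_mutex_lock(&m);
      while (waiting > 0) pthread_cond_wait(&cv, &m);
      pthread_mutex_unlock(&m);
    }

    /* Waker: change the predicate and broadcast under the same mutex. */
    void publish_work(void) {
      pthread_mutex_lock(&m);
      waiting = 0;
      pthread_cond_broadcast(&cv);
      pthread_mutex_unlock(&m);
    }
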
*/ if (t->rid >= 0) error("Task has already been enqueued."); @@ -1071,6 +1071,9 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { /* Otherwise, look for a suitable queue. */ else { +#ifdef WITH_MPI + int err; +#endif /* Find the previous owner for each task type, and do any pre-processing needed. */ @@ -1093,13 +1096,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - if ((err = MPI_Irecv(t->ci->parts, t->ci->count, s->part_mpi_type, - t->ci->nodeID, t->flags, MPI_COMM_WORLD, - &t->req)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int len; - MPI_Error_string(err, buff, &len); - error("Failed to emit irecv for particle data (%s).", buff); + err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, + t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (err != MPI_SUCCESS) { + mpi_error(err, "Failed to emit irecv for particle data."); } // message( "receiving %i parts with tag=%i from %i to %i." , // t->ci->count , t->flags , t->ci->nodeID , s->nodeID ); @@ -1111,13 +1111,10 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - if ((err = MPI_Isend(t->ci->parts, t->ci->count, s->part_mpi_type, - t->cj->nodeID, t->flags, MPI_COMM_WORLD, - &t->req)) != MPI_SUCCESS) { - char buff[MPI_MAX_ERROR_STRING]; - int len; - MPI_Error_string(err, buff, &len); - error("Failed to emit isend for particle data (%s).", buff); + err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, + t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); + if (err != MPI_SUCCESS) { + mpi_error(err, "Failed to emit isend for particle data."); } // message( "sending %i parts with tag=%i from %i to %i." , // t->ci->count , t->flags , s->nodeID , t->cj->nodeID ); @@ -1133,7 +1130,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { if (qid >= s->nr_queues) error("Bad computed qid."); - /* If no previous owner, find the shortest queue. */ + /* If no previous owner, pick a random queue. */ if (qid < 0) qid = rand() % s->nr_queues; /* Increase the waiting counter. */ @@ -1164,7 +1161,7 @@ struct task *scheduler_done(struct scheduler *s, struct task *t) { for (int k = 0; k < t->nr_unlock_tasks; k++) { struct task *t2 = t->unlock_tasks[k]; - int res = atomic_dec(&t2->wait); + const int res = atomic_dec(&t2->wait); if (res < 1) { error("Negative wait!"); } else if (res == 1) { @@ -1203,7 +1200,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t) { they are ready. */ for (int k = 0; k < t->nr_unlock_tasks; k++) { struct task *t2 = t->unlock_tasks[k]; - int res = atomic_dec(&t2->wait); + const int res = atomic_dec(&t2->wait); if (res < 1) { error("Negative wait!"); } else if (res == 1) { @@ -1240,7 +1237,7 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, const struct task *prev) { struct task *res = NULL; - int k, nr_queues = s->nr_queues; + const int nr_queues = s->nr_queues; unsigned int seed = qid; /* Check qid. */ @@ -1264,10 +1261,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, /* If unsuccessful, try stealing from the other queues. 
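
The removed error paths above show the raw MPI_Error_string plumbing that the new mpi_error() call now wraps (presumably defined alongside error() in error.h). A standalone stand-in with the same behaviour might look like this sketch:

    #include <mpi.h>
    #include <stdio.h>

    /* Sketch: turn an MPI error code into a readable message and abort. */
    void report_mpi_error(int res, const char *msg) {
      char buff[MPI_MAX_ERROR_STRING];
      int len = 0;
      MPI_Error_string(res, buff, &len);
      fprintf(stderr, "%s (%s)\n", msg, buff);
      MPI_Abort(MPI_COMM_WORLD, res);
    }
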
*/ if (s->flags & scheduler_flag_steal) { int count = 0, qids[nr_queues]; - for (k = 0; k < nr_queues; k++) + for (int k = 0; k < nr_queues; k++) if (s->queues[k].count > 0) qids[count++] = k; - for (k = 0; k < scheduler_maxsteal && count > 0; k++) { - int ind = rand_r(&seed) % count; + for (int k = 0; k < scheduler_maxsteal && count > 0; k++) { + const int ind = rand_r(&seed) % count; TIMER_TIC res = queue_gettask(&s->queues[qids[ind]], prev, 0); TIMER_TOC(timer_qsteal); @@ -1287,7 +1284,10 @@ struct task *scheduler_gettask(struct scheduler *s, int qid, if (res == NULL) { #endif pthread_mutex_lock(&s->sleep_mutex); - if (s->waiting > 0) pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); + res = queue_gettask(&s->queues[qid], prev, 1); + if (res == NULL && s->waiting > 0) { + pthread_cond_wait(&s->sleep_cond, &s->sleep_mutex); + } pthread_mutex_unlock(&s->sleep_mutex); } } @@ -1352,12 +1352,6 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, s->tasks = NULL; s->tasks_ind = NULL; scheduler_reset(s, nr_tasks); - -/* Construct types for MPI communications */ -#ifdef WITH_MPI - part_create_mpi_type(&s->part_mpi_type); - xpart_create_mpi_type(&s->xpart_mpi_type); -#endif } /** @@ -1366,7 +1360,7 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, * @param s The #scheduler * @param fileName Name of the file to write to */ -void scheduler_print_tasks(struct scheduler *s, char *fileName) { +void scheduler_print_tasks(const struct scheduler *s, const char *fileName) { const int nr_tasks = s->nr_tasks, *tid = s->tasks_ind; struct task *t, *tasks = s->tasks; diff --git a/src/scheduler.h b/src/scheduler.h index 3f2d8c289d0d691d0d155b20ae0522c5830524aa..64c694aea295c13810a20b626055fc6c15eb0af8 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -100,12 +100,6 @@ struct scheduler { /* The node we are working on. */ int nodeID; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. */ @@ -128,7 +122,7 @@ struct task *scheduler_unlock(struct scheduler *s, struct task *t); void scheduler_addunlock(struct scheduler *s, struct task *ta, struct task *tb); void scheduler_set_unlocks(struct scheduler *s); void scheduler_dump_queue(struct scheduler *s); -void scheduler_print_tasks(struct scheduler *s, char *fileName); +void scheduler_print_tasks(const struct scheduler *s, const char *fileName); void scheduler_do_rewait(struct task *t_begin, struct task *t_end, unsigned int mask, unsigned int submask); diff --git a/src/serial_io.c b/src/serial_io.c index 8e63db5cfad3a3b50fc7e350bbac6ce09708230a..40bd2b1c8921f4acbfa0950984d6915ebd3d241e 100644 --- a/src/serial_io.c +++ b/src/serial_io.c @@ -57,18 +57,18 @@ * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param importance If COMPULSORY, the data must be present in the IC file. If *OPTIONAL, the array will be zeroed when the data is not present. * * @todo A better version using HDF5 hyper-slabs to read the file directly into *the part array * will be written once the structures have been stabilized. - * - * Calls #error() if an error occurs. 
*/ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int dim, long long N_total, long long offset, - char* part_c, enum DATA_IMPORTANCE importance) { + char* part_c, size_t partSize, + enum DATA_IMPORTANCE importance) { hid_t h_data = 0, h_err = 0, h_type = 0, h_memspace = 0, h_filespace = 0; hsize_t shape[2], offsets[2]; htri_t exist = 0; @@ -76,7 +76,6 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; /* Check whether the dataspace exists or not */ @@ -172,9 +171,10 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * Routines writing an output file *-----------------------------------------------------------------------------*/ -void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, long long N_total, int dim, - struct UnitSystem* us, enum UnitConversionFactor convFactor) { +void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + long long N_total, int dim, struct UnitSystem* us, + enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0; int rank = 0; hsize_t shape[2]; @@ -234,7 +234,7 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - writeXMFline(xmfFile, fileName, name, N_total, dim, type); + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N_total, dim, type); /* Write unit conversion factors for this data set */ conversionString(buffer, us, convFactor); @@ -255,21 +255,22 @@ void prepareArray(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param grp The group in which to write. * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param name The name of the array to write. * @param type The #DATA_TYPE of the array. * @param N The number of particles to write. * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param us The UnitSystem currently in use - * @param convFactor The UnitConversionFactor for this array - * - * - * Calls #error() if an error occurs. 
+ * @param convFactor The UnitConversionFactor for this array
 */
-void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
-                       enum DATA_TYPE type, int N, int dim, long long N_total,
-                       int mpi_rank, long long offset, char* part_c,
+void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile,
+                       char* partTypeGroupName, char* name, enum DATA_TYPE type,
+                       int N, int dim, long long N_total, int mpi_rank,
+                       long long offset, char* part_c, size_t partSize,
                        struct UnitSystem* us,
                        enum UnitConversionFactor convFactor) {
@@ -279,15 +280,14 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name,
   int i = 0, rank = 0;
   const size_t typeSize = sizeOfType(type);
   const size_t copySize = typeSize * dim;
-  const size_t partSize = sizeof(struct part);
   char* temp_c = 0;

   /* message("Writing '%s' array...", name); */

   /* Prepare the arrays in the file */
   if (mpi_rank == 0)
-    prepareArray(grp, fileName, xmfFile, name, type, N_total, dim, us,
-                 convFactor);
+    prepareArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N_total,
+                 dim, us, convFactor);

   /* Allocate temporary buffer */
   temp = malloc(N * dim * sizeOfType(type));
@@ -362,7 +362,7 @@
 #define readArray(grp, name, type, N, dim, part, N_total, offset, field, \
                   importance) \
   readArrayBackEnd(grp, name, type, N, dim, N_total, offset, \
-                   (char*)(&(part[0]).field), importance)
+                   (char*)(&(part[0]).field), sizeof(part[0]), importance)

 /**
  * @brief A helper macro to call the writeArrayBackEnd function more easily.
  *
  * @param grp The group in which to write.
  * @param fileName Unused parameter in non-MPI mode
  * @param xmfFile Unused parameter in non-MPI mode
  * @param name The name of the array to write.
+ * @param partTypeGroupName The name of the group containing the particles in
+ *the HDF5 file.
  * @param type The #DATA_TYPE of the array.
  * @param N The number of particles to write.
  * @param dim The dimension of the data (1 for scalar, 3 for vector)
  * @param part A (char*) pointer on the first occurrence of the field of
- *interest
- *in the parts array
+ *interest in the parts array
+ * @param N_total Unused parameter in non-MPI mode
+ * @param mpi_rank Unused parameter in non-MPI mode
+ * @param offset Unused parameter in non-MPI mode
  * @param field The name (code name) of the field to read from.
  * @param us The UnitSystem currently in use
  * @param convFactor The UnitConversionFactor for this array
  *
  */
-#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \
-                   mpi_rank, offset, field, us, convFactor) \
-  writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, N_total, \
-                    mpi_rank, offset, (char*)(&(part[0]).field), us, \
-                    convFactor)
+#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \
+                   dim, part, N_total, mpi_rank, offset, field, us, \
+                   convFactor) \
+  writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \
+                    dim, N_total, mpi_rank, offset, (char*)(&(part[0]).field), \
+                    sizeof(part[0]), us, convFactor)

 /* Import the right hydro definition */
 #include "hydro_io.h"
+/* Import the right gravity definition */
+#include "gravity_io.h"

 /**
  * @brief Reads an HDF5 initial condition file (GADGET-3 type)
  *
  * @param fileName The file to read.
  * @param dim (output) The dimension of the volume read from the file.
- * @param parts (output) The array of #part read from the file. 
- * @param N (output) The number of particles read from the file. + * @param parts (output) The array of #part (gas particles) read from the file. + * @param gparts (output) The array of #gpart read from the file. + * @param Ngas (output) The number of #part read from the file on that node. + * @param Ngparts (output) The number of #gpart read from the file on that node. * @param periodic (output) 1 if the volume is periodic, 0 if not. + * @param mpi_rank The MPI rank of this node + * @param mpi_size The number of MPI ranks + * @param comm The MPI communicator + * @param info The MPI information object * * Opens the HDF5 file fileName and reads the particles contained * in the parts array. N is the returned number of particles found @@ -411,17 +424,18 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * */ void read_ic_serial(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info) { + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info) { hid_t h_file = 0, h_grp = 0; - double boxSize[3] = {0.0, -1.0, -1.0}; /* GADGET has only cubic boxes (in cosmological mode) */ - int numParticles[6] = {0}; - /* GADGET has 6 particle types. We only keep the type 0*/ - int numParticles_highWord[6] = {0}; - long long offset = 0; - long long N_total = 0; - int rank; + double boxSize[3] = {0.0, -1.0, -1.0}; + /* GADGET has 6 particle types. We only keep the type 0 & 1 for now*/ + int numParticles[NUM_PARTICLE_TYPES] = {0}; + int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + size_t N[NUM_PARTICLE_TYPES] = {0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; /* First read some information about the content */ if (mpi_rank == 0) { @@ -453,8 +467,10 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, readAttribute(h_grp, "NumPart_Total", UINT, numParticles); readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); - N_total = ((long long)numParticles[0]) + - ((long long)numParticles_highWord[0] << 32); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = ((long long)numParticles[ptype]) + + ((long long)numParticles_highWord[ptype] << 32); + dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? boxSize[0] : boxSize[2]; @@ -474,22 +490,38 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, /* Now need to broadcast that information to all ranks. */ MPI_Bcast(periodic, 1, MPI_INT, 0, comm); - MPI_Bcast(&N_total, 1, MPI_LONG_LONG, 0, comm); + MPI_Bcast(&N_total, NUM_PARTICLE_TYPES, MPI_LONG_LONG, 0, comm); MPI_Bcast(dim, 3, MPI_DOUBLE, 0, comm); /* Divide the particles among the tasks. 
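
The division just below splits each particle type into near-equal contiguous blocks using integer arithmetic only, with no explicit remainder handling. A tiny demonstration of the same formula:

    #include <stdio.h>

    int main(void) {
      const long long N_total = 10;
      const int size = 3;
      for (int rank = 0; rank < size; ++rank) {
        const long long offset = rank * N_total / size;
        const long long N = (rank + 1) * N_total / size - offset;
        printf("rank %d: offset=%lld count=%lld\n", rank, offset, N);
      }
      return 0; /* blocks of 3, 3 and 4 covering all 10 items */
    }
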
*/ - offset = mpi_rank * N_total / mpi_size; - *N = (mpi_rank + 1) * N_total / mpi_size - offset; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + offset[ptype] = mpi_rank * N_total[ptype] / mpi_size; + N[ptype] = (mpi_rank + 1) * N_total[ptype] / mpi_size - offset[ptype]; + } - /* Allocate memory to store particles */ - if (posix_memalign((void*)parts, part_align, (*N) * sizeof(struct part)) != 0) + /* Allocate memory to store SPH particles */ + *Ngas = N[0]; + if (posix_memalign((void*)parts, part_align, (*Ngas) * sizeof(struct part)) != + 0) error("Error while allocating memory for particles"); - bzero(*parts, *N * sizeof(struct part)); + bzero(*parts, *Ngas * sizeof(struct part)); + + /* Allocate memory to store all particles */ + const size_t Ndm = N[1]; + *Ngparts = N[1] + N[0]; + if (posix_memalign((void*)gparts, gpart_align, + *Ngparts * sizeof(struct gpart)) != 0) + error("Error while allocating memory for gravity particles"); + bzero(*gparts, *Ngparts * sizeof(struct gpart)); + /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / */ /* (1024.*1024.)); */ + /* message("BoxSize = %lf", dim[0]); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */ + /* Now loop over ranks and read the data */ - for (rank = 0; rank < mpi_size; ++rank) { + for (int rank = 0; rank < mpi_size; ++rank) { /* Is it this rank's turn to read ? */ if (rank == mpi_rank) { @@ -498,17 +530,41 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, if (h_file < 0) error("Error while opening file '%s' on rank %d.", fileName, mpi_rank); - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT); - if (h_grp < 0) - error("Error while opening particle group on rank %d.\n", mpi_rank); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Read particle fields into the particle structure */ - hydro_read_particles(h_grp, *N, N_total, offset, *parts); + /* Don't do anything if no particle of this kind */ + if (N[ptype] == 0) continue; - /* Close particle group */ - H5Gclose(h_grp); + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } + + /* Read particle fields into the particle structure */ + switch (ptype) { + + case GAS: + hydro_read_particles(h_grp, N[ptype], N_total[ptype], offset[ptype], + *parts); + break; + + case DM: + darkmatter_read_particles(h_grp, N[ptype], N_total[ptype], + offset[ptype], *gparts); + break; + + default: + error("Particle Type %d not yet supported. Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + } /* Close file */ H5Fclose(h_file); @@ -518,6 +574,12 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, MPI_Barrier(comm); } + /* Prepare the DM particles */ + prepare_dm_gparts(*gparts, Ndm); + + /* Now duplicate the hydro particle into gparts */ + duplicate_hydro_gparts(*parts, *gparts, *Ngas, Ndm); + /* message("Done Reading particles..."); */ } @@ -525,7 +587,11 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, * @brief Writes an HDF5 output file (GADGET-3 type) with its XMF descriptor * * @param e The engine containing all the system. 
- * @param us The UnitSystem used for the conversion of units in the output + * @param us The UnitSystem used for the conversion of units in the output. + * @param mpi_rank The MPI rank of this node. + * @param mpi_size The number of MPI ranks. + * @param comm The MPI communicator. + * @param info The MPI information object * * Creates an HDF5 output file and writes the particles contained * in the engine. If such a file already exists, it is erased and replaced @@ -538,35 +604,40 @@ void read_ic_serial(char* fileName, double dim[3], struct part** parts, void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info) { hid_t h_file = 0, h_grp = 0, h_grpsph = 0; - int N = e->s->nr_parts; + const size_t Ngas = e->s->nr_parts; + const size_t Ntot = e->s->nr_gparts; int periodic = e->s->periodic; - int numParticles[6] = {N, 0}; - int numParticlesHighWord[6] = {0}; - unsigned int flagEntropy[6] = {0}; - long long N_total = 0, offset = 0; - double offset_d = 0., N_d = 0., N_total_d = 0.; int numFiles = 1; - int rank = 0; struct part* parts = e->s->parts; - FILE* xmfFile = 0; + struct gpart* gparts = e->s->gparts; + struct gpart* dmparts = NULL; static int outputCount = 0; + FILE* xmfFile = 0; + + /* Number of particles of each type */ + // const size_t Ndm = Ntot - Ngas; + + /* MATTHIEU: Temporary fix to preserve master */ + const size_t Ndm = Ntot > 0 ? Ntot - Ngas : 0; + /* MATTHIEU: End temporary fix */ /* File name */ - char fileName[200]; - sprintf(fileName, "output_%03i.hdf5", outputCount); + char fileName[FILENAME_BUFFER_SIZE]; + snprintf(fileName, FILENAME_BUFFER_SIZE, "output_%03i.hdf5", outputCount); /* Compute offset in the file and total number of particles */ - /* Done using double to allow for up to 2^50=10^15 particles */ - N_d = (double)N; - MPI_Exscan(&N_d, &offset_d, 1, MPI_DOUBLE, MPI_SUM, comm); - N_total_d = offset_d + N_d; - MPI_Bcast(&N_total_d, 1, MPI_DOUBLE, mpi_size - 1, comm); - if (N_total_d > 1.e15) - error( - "Error while computing the offset for parallel output: Simulation has " - "more than 10^15 particles.\n"); - N_total = (long long)N_total_d; - offset = (long long)offset_d; + size_t N[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; + long long N_total[NUM_PARTICLE_TYPES] = {0}; + long long offset[NUM_PARTICLE_TYPES] = {0}; + MPI_Exscan(&N, &offset, NUM_PARTICLE_TYPES, MPI_LONG_LONG, MPI_SUM, comm); + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N_total[ptype] = offset[ptype] + N[ptype]; + + /* The last rank now has the correct N_total. 
Let's broadcast from there */ + MPI_Bcast(&N_total, 6, MPI_LONG_LONG, mpi_size - 1, comm); + + /* Now everybody knows its offset and the total number of particles of each + * type */ /* Do common stuff first */ if (mpi_rank == 0) { @@ -578,7 +649,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, xmfFile = prepareXMFfile(); /* Write the part corresponding to this specific output */ - writeXMFheader(xmfFile, N_total, fileName, e->time); + writeXMFoutputheader(xmfFile, fileName, e->time); /* Open file */ /* message("Opening file '%s'.", fileName); */ @@ -610,15 +681,24 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1); /* GADGET-2 legacy values */ - numParticles[0] = (unsigned int)N_total; - writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, 6); - writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, 6); - numParticlesHighWord[0] = (unsigned int)(N_total >> 32); + /* Number of particles of each type */ + unsigned int numParticles[NUM_PARTICLE_TYPES] = {0}; + unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + numParticles[ptype] = (unsigned int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + NUM_PARTICLE_TYPES); + writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord, - 6); + NUM_PARTICLE_TYPES); double MassTable[6] = {0., 0., 0., 0., 0., 0.}; - writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, 6); - writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, 6); + writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); + unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); /* Close header */ @@ -636,21 +716,32 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, /* Print the system of Units */ writeUnitSystem(h_file, us); - /* Create SPH particles group */ - /* message("Writing particle arrays..."); */ - h_grp = - H5Gcreate(h_file, "/PartType0", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); - if (h_grp < 0) error("Error while creating particle group.\n"); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Close particle group */ - H5Gclose(h_grp); + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gcreate(h_file, partTypeGroupName, H5P_DEFAULT, H5P_DEFAULT, + H5P_DEFAULT); + if (h_grp < 0) { + error("Error while creating particle group.\n"); + } + + /* Close particle group */ + H5Gclose(h_grp); + } /* Close file */ H5Fclose(h_file); } /* Now loop over ranks and write the data */ - for (rank = 0; rank < mpi_size; ++rank) { + for (int rank = 0; rank < mpi_size; ++rank) { /* Is it this rank's turn to write ?
*/ if (rank == mpi_rank) { @@ -659,18 +750,65 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, if (h_file < 0) error("Error while opening file '%s' on rank %d.", fileName, mpi_rank); - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - h_grp = H5Gopen(h_file, "/PartType0", H5P_DEFAULT); - if (h_grp < 0) - error("Error while opening particle group on rank %d.\n", mpi_rank); + /* Loop over all particle types */ + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { - /* Write particle fields from the particle structure */ - hydro_write_particles(h_grp, fileName, xmfFile, N, N_total, mpi_rank, - offset, parts, us); + /* Don't do anything if no particle of this kind */ + if (N_total[ptype] == 0) continue; - /* Close particle group */ - H5Gclose(h_grp); + /* Add the global information for that particle type to the XMF + * meta-file */ + if (mpi_rank == 0) + writeXMFgroupheader(xmfFile, fileName, N_total[ptype], ptype); + + /* Open the particle group in the file */ + char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; + snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", + ptype); + h_grp = H5Gopen(h_file, partTypeGroupName, H5P_DEFAULT); + if (h_grp < 0) { + error("Error while opening particle group %s.", partTypeGroupName); + } + + /* Write particle fields from the particle structure */ + switch (ptype) { + + case GAS: + hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + N[ptype], N_total[ptype], mpi_rank, + offset[ptype], parts, us); + + break; + + case DM: + /* Allocate temporary array */ + if (posix_memalign((void*)&dmparts, gpart_align, + Ndm * sizeof(struct gpart)) != 0) + error("Error while allocating temporary memory for DM particles"); + bzero(dmparts, Ndm * sizeof(struct gpart)); + + /* Collect the DM particles from gpart */ + collect_dm_gparts(gparts, Ntot, dmparts, Ndm); + + /* Write DM particles */ + darkmatter_write_particles(h_grp, fileName, partTypeGroupName, + xmfFile, N[ptype], N_total[ptype], + mpi_rank, offset[ptype], dmparts, us); + + /* Free temporary array */ + free(dmparts); + break; + + default: + error("Particle Type %d not yet supported.
Aborting", ptype); + } + + /* Close particle group */ + H5Gclose(h_grp); + + /* Close this particle group in the XMF file as well */ + if (mpi_rank == 0) writeXMFgroupfooter(xmfFile, ptype); + } /* Close file */ H5Fclose(h_file); @@ -681,7 +819,7 @@ void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, } /* Write footer of XMF file descriptor */ - if (mpi_rank == 0) writeXMFfooter(xmfFile); + if (mpi_rank == 0) writeXMFoutputfooter(xmfFile, outputCount, e->time); /* message("Done writing particles..."); */ ++outputCount; diff --git a/src/serial_io.h b/src/serial_io.h index 95f09f5977a97a359e978db7a1b71b02030d6a14..5a34d420cfabd88d4147e3f3630e0efe89951c41 100644 --- a/src/serial_io.h +++ b/src/serial_io.h @@ -32,8 +32,9 @@ #if defined(HAVE_HDF5) && defined(WITH_MPI) && !defined(HAVE_PARALLEL_HDF5) void read_ic_serial(char* fileName, double dim[3], struct part** parts, - size_t* N, int* periodic, int mpi_rank, int mpi_size, - MPI_Comm comm, MPI_Info info); + struct gpart** gparts, size_t* Ngas, size_t* Ngparts, + int* periodic, int mpi_rank, int mpi_size, MPI_Comm comm, + MPI_Info info); void write_output_serial(struct engine* e, struct UnitSystem* us, int mpi_rank, int mpi_size, MPI_Comm comm, MPI_Info info); diff --git a/src/single_io.c b/src/single_io.c index 59686a68b5d9e5ea41267ba7b3aad9391862fae4..801428433ef5170082b68dec425e52f845bb41ae 100644 --- a/src/single_io.c +++ b/src/single_io.c @@ -39,9 +39,6 @@ #include "common_io.h" #include "error.h" -#define FILENAME_BUFFER_SIZE 150 -#define PARTICLE_GROUP_BUFFER_SIZE 20 - /*----------------------------------------------------------------------------- * Routines reading an IC file *-----------------------------------------------------------------------------*/ @@ -56,24 +53,23 @@ * @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of *interest in the parts array + * @param partSize The size in bytes of the particle structure. * @param importance If COMPULSORY, the data must be present in the IC file. If *OPTIONAL, the array will be zeroed when the data is not present. * * @todo A better version using HDF5 hyper-slabs to read the file directly into *the part array * will be written once the structures have been stabilized. - * - * Calls #error() if an error occurs. */ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, - int dim, char* part_c, enum DATA_IMPORTANCE importance) { + int dim, char* part_c, size_t partSize, + enum DATA_IMPORTANCE importance) { hid_t h_data = 0, h_err = 0, h_type = 0; htri_t exist = 0; void* temp; int i = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; /* Check whether the dataspace exists or not */ @@ -141,23 +137,25 @@ void readArrayBackEnd(hid_t grp, char* name, enum DATA_TYPE type, int N, * @param grp The group in which to write. * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param name The name of the array to write. * @param type The #DATA_TYPE of the array. * @param N The number of particles to write.
* @param dim The dimension of the data (1 for scalar, 3 for vector) * @param part_c A (char*) pointer on the first occurrence of the field of - *interest in the parts array + *interest in the parts array. + * @param partSize The size in bytes of the particle structure. * @param us The UnitSystem currently in use * @param convFactor The UnitConversionFactor for this array * * @todo A better version using HDF5 hyper-slabs to write the file directly from *the part array * will be written once the structures have been stabilized. - * - * Calls #error() if an error occurs. */ -void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, - enum DATA_TYPE type, int N, int dim, char* part_c, +void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, + char* partTypeGroupName, char* name, enum DATA_TYPE type, + int N, int dim, char* part_c, size_t partSize, struct UnitSystem* us, enum UnitConversionFactor convFactor) { hid_t h_data = 0, h_err = 0, h_space = 0, h_prop = 0; @@ -165,7 +163,6 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, int i = 0, rank = 0; const size_t typeSize = sizeOfType(type); const size_t copySize = typeSize * dim; - const size_t partSize = sizeof(struct part); char* temp_c = 0; hsize_t shape[2]; hsize_t chunk_shape[2]; @@ -204,7 +201,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, /* Make sure the chunks are not larger than the dataset */ if (chunk_shape[0] > N) chunk_shape[0] = N; - + /* Change shape of data space */ h_err = H5Sset_extent_simple(h_space, rank, shape, NULL); if (h_err < 0) { @@ -241,7 +238,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, } /* Write XMF description for this data set */ - writeXMFline(xmfFile, fileName, name, N, dim, type); + writeXMFline(xmfFile, fileName, partTypeGroupName, name, N, dim, type); /* Write unit conversion factors for this data set */ conversionString(buffer, us, convFactor); @@ -276,7 +273,7 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, #define readArray(grp, name, type, N, dim, part, N_total, offset, field, \ importance) \ readArrayBackEnd(grp, name, type, N, dim, (char*)(&(part[0]).field), \ - importance) + sizeof(part[0]), importance) /** * @brief A helper macro to call the readArrayBackEnd function more easily. @@ -285,6 +282,8 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param fileName The name of the file in which the data is written * @param xmfFile The FILE used to write the XMF description * @param name The name of the array to write. + * @param partTypeGroupName The name of the group containing the particles in + *the HDF5 file. * @param type The #DATA_TYPE of the array. * @param N The number of particles to write. 
* @param dim The dimension of the data (1 for scalar, 3 for vector) @@ -298,10 +297,12 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param convFactor The UnitConversionFactor for this array * */ -#define writeArray(grp, fileName, xmfFile, name, type, N, dim, part, N_total, \ - mpi_rank, offset, field, us, convFactor) \ - writeArrayBackEnd(grp, fileName, xmfFile, name, type, N, dim, \ - (char*)(&(part[0]).field), us, convFactor) +#define writeArray(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \ + dim, part, N_total, mpi_rank, offset, field, us, \ + convFactor) \ + writeArrayBackEnd(grp, fileName, xmfFile, partTypeGroupName, name, type, N, \ + dim, (char*)(&(part[0]).field), sizeof(part[0]), us, \ + convFactor) /* Import the right hydro definition */ #include "hydro_io.h" @@ -314,9 +315,9 @@ void writeArrayBackEnd(hid_t grp, char* fileName, FILE* xmfFile, char* name, * @param fileName The file to read. * @param dim (output) The dimension of the volume. * @param parts (output) Array of Gas particles. - * @param gparts (output) Array of DM particles. + * @param gparts (output) Array of #gpart particles. * @param Ngas (output) number of Gas particles read. - * @param Ngparts (output) The number of DM particles read. + * @param Ngparts (output) The number of #gpart read. * @param periodic (output) 1 if the volume is periodic, 0 if not. * * Opens the HDF5 file fileName and reads the particles contained @@ -337,6 +338,8 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, double boxSize[3] = {0.0, -1.0, -1.0}; /* GADGET has 6 particle types. We only keep the type 0 & 1 for now...*/ int numParticles[NUM_PARTICLE_TYPES] = {0}; + int numParticles_highWord[NUM_PARTICLE_TYPES] = {0}; + size_t N[NUM_PARTICLE_TYPES] = {0}; size_t Ndm; /* Open file */ @@ -365,9 +368,12 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, /* Read the relevant information and print status */ readAttribute(h_grp, "BoxSize", DOUBLE, boxSize); readAttribute(h_grp, "NumPart_Total", UINT, numParticles); + readAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticles_highWord); + + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) + N[ptype] = ((long long)numParticles[ptype]) + + ((long long)numParticles_highWord[ptype] << 32); - *Ngas = numParticles[0]; - Ndm = numParticles[1]; dim[0] = boxSize[0]; dim[1] = (boxSize[1] < 0) ? boxSize[0] : boxSize[1]; dim[2] = (boxSize[2] < 0) ? 
boxSize[0] : boxSize[2]; @@ -378,16 +384,16 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, /* Close header */ H5Gclose(h_grp); - /* Total number of particles */ - *Ngparts = *Ngas + Ndm; - /* Allocate memory to store SPH particles */ + *Ngas = N[0]; if (posix_memalign((void*)parts, part_align, *Ngas * sizeof(struct part)) != 0) error("Error while allocating memory for SPH particles"); bzero(*parts, *Ngas * sizeof(struct part)); /* Allocate memory to store all particles */ + Ndm = N[1]; + *Ngparts = N[1] + N[0]; if (posix_memalign((void*)gparts, gpart_align, *Ngparts * sizeof(struct gpart)) != 0) error("Error while allocating memory for gravity particles"); @@ -396,16 +402,14 @@ void read_ic_single(char* fileName, double dim[3], struct part** parts, /* message("Allocated %8.2f MB for particles.", *N * sizeof(struct part) / * (1024.*1024.)); */ - /* Open SPH particles group */ - /* message("Reading particle arrays..."); */ - message("BoxSize = %lf", dim[0]); - message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); + /* message("BoxSize = %lf", dim[0]); */ + /* message("NumPart = [%zd, %zd] Total = %zd", *Ngas, Ndm, *Ngparts); */ /* Loop over all particle types */ for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ptype++) { /* Don't do anything if no particle of this kind */ - if (numParticles[ptype] == 0) continue; + if (N[ptype] == 0) continue; /* Open the particle group in the file */ char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; @@ -476,10 +480,13 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { static int outputCount = 0; /* Number of particles of each type */ - const size_t Ndm = Ntot - Ngas; - int numParticles[NUM_PARTICLE_TYPES] = /* Gadget-2 convention here */ - {Ngas, Ndm, 0}; /* Could use size_t instead */ - int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + // const size_t Ndm = Ntot - Ngas; + + /* MATTHIEU: Temporary fix to preserve master */ + const size_t Ndm = Ntot > 0 ? 
Ntot - Ngas : 0; + /* MATTHIEU: End temporary fix */ + + long long N_total[NUM_PARTICLE_TYPES] = {Ngas, Ndm, 0}; /* File name */ char fileName[FILENAME_BUFFER_SIZE]; @@ -493,7 +500,7 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { xmfFile = prepareXMFfile(); /* Write the part corresponding to this specific output */ - writeXMFheader(xmfFile, Ngas, fileName, e->time); + writeXMFoutputheader(xmfFile, fileName, e->time); /* Open file */ /* message("Opening file '%s'.", fileName); */ @@ -521,19 +528,27 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { /* Print the relevant information and print status */ writeAttribute(h_grp, "BoxSize", DOUBLE, e->s->dim, 3); - writeAttribute(h_grp, "NumPart_ThisFile", UINT, numParticles, - NUM_PARTICLE_TYPES); double dblTime = e->time; writeAttribute(h_grp, "Time", DOUBLE, &dblTime, 1); /* GADGET-2 legacy values */ + /* Number of particles of each type */ + unsigned int numParticles[NUM_PARTICLE_TYPES] = {0}; + unsigned int numParticlesHighWord[NUM_PARTICLE_TYPES] = {0}; + for (int ptype = 0; ptype < NUM_PARTICLE_TYPES; ++ptype) { + numParticles[ptype] = (unsigned int)N_total[ptype]; + numParticlesHighWord[ptype] = (unsigned int)(N_total[ptype] >> 32); + } + writeAttribute(h_grp, "NumPart_ThisFile", LONGLONG, N_total, + NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total", UINT, numParticles, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumPart_Total_HighWord", UINT, numParticlesHighWord, NUM_PARTICLE_TYPES); - double MassTable[NUM_PARTICLE_TYPES] = {0., 0., 0., 0., 0., 0.}; + double MassTable[NUM_PARTICLE_TYPES] = {0}; writeAttribute(h_grp, "MassTable", DOUBLE, MassTable, NUM_PARTICLE_TYPES); - writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, numParticlesHighWord, + unsigned int flagEntropy[NUM_PARTICLE_TYPES] = {0}; + writeAttribute(h_grp, "Flag_Entropy_ICs", UINT, flagEntropy, NUM_PARTICLE_TYPES); writeAttribute(h_grp, "NumFilesPerSnapshot", INT, &numFiles, 1); @@ -558,6 +573,9 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { /* Don't do anything if no particle of this kind */ if (numParticles[ptype] == 0) continue; + /* Add the global information for that particle type to the XMF meta-file */ + writeXMFgroupheader(xmfFile, fileName, numParticles[ptype], ptype); + /* Open the particle group in the file */ char partTypeGroupName[PARTICLE_GROUP_BUFFER_SIZE]; snprintf(partTypeGroupName, PARTICLE_GROUP_BUFFER_SIZE, "/PartType%d", @@ -574,8 +592,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { switch (ptype) { case GAS: - hydro_write_particles(h_grp, fileName, xmfFile, Ngas, Ngas, 0, 0, parts, - us); + hydro_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, Ngas, + Ngas, 0, 0, parts, us); break; case DM: @@ -589,8 +607,8 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { collect_dm_gparts(gparts, Ntot, dmparts, Ndm); /* Write DM particles */ - darkmatter_write_particles(h_grp, fileName, xmfFile, Ndm, Ndm, 0, 0, - dmparts, us); + darkmatter_write_particles(h_grp, fileName, partTypeGroupName, xmfFile, + Ndm, Ndm, 0, 0, dmparts, us); /* Free temporary array */ free(dmparts); @@ -602,10 +620,13 @@ void write_output_single(struct engine* e, struct UnitSystem* us) { /* Close particle group */ H5Gclose(h_grp); + + /* Close this particle group in the XMF file as well */ + writeXMFgroupfooter(xmfFile, ptype); } /* Write XMF file descriptor */ - writeXMFfooter(xmfFile); + writeXMFoutputfooter(xmfFile, outputCount, e->time); /* message("Done
writing particles..."); */ diff --git a/src/space.c b/src/space.c index 954c9af7dd6d92adcd29d836dde16a61cf0f4792..d1a78b03da7c0a7cf5e742ec16912b94a442478b 100644 --- a/src/space.c +++ b/src/space.c @@ -97,12 +97,10 @@ const int sortlistID[27] = { int space_getsid(struct space *s, struct cell **ci, struct cell **cj, double *shift) { - int k, sid = 0, periodic = s->periodic; - struct cell *temp; - double dx[3]; - /* Get the relative distance between the pairs, wrapping. */ - for (k = 0; k < 3; k++) { + const int periodic = s->periodic; + double dx[3]; + for (int k = 0; k < 3; k++) { dx[k] = (*cj)->loc[k] - (*ci)->loc[k]; if (periodic && dx[k] < -s->dim[k] / 2) shift[k] = s->dim[k]; @@ -114,15 +112,16 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, } /* Get the sorting index. */ - for (k = 0; k < 3; k++) + int sid = 0; + for (int k = 0; k < 3; k++) sid = 3 * sid + ((dx[k] < 0.0) ? 0 : ((dx[k] > 0.0) ? 2 : 1)); /* Switch the cells around? */ if (runner_flip[sid]) { - temp = *ci; + struct cell *temp = *ci; *ci = *cj; *cj = temp; - for (k = 0; k < 3; k++) shift[k] = -shift[k]; + for (int k = 0; k < 3; k++) shift[k] = -shift[k]; } sid = sortlistID[sid]; @@ -137,10 +136,8 @@ int space_getsid(struct space *s, struct cell **ci, struct cell **cj, void space_rebuild_recycle(struct space *s, struct cell *c) { - int k; - if (c->split) - for (k = 0; k < 8; k++) + for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { space_rebuild_recycle(s, c->progeny[k]); space_recycle(s, c->progeny[k]); @@ -158,19 +155,19 @@ void space_rebuild_recycle(struct space *s, struct cell *c) { void space_regrid(struct space *s, double cell_max, int verbose) { - float h_max = s->cell_min / kernel_gamma / space_stretch, dmin; - int i, j, k, cdim[3], nr_parts = s->nr_parts; + float h_max = s->cell_min / kernel_gamma / space_stretch; + const size_t nr_parts = s->nr_parts; struct cell *restrict c; ticks tic = getticks(); /* Run through the parts and get the current h_max. */ // tic = getticks(); if (s->cells != NULL) { - for (k = 0; k < s->nr_cells; k++) { + for (int k = 0; k < s->nr_cells; k++) { if (s->cells[k].h_max > h_max) h_max = s->cells[k].h_max; } } else { - for (k = 0; k < nr_parts; k++) { + for (int k = 0; k < nr_parts; k++) { if (s->parts[k].h > h_max) h_max = s->parts[k].h; } s->h_max = h_max; @@ -190,7 +187,8 @@ void space_regrid(struct space *s, double cell_max, int verbose) { if (verbose) message("h_max is %.3e (cell_max=%.3e).", h_max, cell_max); /* Get the new putative cell dimensions. */ - for (k = 0; k < 3; k++) + int cdim[3]; + for (int k = 0; k < 3; k++) cdim[k] = floor(s->dim[k] / fmax(h_max * kernel_gamma * space_stretch, cell_max)); @@ -213,7 +211,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { /* Free the old cells, if they were allocated. */ if (s->cells != NULL) { - for (k = 0; k < s->nr_cells; k++) { + for (int k = 0; k < s->nr_cells; k++) { space_rebuild_recycle(s, &s->cells[k]); if (s->cells[k].sort != NULL) free(s->cells[k].sort); } @@ -222,12 +220,12 @@ void space_regrid(struct space *s, double cell_max, int verbose) { } /* Set the new cell dimensions only if smaller. */ - for (k = 0; k < 3; k++) { + for (int k = 0; k < 3; k++) { s->cdim[k] = cdim[k]; s->h[k] = s->dim[k] / cdim[k]; s->ih[k] = 1.0 / s->h[k]; } - dmin = fminf(s->h[0], fminf(s->h[1], s->h[2])); + const float dmin = fminf(s->h[0], fminf(s->h[1], s->h[2])); /* Allocate the highest level of cells. 
*/ s->tot_cells = s->nr_cells = cdim[0] * cdim[1] * cdim[2]; @@ -235,13 +233,13 @@ void space_regrid(struct space *s, double cell_max, int verbose) { s->nr_cells * sizeof(struct cell)) != 0) error("Failed to allocate cells."); bzero(s->cells, s->nr_cells * sizeof(struct cell)); - for (k = 0; k < s->nr_cells; k++) + for (int k = 0; k < s->nr_cells; k++) if (lock_init(&s->cells[k].lock) != 0) error("Failed to init spinlock."); /* Set the cell location and sizes. */ - for (i = 0; i < cdim[0]; i++) - for (j = 0; j < cdim[1]; j++) - for (k = 0; k < cdim[2]; k++) { + for (int i = 0; i < cdim[0]; i++) + for (int j = 0; j < cdim[1]; j++) + for (int k = 0; k < cdim[2]; k++) { c = &s->cells[cell_getid(cdim, i, j, k)]; c->loc[0] = i * s->h[0]; c->loc[1] = j * s->h[1]; @@ -271,7 +269,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { else { /* Free the old cells, if they were allocated. */ - for (k = 0; k < s->nr_cells; k++) { + for (int k = 0; k < s->nr_cells; k++) { space_rebuild_recycle(s, &s->cells[k]); s->cells[k].sorts = NULL; s->cells[k].nr_tasks = 0; @@ -308,7 +306,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { void space_rebuild(struct space *s, double cell_max, int verbose) { - ticks tic = getticks(); + const ticks tic = getticks(); /* Be verbose about this. */ // message( "re)building space..." ); fflush(stdout); @@ -320,23 +318,15 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { int nr_gparts = s->nr_gparts; struct cell *restrict cells = s->cells; - double ih[3], dim[3]; - int cdim[3]; - ih[0] = s->ih[0]; - ih[1] = s->ih[1]; - ih[2] = s->ih[2]; - dim[0] = s->dim[0]; - dim[1] = s->dim[1]; - dim[2] = s->dim[2]; - cdim[0] = s->cdim[0]; - cdim[1] = s->cdim[1]; - cdim[2] = s->cdim[2]; + const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; /* Run through the particles and get their cell index. */ // tic = getticks(); const size_t ind_size = s->size_parts; - size_t *ind; - if ((ind = (size_t *)malloc(sizeof(size_t) * ind_size)) == NULL) + int *ind; + if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) error("Failed to allocate temporary particle indices."); for (int k = 0; k < nr_parts; k++) { struct part *restrict p = &s->parts[k]; @@ -349,37 +339,92 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cells[ind[k]].count++; } + // message( "getting particle indices took %.3f %s." , + // clocks_from_ticks(getticks() - tic), clocks_getunit()): + + /* Run through the gravity particles and get their cell index. */ + // tic = getticks(); + const size_t gind_size = s->size_gparts; + int *gind; + if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL) + error("Failed to allocate temporary g-particle indices."); + for (int k = 0; k < nr_gparts; k++) { + struct gpart *restrict gp = &s->gparts[k]; + for (int j = 0; j < 3; j++) + if (gp->x[j] < 0.0) + gp->x[j] += dim[j]; + else if (gp->x[j] >= dim[j]) + gp->x[j] -= dim[j]; + gind[k] = + cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); + cells[gind[k]].gcount++; + } // message( "getting particle indices took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()): +// clocks_from_ticks(getticks() - tic), clocks_getunit()); #ifdef WITH_MPI /* Move non-local parts to the end of the list. 
*/ - const int nodeID = s->e->nodeID; + const int local_nodeID = s->e->nodeID; for (int k = 0; k < nr_parts; k++) - if (cells[ind[k]].nodeID != nodeID) { + if (cells[ind[k]].nodeID != local_nodeID) { cells[ind[k]].count -= 1; nr_parts -= 1; - struct part tp = s->parts[k]; + const struct part tp = s->parts[k]; s->parts[k] = s->parts[nr_parts]; s->parts[nr_parts] = tp; - struct xpart txp = s->xparts[k]; + if (s->parts[k].gpart != NULL) { + s->parts[k].gpart->id_or_neg_offset = -k; + } + if (s->parts[nr_parts].gpart != NULL) { + s->parts[nr_parts].gpart->id_or_neg_offset = -nr_parts; + } + const struct xpart txp = s->xparts[k]; s->xparts[k] = s->xparts[nr_parts]; s->xparts[nr_parts] = txp; - int t = ind[k]; + const int t = ind[k]; ind[k] = ind[nr_parts]; ind[nr_parts] = t; } + /* Move non-local gparts to the end of the list. */ + for (int k = 0; k < nr_gparts; k++) + if (cells[gind[k]].nodeID != local_nodeID) { + cells[gind[k]].gcount -= 1; + nr_gparts -= 1; + const struct gpart tp = s->gparts[k]; + s->gparts[k] = s->gparts[nr_gparts]; + s->gparts[nr_gparts] = tp; + if (s->gparts[k].id_or_neg_offset <= 0) { + s->parts[-s->gparts[k].id_or_neg_offset].gpart = &s->gparts[k]; + } + if (s->gparts[nr_gparts].id_or_neg_offset <= 0) { + s->parts[-s->gparts[nr_gparts].id_or_neg_offset].gpart = + &s->gparts[nr_gparts]; + } + const int t = gind[k]; + gind[k] = gind[nr_gparts]; + gind[nr_gparts] = t; + } + /* Exchange the strays, note that this potentially re-allocates the parts arrays. */ - s->nr_parts = - nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], - s->nr_parts - nr_parts); + /* TODO: This function also exchanges gparts, but this is short-circuited + until gparts are fully implemented. */ + size_t nr_parts_exchanged = s->nr_parts - nr_parts; + size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts; + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged, + nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged); + + /* Add post-processing, i.e. re-linking/creating of gparts here. */ + + /* Set the new particle counts. */ + s->nr_parts = nr_parts + nr_parts_exchanged; + s->nr_gparts = nr_gparts + nr_gparts_exchanged; /* Re-allocate the index array if needed.. */ if (s->nr_parts > ind_size) { - size_t *ind_new; - if ((ind_new = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL) + int *ind_new; + if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) error("Failed to allocate temporary particle indices."); - memcpy(ind_new, ind, sizeof(size_t) * nr_parts); + memcpy(ind_new, ind, sizeof(int) * nr_parts); free(ind); @@ -388,7 +433,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Assign each particle to its cell. */ for (int k = nr_parts; k < s->nr_parts; k++) { - struct part *p = &s->parts[k]; + const struct part *const p = &s->parts[k]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cells[ind[k]].count += 1; @@ -418,65 +463,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* We no longer need the indices as of here. */ free(ind); - /* Run through the gravity particles and get their cell index.
*/ - // tic = getticks(); - const size_t gind_size = s->size_gparts; - size_t *gind; - if ((gind = (size_t *)malloc(sizeof(size_t) * gind_size)) == NULL) - error("Failed to allocate temporary g-particle indices."); - for (int k = 0; k < nr_gparts; k++) { - struct gpart *gp = &s->gparts[k]; - for (int j = 0; j < 3; j++) - if (gp->x[j] < 0.0) - gp->x[j] += dim[j]; - else if (gp->x[j] >= dim[j]) - gp->x[j] -= dim[j]; - gind[k] = - cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); - cells[gind[k]].gcount++; - } -// message( "getting particle indices took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()); - #ifdef WITH_MPI - /* Move non-local gparts to the end of the list. */ - for (int k = 0; k < nr_gparts; k++) - if (cells[ind[k]].nodeID != nodeID) { - cells[ind[k]].gcount -= 1; - nr_gparts -= 1; - struct gpart tp = s->gparts[k]; - s->gparts[k] = s->gparts[nr_gparts]; - s->gparts[nr_gparts] = tp; - int t = ind[k]; - ind[k] = ind[nr_gparts]; - ind[nr_gparts] = t; - } - - /* Exchange the strays, note that this potentially re-allocates - the parts arrays. */ - // s->nr_gparts = - // nr_gparts + engine_exchange_strays(s->e, nr_gparts, &ind[nr_gparts], - // s->nr_gparts - nr_gparts); - if (nr_gparts > 0) - error("Need to implement the exchange of strays for the gparts"); - /* Re-allocate the index array if needed.. */ if (s->nr_gparts > gind_size) { - size_t *gind_new; - if ((gind_new = (size_t *)malloc(sizeof(size_t) * s->nr_gparts)) == NULL) + int *gind_new; + if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) error("Failed to allocate temporary g-particle indices."); - memcpy(gind_new, gind, sizeof(size_t) * nr_gparts); + memcpy(gind_new, gind, sizeof(int) * nr_gparts); free(gind); gind = gind_new; } /* Assign each particle to its cell. */ for (int k = nr_gparts; k < s->nr_gparts; k++) { - struct gpart *p = &s->gparts[k]; + const struct gpart *const p = &s->gparts[k]; gind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[gind[k]].count += 1; + cells[gind[k]].gcount += 1; /* if ( cells[ ind[k] ].nodeID != nodeID ) error( "Received part that does not belong to me (nodeID=%i)." , cells[ ind[k] ].nodeID ); */ @@ -494,6 +498,29 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* We no longer need the indices as of here. */ free(gind); + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < nr_gparts; ++k) { + + if (s->gparts[k].id_or_neg_offset < 0) { + + const struct part *part = &s->parts[-s->gparts[k].id_or_neg_offset]; + + if (part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != part->x[0] || s->gparts[k].x[1] != part->x[1] || + s->gparts[k].x[2] != part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < nr_parts; ++k) { + + if (s->parts[k].gpart != NULL && + s->parts[k].gpart->id_or_neg_offset != -k) { + error("Linking problem !"); + } + } + /* Hook the cells up to the parts. 
*/ // tic = getticks(); struct part *finger = s->parts; @@ -529,7 +556,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { */ void space_split(struct space *s, struct cell *cells, int verbose) { - ticks tic = getticks(); + const ticks tic = getticks(); for (int k = 0; k < s->nr_cells; k++) scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k, @@ -553,7 +580,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) { * @param verbose Are we talkative ? */ -void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, +void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose) { ticks tic = getticks(); @@ -601,7 +628,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, void space_do_parts_sort() { /* Pointers to the sorting data. */ - size_t *ind = space_sort_struct.ind; + int *ind = space_sort_struct.ind; struct part *parts = space_sort_struct.parts; struct xpart *xparts = space_sort_struct.xparts; @@ -723,7 +750,7 @@ void space_do_parts_sort() { } /* main loop. */ } -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, +void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min, int max) { struct qstack { diff --git a/src/space.h b/src/space.h index 91485ff7e2ebe9da8ab927748589ae9f71320803..e761595838ae78b0d8a67cca676cfa59f3f700f6 100644 --- a/src/space.h +++ b/src/space.h @@ -64,9 +64,6 @@ struct space { /* The minimum and maximum cutoff radii. */ double h_max, cell_min; - /* Current time step for particles. */ - float dt_step; - /* Current maximum displacement for particles. */ float dx_max; @@ -106,6 +103,8 @@ struct space { /* Buffers for parts that we will receive from foreign cells. */ struct part *parts_foreign; size_t nr_parts_foreign, size_parts_foreign; + struct gpart *gparts_foreign; + size_t nr_gparts_foreign, size_gparts_foreign; }; /* Interval stack necessary for parallel particle sorting. */ @@ -117,7 +116,7 @@ struct qstack { struct parallel_sort { struct part *parts; struct xpart *xparts; - size_t *ind; + int *ind; struct qstack *stack; unsigned int stack_size; volatile unsigned int first, last, waiting; @@ -125,9 +124,9 @@ struct parallel_sort { extern struct parallel_sort space_sort_struct; /* function prototypes. 
*/ -void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, +void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose); -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, +void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min, int max); struct cell *space_getcell(struct space *s); int space_getsid(struct space *s, struct cell **ci, struct cell **cj, diff --git a/src/swift.h b/src/swift.h index 9ab090dccd195ff4927d3e614e446b36d273f824..e568a28c888295affc9ec45b6d059d34f5b4bf04 100644 --- a/src/swift.h +++ b/src/swift.h @@ -27,7 +27,6 @@ #include "cell.h" #include "clocks.h" #include "const.h" -#include "const.h" #include "cycle.h" #include "debug.h" #include "engine.h" @@ -38,7 +37,9 @@ #include "map.h" #include "multipole.h" #include "parallel_io.h" +#include "parser.h" #include "part.h" +#include "partition.h" #include "queue.h" #include "runner.h" #include "scheduler.h" @@ -47,9 +48,8 @@ #include "space.h" #include "task.h" #include "timers.h" -#include "units.h" #include "tools.h" -#include "partition.h" +#include "units.h" #include "version.h" #endif /* SWIFT_SWIFT_H */ diff --git a/src/task.c b/src/task.c index 69109f9e6d4fe8730a317db46ea3862e65ab90b2..74f8451d5dfaec2454f2eeed8670765e1be5b658 100644 --- a/src/task.c +++ b/src/task.c @@ -145,7 +145,7 @@ int task_lock(struct task *t) { #ifdef WITH_MPI /* Check the status of the MPI request. */ - int res, err; + int res = 0, err = 0; MPI_Status stat; if ((err = MPI_Test(&t->req, &res, &stat)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; diff --git a/src/tools.c b/src/tools.c index d5749e88e27a5f7491f5f5108586629ecc83d13e..1efdc027d3da50733372e73e1cfd6a9c7206784f 100644 --- a/src/tools.c +++ b/src/tools.c @@ -236,6 +236,53 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { } } +void self_all_density(struct runner *r, struct cell *ci) { + float r2, hi, hj, hig2, hjg2, dxi[3]; //, dxj[3]; + struct part *pi, *pj; + + /* Implements a double-for loop and checks every interaction */ + for (int i = 0; i < ci->count; ++i) { + + pi = &ci->parts[i]; + hi = pi->h; + hig2 = hi * hi * kernel_gamma2; + + for (int j = i + 1; j < ci->count; ++j) { + + pj = &ci->parts[j]; + hj = pj->h; + hjg2 = hj * hj * kernel_gamma2; + + if (pi == pj) continue; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dxi[k] = ci->parts[i].x[k] - ci->parts[j].x[k]; + r2 += dxi[k] * dxi[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + + /* Interact */ + runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj); + } + + /* Hit or miss? */ + if (r2 < hjg2) { + + dxi[0] = -dxi[0]; + dxi[1] = -dxi[1]; + dxi[2] = -dxi[2]; + + /* Interact */ + runner_iact_nonsym_density(r2, dxi, hj, hi, pj, pi); + } + } + } +} + void pairs_single_grav(double *dim, long long int pid, struct gpart *__restrict__ gparts, const struct part *parts, int N, int periodic) { @@ -256,9 +303,9 @@ void pairs_single_grav(double *dim, long long int pid, break; if (k == N) error("Part not found."); pi = gparts[k]; - pi.a[0] = 0.0f; - pi.a[1] = 0.0f; - pi.a[2] = 0.0f; + pi.a_grav[0] = 0.0f; + pi.a_grav[1] = 0.0f; + pi.a_grav[2] = 0.0f; /* Loop over all particle pairs. 
*/ for (k = 0; k < N; k++) { @@ -276,15 +323,15 @@ void pairs_single_grav(double *dim, long long int pid, } r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2]; runner_iact_grav(r2, fdx, &pi, &pj); - a[0] += pi.a[0]; - a[1] += pi.a[1]; - a[2] += pi.a[2]; - aabs[0] += fabsf(pi.a[0]); - aabs[1] += fabsf(pi.a[1]); - aabs[2] += fabsf(pi.a[2]); - pi.a[0] = 0.0f; - pi.a[1] = 0.0f; - pi.a[2] = 0.0f; + a[0] += pi.a_grav[0]; + a[1] += pi.a_grav[1]; + a[2] += pi.a_grav[2]; + aabs[0] += fabsf(pi.a_grav[0]); + aabs[1] += fabsf(pi.a_grav[1]); + aabs[2] += fabsf(pi.a_grav[2]); + pi.a_grav[0] = 0.0f; + pi.a_grav[1] = 0.0f; + pi.a_grav[2] = 0.0f; } /* Dump the result. */ diff --git a/src/tools.h b/src/tools.h index ed85c1bcb4c0bb34d255a8ab2fbf402b5dda6ba4..01226ee7cdbfe42aa44affadc4a9cbe02bad2428 100644 --- a/src/tools.h +++ b/src/tools.h @@ -34,6 +34,7 @@ void pairs_single_density(double *dim, long long int pid, struct part *__restrict__ parts, int N, int periodic); void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj); +void self_all_density(struct runner *r, struct cell *ci); void pairs_n2(double *dim, struct part *__restrict__ parts, int N, int periodic); diff --git a/tests/Makefile.am b/tests/Makefile.am index f0bfbefd3c7f4591134d1707c4ac9bf63278e855..d66282059d874f345437d779d59ec3edb08e47cb 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -21,10 +21,12 @@ AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) -DTIMER AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) # List of programs and scripts to run in the test suite -TESTS = testGreetings testReading.sh testSingle testTimeIntegration +TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh \ + test27cells.sh test27cellsPerturbed.sh testParser.sh # List of test programs to compile -check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration testSPHStep testVectorize +check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ + testSPHStep testPair test27cells testParser # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -37,7 +39,13 @@ testSPHStep_SOURCES = testSPHStep.c testSingle_SOURCES = testSingle.c -testVectorize_SOURCES = testVectorize.c +testPair_SOURCES = testPair.c + +test27cells_SOURCES = test27cells.c + +testParser_SOURCES = testParser.c # Files necessary for distribution -EXTRA_DIST = testReading.sh makeInput.py +EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ + test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \ + testParserInput.yaml diff --git a/tests/difffloat.py b/tests/difffloat.py new file mode 100644 index 0000000000000000000000000000000000000000..bbb7c95a1e77e04bbe21bec6dc6c5d529cd77c70 --- /dev/null +++ b/tests/difffloat.py @@ -0,0 +1,103 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. 
+ # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## + +from numpy import * +import sys + +abs_tol = 1e-7 +rel_tol = 1e-7 + +# Compares the content of two ASCII tables of floats line by line and +# reports all differences beyond the given tolerances +# Comparisons are done both in absolute and relative terms + +# Individual tolerances for each column can be provided in a file + +file1 = sys.argv[1] +file2 = sys.argv[2] +fileTol = "" + +if len(sys.argv) == 4: + fileTol = sys.argv[3] + +data1 = loadtxt(file1) +data2 = loadtxt(file2) +if fileTol != "": + dataTol = loadtxt(fileTol) + n_linesTol = shape(dataTol)[0] + n_columnsTol = shape(dataTol)[1] + + +if shape(data1) != shape(data2): + print "Non-matching array sizes in the files", file1, "and", file2, "." + sys.exit(1) + +n_lines = shape(data1)[0] +n_columns = shape(data1)[1] + +if fileTol != "": + if n_linesTol != 2: + print "Incorrect number of lines in tolerance file '%s'."%fileTol + sys.exit(1) + if n_columnsTol != n_columns: + print "Incorrect number of columns in tolerance file '%s'."%fileTol + sys.exit(1) + +if fileTol == "": + print "Absolute difference tolerance:", abs_tol + print "Relative difference tolerance:", rel_tol + absTol = ones(n_columns) * abs_tol + relTol = ones(n_columns) * rel_tol +else: + print "Tolerances read from file" + absTol = dataTol[0,:] + relTol = dataTol[1,:] + +error = False +for i in range(n_lines): + for j in range(n_columns): + + abs_diff = abs(data1[i,j] - data2[i,j]) + + sum = abs(data1[i,j] + data2[i,j]) + if sum > 0: + rel_diff = abs(data1[i,j] - data2[i,j]) / sum + else: + rel_diff = 0. + + if( abs_diff > absTol[j]): + print "Absolute difference larger than tolerance (%e) on line %d, column %d:"%(absTol[j], i,j) + print "%10s: a = %e"%("File 1", data1[i,j]) + print "%10s: b = %e"%("File 2", data2[i,j]) + print "%10s: |a-b| = %e"%("Difference", abs_diff) + print "" + error = True + + if( rel_diff > relTol[j]): + print "Relative difference larger than tolerance (%e) on line %d, column %d:"%(relTol[j], i,j) + print "%10s: a = %e"%("File 1", data1[i,j]) + print "%10s: b = %e"%("File 2", data2[i,j]) + print "%10s: |a-b|/|a+b| = %e"%("Difference", rel_diff) + print "" + error = True + + +if error: + exit(1) +else: + print "No differences found" + exit(0) diff --git a/tests/test27cells.c b/tests/test27cells.c new file mode 100644 index 0000000000000000000000000000000000000000..74c38996a81056b10633bf2bbf18cc7cff7e8f0d --- /dev/null +++ b/tests/test27cells.c @@ -0,0 +1,367 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * + ******************************************************************************/ + +#include <fenv.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include "swift.h" + +/** + * Returns a random number (uniformly distributed) in [a,b[ + */ +double random_uniform(double a, double b) { + return (rand() / (double)RAND_MAX) * (b - a) + a; +} + +/* n is the number of particles per axis: + * particles are generated on a regular mesh with spacing size/n + */ +struct cell *make_cell(size_t n, double *offset, double size, double h, + double density, long long *partId, double pert) { + const size_t count = n * n * n; + const double volume = size * size * size; + struct cell *cell = malloc(sizeof(struct cell)); + bzero(cell, sizeof(struct cell)); + + if (posix_memalign((void **)&cell->parts, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(cell->parts, count * sizeof(struct part)); + + /* Construct the parts */ + struct part *part = cell->parts; + for (size_t x = 0; x < n; ++x) { + for (size_t y = 0; y < n; ++y) { + for (size_t z = 0; z < n; ++z) { + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + // part->v[0] = part->x[0] - 1.5; + // part->v[1] = part->x[1] - 1.5; + // part->v[2] = part->x[2] - 1.5; + part->v[0] = random_uniform(-0.05, 0.05); + part->v[1] = random_uniform(-0.05, 0.05); + part->v[2] = random_uniform(-0.05, 0.05); + part->h = size * h / (float)n; + part->id = ++(*partId); + part->mass = density * volume / count; + part->ti_begin = 0; + part->ti_end = 1; + ++part; + } + } + } + + /* Cell properties */ + cell->split = 0; + cell->h_max = h; + cell->count = count; + cell->dx_max = 0.; + cell->h[0] = size; + cell->h[1] = size; + cell->h[2] = size; + cell->loc[0] = offset[0]; + cell->loc[1] = offset[1]; + cell->loc[2] = offset[2]; + + cell->ti_end_min = 1; + cell->ti_end_max = 1; + + cell->sorted = 0; + cell->sort = NULL; + cell->sortsize = 0; + runner_dosort(NULL, cell, 0x1FFF, 0); + + return cell; +} + +void clean_up(struct cell *ci) { + free(ci->parts); + free(ci->sort); + free(ci); +} + +/** + * @brief Initializes all particle fields to be ready for a density calculation + */ +void zero_particle_fields(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + c->parts[pid].rho = 0.f; + c->parts[pid].rho_dh = 0.f; + hydro_init_part(&c->parts[pid]); + } +} + +/** + * @brief Ends the loop by adding the appropriate coefficients + */ +void end_calculation(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + hydro_end_density(&c->parts[pid], 1); + } +} + +/** + * @brief Dump all the particles to a file + */ +void dump_particle_fields(char *fileName, struct cell *main_cell, + struct cell **cells) { + + FILE *file = fopen(fileName, "w"); + + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " + "%13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", + "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); + + fprintf(file, "# Main cell --------------------------------------------\n"); + + /* Write main cell */ + for (size_t pid = 0; pid < main_cell->count; pid++) { + fprintf(file, + "%6llu %10f %10f %10f %10f %10f
%10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + main_cell->parts[pid].id, main_cell->parts[pid].x[0], + main_cell->parts[pid].x[1], main_cell->parts[pid].x[2], + main_cell->parts[pid].v[0], main_cell->parts[pid].v[1], + main_cell->parts[pid].v[2], main_cell->parts[pid].rho, + main_cell->parts[pid].rho_dh, main_cell->parts[pid].density.wcount, + main_cell->parts[pid].density.wcount_dh, +#ifdef GADGET2_SPH + main_cell->parts[pid].div_v, main_cell->parts[pid].density.rot_v[0], + main_cell->parts[pid].density.rot_v[1], + main_cell->parts[pid].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + + /* Write all other cells */ + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + + struct cell *cj = cells[i * 9 + j * 3 + k]; + if (cj == main_cell) continue; + + fprintf(file, + "# Offset: [%2d %2d %2d] -----------------------------------\n", + i - 1, j - 1, k - 1); + + for (size_t pjd = 0; pjd < cj->count; pjd++) { + fprintf( + file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1], + cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1], + cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh, + cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, +#ifdef GADGET2_SPH + cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], + cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + } + } + } + fclose(file); +} + +/* Just a forward declaration... */ +void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); +void runner_doself1_density(struct runner *r, struct cell *ci); + +/* And go... */ +int main(int argc, char *argv[]) { + + size_t runs = 0, particles = 0; + double h = 1.12575, size = 1., rho = 1.; + double perturbation = 0.; + char outputFileNameExtension[200] = ""; + char outputFileName[200] = ""; + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + /* Get some randomness going */ + srand(0); + + int c; + while ((c = getopt(argc, argv, "m:s:h:p:r:t:d:f:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &size); + break; + case 'p': + sscanf(optarg, "%zu", &particles); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; + case 'm': + sscanf(optarg, "%lf", &rho); + break; + case 'f': + strcpy(outputFileNameExtension, optarg); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || particles == 0 || runs == 0) { + printf( + "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" + "\nGenerates 27 cells, filled with particles on a Cartesian grid." + "\nThe central cell is then interacted with its 26 neighbours using" + "\nrunner_dopair1_density and with itself using runner_doself1_density." + "\n\nOptions:" + "\n-h DISTANCE=1.12575 - Smoothing length" + "\n-m rho - Physical density in the cell" + "\n-s size - Physical size of the cell" + "\n-d pert - Perturbation to apply to the particles [0,1[" + "\n-f fileName - Part of the file name used to save the dumps\n", + argv[0]); + exit(1); + } + + /* Help users...
*/ + message("Smoothing length: h = %f", h); + message("Neighbour target: N = %f", kernel_nwneigh); + + /* Build the infrastructure */ + struct space space; + space.periodic = 0; + space.h_max = h; + + struct engine engine; + engine.s = &space; + engine.time = 0.1f; + engine.ti_current = 1; + + struct runner runner; + runner.e = &engine; + + /* Construct some cells */ + struct cell *cells[27]; + struct cell *main_cell; + static long long partId = 0; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + + double offset[3] = {i * size, j * size, k * size}; + cells[i * 9 + j * 3 + k] = + make_cell(particles, offset, size, h, rho, &partId, perturbation); + } + } + } + + /* Store the main cell for future use */ + main_cell = cells[13]; + + ticks time = 0; + for (size_t i = 0; i < runs; ++i) { + + /* Zero the fields */ + for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]); + + const ticks tic = getticks(); + + /* Run all the pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) + runner_dopair1_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + runner_doself1_density(&runner, main_cell); + + const ticks toc = getticks(); + time += toc - tic; + + /* Let's get physical ! */ + end_calculation(main_cell); + + /* Dump if necessary */ + if (i % 50 == 0) { + sprintf(outputFileName, "swift_dopair_27_%s.dat", + outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + } + } + + /* Output timing */ + message("SWIFT calculation took : %15lli ticks.", time / runs); + + /* Now perform a brute-force version for accuracy tests */ + + /* Zero the fields */ + for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]); + + const ticks tic = getticks(); + + /* Run all the brute-force pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) pairs_all_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + self_all_density(&runner, main_cell); + + const ticks toc = getticks(); + + /* Let's get physical ! */ + end_calculation(main_cell); + + /* Dump */ + sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + + /* Output timing */ + message("Brute force calculation took : %15lli ticks.", toc - tic); + + /* Clean things to make the sanitizer happy ... */ + for (int i = 0; i < 27; ++i) clean_up(cells[i]); + + return 0; +} diff --git a/tests/test27cells.sh b/tests/test27cells.sh new file mode 100755 index 0000000000000000000000000000000000000000..09d2513bd3ef404c7bf434948af7f10306c98ede --- /dev/null +++ b/tests/test27cells.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_27_standard.dat swift_dopair_27_standard.dat + +./test27cells -p 6 -r 1 -d 0 -f standard + +python difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat tolerance.dat + +exit $? diff --git a/tests/test27cellsPerturbed.sh b/tests/test27cellsPerturbed.sh new file mode 100755 index 0000000000000000000000000000000000000000..73d2933984d38f7dcc992f07ec2e016f3544b636 --- /dev/null +++ b/tests/test27cellsPerturbed.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat + +./test27cells -p 6 -r 1 -d 0.1 -f perturbed + +python difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat tolerance.dat + +exit $? 
diff --git a/tests/testPair.c b/tests/testPair.c
new file mode 100644
index 0000000000000000000000000000000000000000..23ce4eb3de460f4e17b7b6f81cb39a628f3d100f
--- /dev/null
+++ b/tests/testPair.c
@@ -0,0 +1,305 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include <fenv.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "swift.h"
+
+/**
+ * Returns a random number (uniformly distributed) in [a,b)
+ */
+double random_uniform(double a, double b) {
+  return (rand() / (double)RAND_MAX) * (b - a) + a;
+}
+
+/* n is the number of particles per axis; the cell spans [offset, offset+size]
+ * and the particles sit on a regular mesh with spacing size/n.
+ */
+struct cell *make_cell(size_t n, double *offset, double size, double h,
+                       double density, unsigned long long *partId,
+                       double pert) {
+  const size_t count = n * n * n;
+  const double volume = size * size * size;
+  struct cell *cell = malloc(sizeof(struct cell));
+  bzero(cell, sizeof(struct cell));
+
+  if (posix_memalign((void **)&cell->parts, part_align,
+                     count * sizeof(struct part)) != 0) {
+    error("couldn't allocate particles, no. of particles: %d", (int)count);
+  }
+  bzero(cell->parts, count * sizeof(struct part));
+
+  /* Construct the parts */
+  struct part *part = cell->parts;
+  for (size_t x = 0; x < n; ++x) {
+    for (size_t y = 0; y < n; ++y) {
+      for (size_t z = 0; z < n; ++z) {
+        part->x[0] =
+            offset[0] +
+            size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[1] =
+            offset[1] +
+            size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        part->x[2] =
+            offset[2] +
+            size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n;
+        // part->v[0] = part->x[0] - 1.5;
+        // part->v[1] = part->x[1] - 1.5;
+        // part->v[2] = part->x[2] - 1.5;
+        part->v[0] = random_uniform(-0.05, 0.05);
+        part->v[1] = random_uniform(-0.05, 0.05);
+        part->v[2] = random_uniform(-0.05, 0.05);
+        part->h = size * h / (float)n;
+        part->id = ++(*partId);
+        part->mass = density * volume / count;
+        part->ti_begin = 0;
+        part->ti_end = 1;
+        ++part;
+      }
+    }
+  }
+
+  /* Cell properties */
+  cell->split = 0;
+  cell->h_max = h;
+  cell->count = count;
+  cell->dx_max = 0.;
+  cell->h[0] = size;
+  cell->h[1] = size;
+  cell->h[2] = size;
+  cell->loc[0] = offset[0];
+  cell->loc[1] = offset[1];
+  cell->loc[2] = offset[2];
+
+  cell->ti_end_min = 1;
+  cell->ti_end_max = 1;
+
+  cell->sorted = 0;
+  cell->sort = NULL;
+  cell->sortsize = 0;
+  runner_dosort(NULL, cell, 0x1FFF, 0);
+
+  return cell;
+}
+
+void clean_up(struct cell *ci) {
+  free(ci->parts);
+  free(ci->sort);
+  free(ci);
+}
+
+/**
+ * @brief Initializes all particle fields to be ready for a density calculation
+ */
+void zero_particle_fields(struct cell *c) {
+
+  for (size_t pid = 0; pid < c->count; pid++) {
+    c->parts[pid].rho = 0.f;
+    c->parts[pid].rho_dh = 0.f;
+    hydro_init_part(&c->parts[pid]);
+  }
+}
+
+/**
+ * @brief Dump all the particles to a file
+ */
+void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) {
+
+  FILE *file = fopen(fileName, "w");
+
+  /* Write header */
+  fprintf(file,
+          "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s "
+          "%13s %13s %13s\n",
+          "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh",
+          "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz");
+
+  fprintf(file, "# ci --------------------------------------------\n");
+
+  for (size_t pid = 0; pid < ci->count; pid++) {
+    fprintf(file,
+            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+            "%13e %13e %13e\n",
+            ci->parts[pid].id, ci->parts[pid].x[0], ci->parts[pid].x[1],
+            ci->parts[pid].x[2], ci->parts[pid].v[0], ci->parts[pid].v[1],
+            ci->parts[pid].v[2], ci->parts[pid].rho, ci->parts[pid].rho_dh,
+            ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh,
+#ifdef GADGET2_SPH
+            ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0],
+            ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2]
+#else
+            0., 0., 0., 0.
+#endif
+            );
+  }
+
+  fprintf(file, "# cj --------------------------------------------\n");
+
+  for (size_t pjd = 0; pjd < cj->count; pjd++) {
+    fprintf(file,
+            "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e "
+            "%13e %13e %13e\n",
+            cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1],
+            cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1],
+            cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh,
+            cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh,
+#ifdef GADGET2_SPH
+            cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0],
+            cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]
+#else
+            0., 0., 0., 0.
+#endif
+            );
+  }
+
+  fclose(file);
+}
+
+/* Just a forward declaration... */
+void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj);
+
+int main(int argc, char *argv[]) {
+  size_t particles = 0, runs = 0, volume, type = 0;
+  double offset[3] = {0, 0, 0}, h = 1.1255, size = 1., rho = 1.;
+  double perturbation = 0.1;
+  struct cell *ci, *cj;
+  struct space space;
+  struct engine engine;
+  struct runner runner;
+  int c;
+  static unsigned long long partId = 0;
+  char outputFileNameExtension[200] = "";
+  char outputFileName[200] = "";
+  ticks tic, toc, time;
+
+  /* Initialize CPU frequency, this also starts time. */
+  unsigned long long cpufreq = 0;
+  clocks_set_cpufreq(cpufreq);
+
+  srand(0);
+
+  while ((c = getopt(argc, argv, "h:p:r:t:d:f:")) != -1) {
+    switch (c) {
+      case 'h':
+        sscanf(optarg, "%lf", &h);
+        break;
+      case 'p':
+        sscanf(optarg, "%zu", &particles);
+        break;
+      case 'r':
+        sscanf(optarg, "%zu", &runs);
+        break;
+      case 't':
+        sscanf(optarg, "%zu", &type);
+        break;
+      case 'd':
+        sscanf(optarg, "%lf", &perturbation);
+        break;
+      case 'f':
+        strcpy(outputFileNameExtension, optarg);
+        break;
+      case '?':
+        error("Unknown option.");
+        break;
+    }
+  }
+
+  if (h < 0 || particles == 0 || runs == 0 || type > 2) {
+    printf(
+        "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n"
+        "\nGenerates a cell pair, filled with particles on a Cartesian grid."
+        "\nThese are then interacted using runner_dopair1_density."
+        "\n\nOptions:"
+        "\n-t TYPE=0 - cells share face (0), edge (1) or corner (2)"
+        "\n-h DISTANCE=1.1255 - smoothing length"
+        "\n-d PERTURBATION=0.1 - perturbation to apply to the particles [0,1)"
+        "\n-f fileName - part of the file name used to save the dumps\n",
+        argv[0]);
+    exit(1);
+  }
+
+  space.periodic = 0;
+  space.h_max = h;
+
+  engine.s = &space;
+  engine.time = 0.1f;
+  engine.ti_current = 1;
+  runner.e = &engine;
+
+  volume = particles * particles * particles;
+  message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part));
+
+  ci = make_cell(particles, offset, size, h, rho, &partId, perturbation);
+  for (size_t i = 0; i < type + 1; ++i) offset[i] = 1.;
+  cj = make_cell(particles, offset, size, h, rho, &partId, perturbation);
+
+  time = 0;
+  for (size_t i = 0; i < runs; ++i) {
+
+    /* Zero the fields */
+    zero_particle_fields(ci);
+    zero_particle_fields(cj);
+
+    tic = getticks();
+
+    /* Run the test */
+    runner_dopair1_density(&runner, ci, cj);
+
+    toc = getticks();
+    time += toc - tic;
+
+    /* Dump if necessary */
+    if (i % 50 == 0) {
+      sprintf(outputFileName, "swift_dopair_%s.dat", outputFileNameExtension);
+      dump_particle_fields(outputFileName, ci, cj);
+    }
+  }
+
+  /* Output timing */
+  message("SWIFT calculation took %lli ticks.", time / runs);
+
+  /* Now perform a brute-force version for accuracy tests */
+
+  /* Zero the fields */
+  zero_particle_fields(ci);
+  zero_particle_fields(cj);
+
+  tic = getticks();
+
+  /* Run the brute-force test */
+  pairs_all_density(&runner, ci, cj);
+
+  toc = getticks();
+
+  /* Dump */
+  sprintf(outputFileName, "brute_force_%s.dat", outputFileNameExtension);
+  dump_particle_fields(outputFileName, ci, cj);
+
+  /* Output timing */
+  message("Brute force calculation took %lli ticks.", toc - tic);
+
+  /* Clean things to make the sanitizer happy ... */
+  clean_up(ci);
+  clean_up(cj);
+
+  return 0;
+}
diff --git a/tests/testPair.sh b/tests/testPair.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f6f505e56a2c7a5c3cff0ec04bd871278634193c
--- /dev/null
+++ b/tests/testPair.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+rm -f brute_force_standard.dat swift_dopair_standard.dat
+
+./testPair -p 6 -r 1 -d 0 -f standard
+
+python difffloat.py brute_force_standard.dat swift_dopair_standard.dat tolerance.dat
+
+exit $?
diff --git a/tests/testPairPerturbed.sh b/tests/testPairPerturbed.sh
new file mode 100755
index 0000000000000000000000000000000000000000..544ba1b032da8426c065dcfb2ce3ee554c5e76a1
--- /dev/null
+++ b/tests/testPairPerturbed.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+rm -f brute_force_perturbed.dat swift_dopair_perturbed.dat
+
+./testPair -p 6 -r 1 -d 0.1 -f perturbed
+
+python difffloat.py brute_force_perturbed.dat swift_dopair_perturbed.dat tolerance.dat
+
+exit $?
diff --git a/tests/testParser.c b/tests/testParser.c
new file mode 100644
index 0000000000000000000000000000000000000000..a4b8789fca056fef659bca78eae9d0effb2ceb66
--- /dev/null
+++ b/tests/testParser.c
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (C) 2016 James Willis (james.s.willis@durham.ac.uk).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+#include "parser.h"
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+int main(int argc, char *argv[]) {
+
+  /* Check that a parameter file name was provided. */
+  if (argc < 2) {
+    printf("Usage: %s PARAMETER_FILE\n", argv[0]);
+    return 1;
+  }
+
+  const char *input_file = argv[1];
+
+  /* Create a structure to read file into. */
+  struct swift_params param_file;
+
+  /* Create variables that will be set from the parameter file. */
+  int no_of_threads = 0;
+  int no_of_time_steps = 0;
+  float max_h = 0.0f;
+  double start_time = 0.0;
+  char ic_file[PARSER_MAX_LINE_SIZE];
+
+  /* Read the parameter file. */
+  parser_read_file(input_file, &param_file);
+
+  /* Print the contents of the structure. */
+  parser_print_params(&param_file);
+
+  /* Retrieve parameters and store them in variables defined above.
+   * Have to specify the name of the parameter as it appears in the
+   * input file: testParserInput.yaml. */
+  parser_get_param_int(&param_file, "no_of_threads", &no_of_threads);
+  parser_get_param_int(&param_file, "no_of_time_steps", &no_of_time_steps);
+  parser_get_param_float(&param_file, "max_h", &max_h);
+  parser_get_param_double(&param_file, "start_time", &start_time);
+  parser_get_param_string(&param_file, "ic_file", ic_file);
+
+  /* Print the variables to check their values are correct.
*/ + printf( + "no_of_threads: %d, no_of_time_steps: %d, max_h: %f, start_time: %lf, " + "ic_file: %s\n", + no_of_threads, no_of_time_steps, max_h, start_time, ic_file); + + assert(no_of_threads == 16); + assert(no_of_time_steps == 10); + assert(fabs(max_h - 1.1255) < 0.00001); + assert(fabs(start_time - 1.23456789) < 0.00001); + assert(strcmp(ic_file, "ic_file.ini") == 0); /*strcmp returns 0 if correct.*/ + + return 0; +} diff --git a/tests/testParser.sh b/tests/testParser.sh new file mode 100755 index 0000000000000000000000000000000000000000..3dad7f386f792ff2beb6e94eb093bad4085023a4 --- /dev/null +++ b/tests/testParser.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +./testParser testParserInput.yaml diff --git a/tests/testParserInput.yaml b/tests/testParserInput.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d695e6a8ddd327e31224f36a6e34767ea8d36408 --- /dev/null +++ b/tests/testParserInput.yaml @@ -0,0 +1,9 @@ +--- +no_of_threads: 16 # The number of threads that will be used. +no_of_time_steps: 10 +max_h: 1.1255 +start_time: 1.23456789 +#Input file +ic_file: ic_file.ini + +... diff --git a/tests/testReading.c b/tests/testReading.c index d2a2a766171a85ace486914f0f39a987d9d8c3d3..9dda4c7bad75d35a8a93e0c2acb0619409a91afd 100644 --- a/tests/testReading.c +++ b/tests/testReading.c @@ -22,7 +22,7 @@ int main() { - int Ngas = -1, Ngpart = -1; + size_t Ngas = 0, Ngpart = 0; int periodic = -1; int i, j, k, n; double dim[3]; diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 984b8ea867250d0bda1bc14d2600279a27321b2c..223078ecb637e64d94e37cdf8c0f60a86bdd5ff7 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -77,6 +77,10 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { #ifdef DEFAULT_SPH +/* Just a forward declaration... */ +void runner_doself1_density(struct runner *r, struct cell *ci); +void runner_doself2_force(struct runner *r, struct cell *ci); + /* Run a full time step integration for one cell */ int main() { @@ -132,7 +136,7 @@ int main() { /* Initialise the particles */ for (j = 0; j < 27; ++j) { - runner_doinit(&r, cells[j]); + runner_doinit(&r, cells[j], 0); } /* Compute density */ @@ -145,7 +149,7 @@ int main() { runner_doself2_force(&r, ci); runner_dokick(&r, ci, 1); - message("t_end=%f", p->t_end); + message("ti_end=%d", p->ti_end); free(ci->parts); free(ci->xparts); diff --git a/tests/testSingle.c b/tests/testSingle.c index c85b77ff1c5b2285c33fa7787bbd53deab463039..8771fba0c1912905d3936562fa9dad0223d89220 100644 --- a/tests/testSingle.c +++ b/tests/testSingle.c @@ -91,8 +91,8 @@ int main(int argc, char *argv[]) { p2.force.POrho2 = p2.u * (const_hydro_gamma - 1.0f) / p2.rho; /* Dump a header. */ - printParticle_single(&p1); - printParticle_single(&p2); + //printParticle_single(&p1, NULL); + //printParticle_single(&p2, NULL); printf("# r a_1 udt_1 a_2 udt_2\n"); /* Loop over the different radii. */ @@ -103,9 +103,9 @@ int main(int argc, char *argv[]) { r2 = dx[0] * dx[0]; /* Clear the particle fields. */ - p1.a[0] = 0.0f; + p1.a_hydro[0] = 0.0f; p1.force.u_dt = 0.0f; - p2.a[0] = 0.0f; + p2.a_hydro[0] = 0.0f; p2.force.u_dt = 0.0f; /* Interact the particles. */ @@ -130,8 +130,8 @@ int main(int argc, char *argv[]) { /* Output the results. 
*/ printf( - "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], p1.a[0], - p1.a[1], p1.a[2], p1.force.u_dt, + "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], + p1.a_hydro[0], p1.a_hydro[1], p1.a_hydro[2], p1.force.u_dt, /// -dx[0] , p1.rho , p1.density.wcount , p2.rho , p2.density.wcount , w, dwdx, gradw[0], gradw[1], gradw[2]); diff --git a/tests/testVectorize.c b/tests/testVectorize.c deleted file mode 100644 index a18b6e8af5ac3f7b94bd7be3bdf8fd21e49681ff..0000000000000000000000000000000000000000 --- a/tests/testVectorize.c +++ /dev/null @@ -1,212 +0,0 @@ -#include <fenv.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <unistd.h> -#include "swift.h" - -/* n is both particles per axis and box size: - * particles are generated on a mesh with unit spacing - */ -struct cell *make_cell(size_t n, double *offset, double h, - unsigned long long *partId) { - size_t count = n * n * n; - struct cell *cell = malloc(sizeof *cell); - struct part *part; - size_t x, y, z, size; - - size = count * sizeof(struct part); - if (posix_memalign((void **)&cell->parts, part_align, size) != 0) { - error("couldn't allocate particles, no. of particles: %d", (int)count); - } - - part = cell->parts; - for (x = 0; x < n; ++x) { - for (y = 0; y < n; ++y) { - for (z = 0; z < n; ++z) { - // Add .5 for symmetry: 0.5, 1.5, 2.5 vs. 0, 1, 2 - part->x[0] = x + offset[0] + 0.5; - part->x[1] = y + offset[1] + 0.5; - part->x[2] = z + offset[2] + 0.5; - part->v[0] = 1.0f; - part->v[1] = 1.0f; - part->v[2] = 1.0f; - part->h = h; - part->id = ++(*partId); - part->mass = 1.0f; - part->ti_begin = 0; - part->ti_end = 1; - ++part; - } - } - } - - cell->split = 0; - cell->h_max = h; - cell->count = count; - cell->dx_max = 1.; - cell->h[0] = n; - cell->h[1] = n; - cell->h[2] = n; - - cell->sort = malloc(13 * count * sizeof *cell->sort); - runner_dosort(NULL, cell, 0x1FFF, 0); - - return cell; -} - -void clean_up(struct cell *ci) { - free(ci->parts); - free(ci->sort); - free(ci); -} - -/** - * @brief Initializes all particles field to be ready for a density calculation - */ -void zero_particle_fields(struct cell *c) { - - for (size_t pid = 0; pid < c->count; pid++) { - c->parts[pid].rho = 0.f; - c->parts[pid].rho_dh = 0.f; - hydro_init_part(&c->parts[pid]); - } -} - -/** - * @brief Dump all the particles to a file - */ -void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { - - FILE *file = fopen(fileName, "w"); - - fprintf(file, - "# ID rho rho_dh wcount wcount_dh div_v curl_v:[x y z]\n"); - - for (size_t pid = 0; pid < ci->count; pid++) { - fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", ci->parts[pid].id, - ci->parts[pid].rho, ci->parts[pid].rho_dh, - ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh, - ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0], - ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2]); - } - - fprintf(file, "# -----------------------------------\n"); - - for (size_t pjd = 0; pjd < cj->count; pjd++) { - fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", cj->parts[pjd].id, - cj->parts[pjd].rho, cj->parts[pjd].rho_dh, - cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, - cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], - cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]); - } - - fclose(file); -} - -/* Just a forward declaration... 
*/ -void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); - -int main(int argc, char *argv[]) { - size_t particles = 0, runs = 0, volume, type = 0; - double offset[3] = {0, 0, 0}, h = 1.1255; // * DIM/PARTS_PER_AXIS == * 1 - struct cell *ci, *cj; - struct space space; - struct engine engine; - struct runner runner; - char c; - static unsigned long long partId = 0; - ticks tic, toc, time; - - while ((c = getopt(argc, argv, "h:p:r:t:")) != -1) { - switch (c) { - case 'h': - sscanf(optarg, "%lf", &h); - break; - case 'p': - sscanf(optarg, "%zu", &particles); - break; - case 'r': - sscanf(optarg, "%zu", &runs); - break; - case 't': - sscanf(optarg, "%zu", &type); - break; - } - } - - if (h < 0 || particles == 0 || runs == 0 || type > 2) { - printf( - "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" - "\nGenerates a cell pair, filled with particles on a Cartesian grid." - "\nThese are then interacted using runner_dopair1_density." - "\n\nOptions:" - "\n-t TYPE=0 - cells share face (0), edge (1) or corner (2)" - "\n-h DISTANCE=1.1255 - smoothing length\n", - argv[0]); - exit(1); - } - - volume = particles * particles * particles; - message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part)); - - ci = make_cell(particles, offset, h, &partId); - for (size_t i = 0; i < type + 1; ++i) offset[i] = particles; - cj = make_cell(particles, offset, h, &partId); - - for (int i = 0; i < 3; ++i) { - space.h_max = h; - space.dt_step = 0.1; - } - - engine.s = &space; - engine.time = 0.1f; - runner.e = &engine; - - time = 0; - for (size_t i = 0; i < runs; ++i) { - - /* Zero the fields */ - zero_particle_fields(ci); - zero_particle_fields(cj); - - tic = getticks(); - - /* Run the test */ - runner_dopair1_density(&runner, ci, cj); - - toc = getticks(); - time += toc - tic; - - /* Dump if necessary */ - if (i % 50 == 0) dump_particle_fields("swift_dopair.dat", ci, cj); - } - - /* Output timing */ - message("SWIFT calculation took %lli ticks.", time / runs); - - /* Now perform a brute-force version for accuracy tests */ - - /* Zero the fields */ - zero_particle_fields(ci); - zero_particle_fields(cj); - - tic = getticks(); - - /* Run the test */ - pairs_all_density(&runner, ci, cj); - - toc = getticks(); - - /* Dump */ - dump_particle_fields("brute_force.dat", ci, cj); - - /* Output timing */ - message("Brute force calculation took %lli ticks.", toc - tic); - - /* Clean things to make the sanitizer happy ... */ - clean_up(ci); - clean_up(cj); - - return 0; -} diff --git a/tests/tolerance.dat b/tests/tolerance.dat new file mode 100644 index 0000000000000000000000000000000000000000..48de4383eab6214812183be25d3036a324ccbc27 --- /dev/null +++ b/tests/tolerance.dat @@ -0,0 +1,3 @@ +# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-5 1e-5 2e-5 3e-4 1e-5 1e-5 1e-5 1e-5 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-5 1.2e-5 1e-5 1e-5 1e-4 1e-4 1e-4 1e-4
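
A closing note on tolerance.dat: difffloat.py itself is not part of this diff, so its exact semantics are not visible here. The file's shape (a header naming the 15 dumped fields, then two rows of per-field thresholds) suggests the common convention of one absolute and one relative tolerance per column. The sketch below implements that rule under this assumption, purely as an illustration of the accuracy check the test scripts perform; the real difffloat.py may differ in detail:

#include <math.h>
#include <stdio.h>

/* Hypothetical per-field acceptance test in the spirit of difffloat.py:
 * a SWIFT value 'a' matches the brute-force reference 'b' if the two agree
 * within the absolute tolerance OR within the relative tolerance.
 * (Assumption: row 1 of tolerance.dat is absolute, row 2 is relative.) */
static int within_tolerance(double a, double b, double abs_tol,
                            double rel_tol) {
  const double diff = fabs(a - b);
  if (diff <= abs_tol) return 1;
  return b != 0. && diff / fabs(b) <= rel_tol;
}

int main(void) {
  /* e.g. the rho column of tolerance.dat: abs_tol = 1e-5, rel_tol = 1e-5 */
  printf("%d\n", within_tolerance(1.000009, 1.0, 1e-5, 1e-5)); /* prints 1 */
  printf("%d\n", within_tolerance(1.1, 1.0, 1e-5, 1e-5));      /* prints 0 */
  return 0;
}
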