diff --git a/.gitignore b/.gitignore index 8137ea759b24b3f4ec9909a460da4bcb47b0a1ac..9bae25ebff81d077253fd8f1227aad98545d28a0 100644 --- a/.gitignore +++ b/.gitignore @@ -25,15 +25,23 @@ examples/swift_mindt examples/swift_mindt_mpi examples/swift_mpi -tests/testVectorize -tests/brute_force.dat -tests/swift_dopair.dat +tests/testPair +tests/brute_force_standard.dat +tests/swift_dopair_standard.dat +tests/brute_force_perturbed.dat +tests/swift_dopair_perturbed.dat +tests/test27cells +tests/brute_force_27_standard.dat +tests/swift_dopair_27_standard.dat +tests/brute_force_27_perturbed.dat +tests/swift_dopair_27_perturbed.dat tests/testGreetings tests/testReading tests/input.hdf5 tests/testSingle tests/testTimeIntegration tests/testSPHStep +tests/testParser theory/latex/swift.pdf diff --git a/examples/main.c b/examples/main.c index 121439e5afa1f9916d1702873c9432a86c598df6..e68f9e33f8c5c8c59eb6a9d63c040188cb982de3 100644 --- a/examples/main.c +++ b/examples/main.c @@ -1,4 +1,3 @@ - /******************************************************************************* * This file is part of SWIFT. * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk), @@ -68,7 +67,6 @@ * @brief Main routine that loads a few particles and generates some output. * */ - int main(int argc, char *argv[]) { int c, icount, periodic = 1; @@ -92,7 +90,8 @@ int main(int argc, char *argv[]) { int nr_nodes = 1, myrank = 0; FILE *file_thread; int with_outputs = 1; - int with_gravity = 0; + int with_external_gravity = 0; + int with_self_gravity = 0; int engine_policies = 0; int verbose = 0, talking = 0; unsigned long long cpufreq = 0; @@ -171,7 +170,7 @@ int main(int argc, char *argv[]) { bzero(&s, sizeof(struct space)); /* Parse the options */ - while ((c = getopt(argc, argv, "a:c:d:e:f:gh:m:oP:q:R:s:t:v:w:y:z:")) != -1) + while ((c = getopt(argc, argv, "a:c:d:e:f:gGh:m:oP:q:R:s:t:v:w:y:z:")) != -1) switch (c) { case 'a': if (sscanf(optarg, "%lf", &scaling) != 1) @@ -201,7 +200,10 @@ int main(int argc, char *argv[]) { if (!strcpy(ICfileName, optarg)) error("Error parsing IC file name."); break; case 'g': - with_gravity = 1; + with_external_gravity = 1; + break; + case 'G': + with_self_gravity = 1; break; case 'h': if (sscanf(optarg, "%llu", &cpufreq) != 1) @@ -361,10 +363,6 @@ int main(int argc, char *argv[]) { message("CPU frequency used for tick conversion: %llu Hz", cpufreq); } - /* Check we have sensible time step bounds */ - if (dt_min > dt_max) - error("Minimal time step size must be large than maximal time step size "); - /* Check whether an IC file has been provided */ if (strcmp(ICfileName, "") == 0) error("An IC file name must be provided via the option -f"); @@ -375,7 +373,7 @@ int main(int argc, char *argv[]) { #if defined(WITH_MPI) #if defined(HAVE_PARALLEL_HDF5) read_ic_parallel(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, - myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL); + myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL); #else read_ic_serial(ICfileName, dim, &parts, &gparts, &Ngas, &Ngpart, &periodic, myrank, nr_nodes, MPI_COMM_WORLD, MPI_INFO_NULL); @@ -406,10 +404,10 @@ int main(int argc, char *argv[]) { #endif /* MATTHIEU: Temporary fix to preserve master */ - if (!with_gravity) { + if (!with_external_gravity && !with_self_gravity) { free(gparts); - for(size_t k = 0; k < Ngas; ++k) - parts[k].gpart = NULL; + gparts = NULL; + for (size_t k = 0; k < Ngas; ++k) parts[k].gpart = NULL; Ngpart = 0; #if defined(WITH_MPI) N_long[0] = Ngas; @@ -482,7 +480,8 @@ int main(int argc, char *argv[]) { /* Construct the engine policy */ engine_policies = ENGINE_POLICY | engine_policy_steal | engine_policy_hydro; - if (with_gravity) engine_policies |= engine_policy_external_gravity; + if (with_external_gravity) engine_policies |= engine_policy_external_gravity; + if (with_self_gravity) engine_policies |= engine_policy_self_gravity; /* Initialize the engine with this space. */ if (myrank == 0) clocks_gettime(&tic); @@ -560,8 +559,8 @@ int main(int argc, char *argv[]) { /* Legend */ if (myrank == 0) printf( - "# Step Time time-step Number of updates CPU Wall-clock time " - "[%s]\n", + "# Step Time time-step Number of updates Number of updates " + "CPU Wall-clock time [%s]\n", clocks_getunit()); /* Let loose a runner on the space. */ diff --git a/src/Makefile.am b/src/Makefile.am index f44d47819672d10445fd969fe2ff20dbcb49463b..15c05a2a00d33ad86e7144b4a8e377252a2eedce 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,13 +35,13 @@ endif # List required headers include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ engine.h swift.h serial_io.h timers.h debug.h scheduler.h proxy.h parallel_io.h \ - common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h + common_io.h single_io.h multipole.h map.h tools.h partition.h clocks.h parser.h # Common source files AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c \ serial_io.c timers.c debug.c scheduler.c proxy.c parallel_io.c \ units.c common_io.c single_io.c multipole.c version.c map.c \ - kernel.c tools.c part.c partition.c clocks.c + kernel.c tools.c part.c partition.c clocks.c parser.c # Include files for distribution, not installation. nobase_noinst_HEADERS = approx_math.h atomic.h cycle.h error.h inline.h kernel.h vector.h \ diff --git a/src/cell.c b/src/cell.c index 68f4fcf7f9967b7ad42d8597190e82a5baaaf951..b2a27fb99038cfe71af7abeb83af911868bc9d87 100644 --- a/src/cell.c +++ b/src/cell.c @@ -45,6 +45,7 @@ /* Local headers. */ #include "atomic.h" #include "error.h" +#include "gravity.h" #include "hydro.h" #include "space.h" #include "timers.h" @@ -89,14 +90,18 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { c->ti_end_min = pc->ti_end_min; c->ti_end_max = pc->ti_end_max; c->count = pc->count; + c->gcount = pc->gcount; c->tag = pc->tag; - /* Fill the progeny recursively, depth-first. */ + /* Number of new cells created. */ int count = 1; + + /* Fill the progeny recursively, depth-first. */ for (int k = 0; k < 8; k++) if (pc->progeny[k] >= 0) { struct cell *temp = space_getcell(s); temp->count = 0; + temp->gcount = 0; temp->loc[0] = c->loc[0]; temp->loc[1] = c->loc[1]; temp->loc[2] = c->loc[2]; @@ -122,7 +127,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { } /** - * @brief Link the cells recursively to the given part array. + * @brief Link the cells recursively to the given #part array. * * @param c The #cell. * @param parts The #part array. @@ -130,7 +135,7 @@ int cell_unpack(struct pcell *pc, struct cell *c, struct space *s) { * @return The number of particles linked. */ -int cell_link(struct cell *c, struct part *parts) { +int cell_link_parts(struct cell *c, struct part *parts) { c->parts = parts; @@ -139,14 +144,40 @@ int cell_link(struct cell *c, struct part *parts) { int offset = 0; for (int k = 0; k < 8; k++) { if (c->progeny[k] != NULL) - offset += cell_link(c->progeny[k], &parts[offset]); + offset += cell_link_parts(c->progeny[k], &parts[offset]); } } - /* Return the total number of unpacked cells. */ + /* Return the total number of linked particles. */ return c->count; } +/** + * @brief Link the cells recursively to the given #gpart array. + * + * @param c The #cell. + * @param gparts The #gpart array. + * + * @return The number of particles linked. + */ + +int cell_link_gparts(struct cell *c, struct gpart *gparts) { + + c->gparts = gparts; + + /* Fill the progeny recursively, depth-first. */ + if (c->split) { + int offset = 0; + for (int k = 0; k < 8; k++) { + if (c->progeny[k] != NULL) + offset += cell_link_gparts(c->progeny[k], &gparts[offset]); + } + } + + /* Return the total number of linked particles. */ + return c->gcount; +} + /** * @brief Pack the data of the given cell and all it's sub-cells. * @@ -164,6 +195,7 @@ int cell_pack(struct cell *c, struct pcell *pc) { pc->ti_end_min = c->ti_end_min; pc->ti_end_max = c->ti_end_max; pc->count = c->count; + pc->gcount = c->gcount; c->tag = pc->tag = atomic_inc(&cell_next_tag) % cell_max_tag; /* Fill in the progeny, depth-first recursion. */ @@ -579,6 +611,27 @@ void cell_init_parts(struct cell *c, void *data) { c->ti_end_max = 0; } +/** + * @brief Initialises all g-particles to a valid state even if the ICs were + *stupid + * + * @param c Cell to act upon + * @param data Unused parameter + */ +void cell_init_gparts(struct cell *c, void *data) { + + struct gpart *gp = c->gparts; + const int gcount = c->gcount; + + for (int i = 0; i < gcount; ++i) { + gp[i].ti_begin = 0; + gp[i].ti_end = 0; + gravity_first_init_gpart(&gp[i]); + } + c->ti_end_min = 0; + c->ti_end_max = 0; +} + /** * @brief Converts hydro quantities to a valid state after the initial density *calculation diff --git a/src/cell.h b/src/cell.h index 25e9342cab8948310ad58e28b13fd53282e18c28..607073660946fac1acda5cea512ed8adb4aa5218 100644 --- a/src/cell.h +++ b/src/cell.h @@ -44,7 +44,7 @@ struct pcell { int ti_end_min, ti_end_max; /* Number of particles in this cell. */ - int count; + int count, gcount; /* tag used for MPI communication. */ int tag; @@ -144,7 +144,7 @@ struct cell { double mass, e_pot, e_int, e_kin; /* Number of particles updated in this cell. */ - int updated; + int updated, g_updated; /* Linking pointer for "memory management". */ struct cell *next; @@ -178,8 +178,10 @@ void cell_gunlocktree(struct cell *c); int cell_pack(struct cell *c, struct pcell *pc); int cell_unpack(struct pcell *pc, struct cell *c, struct space *s); int cell_getsize(struct cell *c); -int cell_link(struct cell *c, struct part *parts); +int cell_link_parts(struct cell *c, struct part *parts); +int cell_link_gparts(struct cell *c, struct gpart *gparts); void cell_init_parts(struct cell *c, void *data); +void cell_init_gparts(struct cell *c, void *data); void cell_convert_hydro(struct cell *c, void *data); void cell_clean_links(struct cell *c, void *data); diff --git a/src/common_io.c b/src/common_io.c index 5fb2d9513ec2acc0cd8d389a226b14d427e02539..6183effe9ce392ab930c581cbd118f025bbce773 100644 --- a/src/common_io.c +++ b/src/common_io.c @@ -502,7 +502,7 @@ void writeXMFline(FILE* xmfFile, char* fileName, char* partTypeGroupName, * @param gparts The array of #gpart freshly read in. * @param Ndm The number of DM particles read in. */ -void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) { +void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm) { /* Let's give all these gparts a negative id */ for (size_t i = 0; i < Ndm; ++i) { @@ -527,8 +527,9 @@ void prepare_dm_gparts(struct gpart* gparts, size_t Ndm) { * @param Ngas The number of gas particles read in. * @param Ndm The number of DM particles read in. */ -void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, - size_t Ngas, size_t Ndm) { +void duplicate_hydro_gparts(struct part* const parts, + struct gpart* const gparts, size_t Ngas, + size_t Ndm) { for (size_t i = 0; i < Ngas; ++i) { @@ -557,16 +558,19 @@ void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, * @param dmparts The array of #gpart containg DM particles to be filled. * @param Ndm The number of DM particles. */ -void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts, - size_t Ndm) { +void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, + struct gpart* const dmparts, size_t Ndm) { size_t count = 0; /* Loop over all gparts */ for (size_t i = 0; i < Ntot; ++i) { + /* message("i=%zd count=%zd id=%lld part=%p", i, count, gparts[i].id, + * gparts[i].part); */ + /* And collect the DM ones */ - if (gparts[i].id < 0) { + if (gparts[i].id < 0LL) { memcpy(&dmparts[count], &gparts[i], sizeof(struct gpart)); dmparts[count].id = -dmparts[count].id; count++; diff --git a/src/common_io.h b/src/common_io.h index 4ad0c6fb754c4288a0c731e2b1e2392998719d52..961f40e63d771e5e06ade525301caf59aae0bceb 100644 --- a/src/common_io.h +++ b/src/common_io.h @@ -78,11 +78,12 @@ extern const char* particle_type_names[]; hid_t hdf5Type(enum DATA_TYPE type); size_t sizeOfType(enum DATA_TYPE type); -void collect_dm_gparts(struct gpart* gparts, size_t Ntot, struct gpart* dmparts, - size_t Ndm); -void prepare_dm_gparts(struct gpart* gparts, size_t Ndm); -void duplicate_hydro_gparts(struct part* parts, struct gpart* gparts, - size_t Ngas, size_t Ndm); +void collect_dm_gparts(const struct gpart* const gparts, size_t Ntot, + struct gpart* const dmparts, size_t Ndm); +void prepare_dm_gparts(struct gpart* const gparts, size_t Ndm); +void duplicate_hydro_gparts(struct part* const parts, + struct gpart* const gparts, size_t Ngas, + size_t Ndm); void readAttribute(hid_t grp, char* name, enum DATA_TYPE type, void* data); diff --git a/src/engine.c b/src/engine.c index 05198b5dd9435713e894e3353d19b85abed1d6e3..1b196c3c13e116d6b24a5c735af9112fdb98458f 100644 --- a/src/engine.c +++ b/src/engine.c @@ -157,39 +157,56 @@ void engine_make_ghost_tasks(struct engine *e, struct cell *c, * @brief Redistribute the particles amongst the nodes according * to their cell's node IDs. * + * The strategy here is as follows: + * 1) Each node counts the number of particles it has to send to each other + * node. + * 2) The number of particles of each type is then exchanged. + * 3) The particles to send are placed in a temporary buffer in which the + * part-gpart links are preserved. + * 4) Each node allocates enough space for the new particles. + * 5) (Asynchronous) communications are issued to transfer the data. + * + * * @param e The #engine. */ - void engine_redistribute(struct engine *e) { #ifdef WITH_MPI - int nr_nodes = e->nr_nodes, nodeID = e->nodeID; + const int nr_nodes = e->nr_nodes; + const int nodeID = e->nodeID; struct space *s = e->s; - int my_cells = 0; - int *cdim = s->cdim; struct cell *cells = s->cells; - int nr_cells = s->nr_cells; + const int nr_cells = s->nr_cells; + const int *cdim = s->cdim; + const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + struct part *parts = s->parts; + struct xpart *xparts = s->xparts; + struct gpart *gparts = s->gparts; ticks tic = getticks(); - /* Start by sorting the particles according to their nodes and - getting the counts. The counts array is indexed as - count[from * nr_nodes + to]. */ - int *counts; - size_t *dest; - double ih[3], dim[3]; - ih[0] = s->ih[0]; - ih[1] = s->ih[1]; - ih[2] = s->ih[2]; - dim[0] = s->dim[0]; - dim[1] = s->dim[1]; - dim[2] = s->dim[2]; - if ((counts = (int *)malloc(sizeof(int) *nr_nodes *nr_nodes)) == NULL || - (dest = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL) - error("Failed to allocate count and dest buffers."); + /* Allocate temporary arrays to store the counts of particles to be sent + and the destination of each particle */ + int *counts, *g_counts; + if ((counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate count temporary buffer."); + if ((g_counts = (int *)malloc(sizeof(int) * nr_nodes * nr_nodes)) == NULL) + error("Failed to allocate gcount temporary buffer."); bzero(counts, sizeof(int) * nr_nodes * nr_nodes); - struct part *parts = s->parts; + bzero(g_counts, sizeof(int) * nr_nodes * nr_nodes); + + // Allocate the destination index arrays. + int *dest, *g_dest; + if ((dest = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) + error("Failed to allocate dest temporary buffer."); + if ((g_dest = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) + error("Failed to allocate g_dest temporary buffer."); + + /* Get destination of each particle */ for (size_t k = 0; k < s->nr_parts; k++) { + + /* Periodic boundary conditions */ for (int j = 0; j < 3; j++) { if (parts[k].x[j] < 0.0) parts[k].x[j] += dim[j]; @@ -202,36 +219,121 @@ void engine_redistribute(struct engine *e) { error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].", cid, k, parts[k].x[0], parts[k].x[1], parts[k].x[2]); */ dest[k] = cells[cid].nodeID; + + /* The counts array is indexed as count[from * nr_nodes + to]. */ counts[nodeID * nr_nodes + dest[k]] += 1; } + + /* Sort the particles according to their cell index. */ space_parts_sort(s, dest, s->nr_parts, 0, nr_nodes - 1, e->verbose); + /* We need to re-link the gpart partners of parts. */ + int current_dest = dest[0]; + size_t count_this_dest = 0; + for (size_t k = 0; k < s->nr_parts; ++k) { + if (s->parts[k].gpart != NULL) { + + /* As the addresses will be invalidated by the communications, we will */ + /* instead store the absolute index from the start of the sub-array */ + /* of particles to be sent to a given node. */ + /* Recall that gparts without partners have a negative id. */ + /* We will restore the pointers on the receiving node later on. */ + if (dest[k] != current_dest) { + current_dest = dest[k]; + count_this_dest = 0; + } + + /* Debug */ + /* if(s->parts[k].gpart->id < 0) */ + /* error("Trying to link a partnerless gpart !"); */ + + s->parts[k].gpart->id = count_this_dest; + count_this_dest++; + } + } + + /* Get destination of each g-particle */ + for (size_t k = 0; k < s->nr_gparts; k++) { + + /* Periodic boundary conditions */ + for (int j = 0; j < 3; j++) { + if (gparts[k].x[j] < 0.0) + gparts[k].x[j] += dim[j]; + else if (gparts[k].x[j] >= dim[j]) + gparts[k].x[j] -= dim[j]; + } + const int cid = cell_getid(cdim, gparts[k].x[0] * ih[0], + gparts[k].x[1] * ih[1], gparts[k].x[2] * ih[2]); + /* if (cid < 0 || cid >= s->nr_cells) + error("Bad cell id %i for part %i at [%.3e,%.3e,%.3e].", + cid, k, g_parts[k].x[0], g_parts[k].x[1], g_parts[k].x[2]); */ + g_dest[k] = cells[cid].nodeID; + + /* The counts array is indexed as count[from * nr_nodes + to]. */ + g_counts[nodeID * nr_nodes + g_dest[k]] += 1; + } + + /* Sort the gparticles according to their cell index. */ + space_gparts_sort(gparts, g_dest, s->nr_gparts, 0, nr_nodes - 1); + /* Get all the counts from all the nodes. */ if (MPI_Allreduce(MPI_IN_PLACE, counts, nr_nodes * nr_nodes, MPI_INT, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to allreduce particle transfer counts."); - /* Get the new number of parts for this node, be generous in allocating. */ - size_t nr_parts = 0; + /* Get all the g_counts from all the nodes. */ + if (MPI_Allreduce(MPI_IN_PLACE, g_counts, nr_nodes * nr_nodes, MPI_INT, + MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) + error("Failed to allreduce gparticle transfer counts."); + + /* Each node knows how many parts and gparts will be transferred to every + other node. We can start preparing to receive data */ + + /* Get the new number of parts and gparts for this node */ + size_t nr_parts = 0, nr_gparts = 0; for (int k = 0; k < nr_nodes; k++) nr_parts += counts[k * nr_nodes + nodeID]; + for (int k = 0; k < nr_nodes; k++) + nr_gparts += g_counts[k * nr_nodes + nodeID]; + + /* Allocate the new arrays with some extra margin */ struct part *parts_new = NULL; - struct xpart *xparts_new = NULL, *xparts = s->xparts; + struct xpart *xparts_new = NULL; + struct gpart *gparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, - sizeof(struct part) * nr_parts * 1.2) != 0 || - posix_memalign((void **)&xparts_new, part_align, - sizeof(struct xpart) * nr_parts * 1.2) != 0) + sizeof(struct part) * nr_parts * + engine_redistribute_alloc_margin) != 0) error("Failed to allocate new part data."); - - /* Emit the sends and recvs for the particle data. */ + if (posix_memalign((void **)&xparts_new, xpart_align, + sizeof(struct xpart) * nr_parts * + engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new xpart data."); + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * nr_gparts * + engine_redistribute_alloc_margin) != 0) + error("Failed to allocate new gpart data."); + + /* Prepare MPI requests for the asynchronous communications */ MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 4 * nr_nodes)) == + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 6 * nr_nodes)) == NULL) error("Failed to allocate MPI request list."); - for (int k = 0; k < 4 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; - for (size_t offset_send = 0, offset_recv = 0, k = 0; k < nr_nodes; k++) { - int ind_send = nodeID * nr_nodes + k; - int ind_recv = k * nr_nodes + nodeID; + for (int k = 0; k < 6 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; + + /* Emit the sends and recvs for the particle and gparticle data. */ + size_t offset_send = 0, offset_recv = 0; + size_t g_offset_send = 0, g_offset_recv = 0; + for (int k = 0; k < nr_nodes; k++) { + + /* Indices in the count arrays of the node of interest */ + const int ind_send = nodeID * nr_nodes + k; + const int ind_recv = k * nr_nodes + nodeID; + + /* Are we sending any part/xpart ? */ if (counts[ind_send] > 0) { + + /* message("Sending %d part to node %d", counts[ind_send], k); */ + + /* If the send is to the same node, just copy */ if (k == nodeID) { memcpy(&parts_new[offset_recv], &s->parts[offset_send], sizeof(struct part) * counts[ind_recv]); @@ -239,36 +341,73 @@ void engine_redistribute(struct engine *e) { sizeof(struct xpart) * counts[ind_recv]); offset_send += counts[ind_send]; offset_recv += counts[ind_recv]; + + /* Else, emit some communications */ } else { - if (MPI_Isend(&s->parts[offset_send], counts[ind_send], - e->part_mpi_type, k, 2 * ind_send + 0, MPI_COMM_WORLD, - &reqs[4 * k]) != MPI_SUCCESS) - error("Failed to isend parts to node %zi.", k); - if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], - e->xpart_mpi_type, k, 2 * ind_send + 1, MPI_COMM_WORLD, - &reqs[4 * k + 1]) != MPI_SUCCESS) - error("Failed to isend xparts to node %zi.", k); + if (MPI_Isend(&s->parts[offset_send], counts[ind_send], part_mpi_type, + k, 3 * ind_send + 0, MPI_COMM_WORLD, + &reqs[6 * k]) != MPI_SUCCESS) + error("Failed to isend parts to node %i.", k); + if (MPI_Isend(&s->xparts[offset_send], counts[ind_send], xpart_mpi_type, + k, 3 * ind_send + 1, MPI_COMM_WORLD, + &reqs[6 * k + 1]) != MPI_SUCCESS) + error("Failed to isend xparts to node %i.", k); offset_send += counts[ind_send]; } } + + /* Are we sending any gpart ? */ + if (g_counts[ind_send] > 0) { + + /* message("Sending %d gpart to node %d", g_counts[ind_send], k); */ + + /* If the send is to the same node, just copy */ + if (k == nodeID) { + memcpy(&gparts_new[g_offset_recv], &s->gparts[g_offset_send], + sizeof(struct gpart) * g_counts[ind_recv]); + g_offset_send += g_counts[ind_send]; + g_offset_recv += g_counts[ind_recv]; + + /* Else, emit some communications */ + } else { + if (MPI_Isend(&s->gparts[g_offset_send], g_counts[ind_send], + gpart_mpi_type, k, 3 * ind_send + 2, MPI_COMM_WORLD, + &reqs[6 * k + 2]) != MPI_SUCCESS) + error("Failed to isend gparts to node %i.", k); + g_offset_send += g_counts[ind_send]; + } + } + + /* Now emit the corresponding Irecv() */ + + /* Are we receiving any part/xpart from this node ? */ if (k != nodeID && counts[ind_recv] > 0) { - if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], e->part_mpi_type, - k, 2 * ind_recv + 0, MPI_COMM_WORLD, - &reqs[4 * k + 2]) != MPI_SUCCESS) - error("Failed to emit irecv of parts from node %zi.", k); - if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], - e->xpart_mpi_type, k, 2 * ind_recv + 1, MPI_COMM_WORLD, - &reqs[4 * k + 3]) != MPI_SUCCESS) - error("Failed to emit irecv of parts from node %zi.", k); + if (MPI_Irecv(&parts_new[offset_recv], counts[ind_recv], part_mpi_type, k, + 3 * ind_recv + 0, MPI_COMM_WORLD, + &reqs[6 * k + 3]) != MPI_SUCCESS) + error("Failed to emit irecv of parts from node %i.", k); + if (MPI_Irecv(&xparts_new[offset_recv], counts[ind_recv], xpart_mpi_type, + k, 3 * ind_recv + 1, MPI_COMM_WORLD, + &reqs[6 * k + 4]) != MPI_SUCCESS) + error("Failed to emit irecv of xparts from node %i.", k); offset_recv += counts[ind_recv]; } + + /* Are we receiving any gpart from this node ? */ + if (k != nodeID && g_counts[ind_recv] > 0) { + if (MPI_Irecv(&gparts_new[g_offset_recv], g_counts[ind_recv], + gpart_mpi_type, k, 3 * ind_recv + 2, MPI_COMM_WORLD, + &reqs[6 * k + 5]) != MPI_SUCCESS) + error("Failed to emit irecv of gparts from node %i.", k); + g_offset_recv += g_counts[ind_recv]; + } } /* Wait for all the sends and recvs to tumble in. */ - MPI_Status stats[4 * nr_nodes]; + MPI_Status stats[6 * nr_nodes]; int res; - if ((res = MPI_Waitall(4 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { - for (int k = 0; k < 4 * nr_nodes; k++) { + if ((res = MPI_Waitall(6 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 6 * nr_nodes; k++) { char buff[MPI_MAX_ERROR_STRING]; int res; MPI_Error_string(stats[k].MPI_ERROR, buff, &res); @@ -277,33 +416,90 @@ void engine_redistribute(struct engine *e) { error("Failed during waitall for part data."); } + /* We now need to restore the part<->gpart links */ + size_t offset_parts = 0, offset_gparts = 0; + for (int node = 0; node < nr_nodes; ++node) { + + const int ind_recv = node * nr_nodes + nodeID; + const size_t count_parts = counts[ind_recv]; + const size_t count_gparts = g_counts[ind_recv]; + + /* Loop over the gparts received from that node */ + for (size_t k = offset_gparts; k < offset_gparts + count_gparts; ++k) { + + /* Does this gpart have a partner ? */ + if (gparts_new[k].id >= 0) { + + const size_t partner_index = offset_parts + gparts_new[k].id; + + /* Re-link */ + gparts_new[k].part = &parts_new[partner_index]; + gparts_new[k].part->gpart = &gparts_new[k]; + } + } + + offset_parts += count_parts; + offset_gparts += count_gparts; + } + /* Verify that all parts are in the right place. */ /* for ( int k = 0 ; k < nr_parts ; k++ ) { - int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0], parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] ); + int cid = cell_getid( cdim , parts_new[k].x[0]*ih[0], + parts_new[k].x[1]*ih[1], parts_new[k].x[2]*ih[2] ); if ( cells[ cid ].nodeID != nodeID ) - error( "Received particle (%i) that does not belong here (nodeID=%i).", k , cells[ cid ].nodeID ); + error( "Received particle (%i) that does not belong here + (nodeID=%i).", k , cells[ cid ].nodeID ); } */ + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < nr_gparts; ++k) { + + if (gparts_new[k].id > 0) { + + if (gparts_new[k].part->gpart != &gparts_new[k]) + error("Linking problem !"); + + if (gparts_new[k].x[0] != gparts_new[k].part->x[0] || + gparts_new[k].x[1] != gparts_new[k].part->x[1] || + gparts_new[k].x[2] != gparts_new[k].part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < nr_parts; ++k) { + + if (parts_new[k].gpart != NULL) { + + if (parts_new[k].gpart->part != &parts_new[k]) error("Linking problem !"); + } + } + /* Set the new part data, free the old. */ free(parts); free(xparts); + free(gparts); s->parts = parts_new; s->xparts = xparts_new; + s->gparts = gparts_new; s->nr_parts = nr_parts; - s->size_parts = 1.2 * nr_parts; + s->nr_gparts = nr_gparts; + s->size_parts = engine_redistribute_alloc_margin * nr_parts; + s->size_gparts = engine_redistribute_alloc_margin * nr_gparts; - /* Be verbose about what just happened. */ - for (int k = 0; k < nr_cells; k++) - if (cells[k].nodeID == nodeID) my_cells += 1; - if (e->verbose) - message("node %i now has %zi parts in %i cells.", nodeID, nr_parts, - my_cells); - - /* Clean up other stuff. */ + /* Clean up the temporary stuff. */ free(reqs); free(counts); free(dest); + /* Be verbose about what just happened. */ + if (e->verbose) { + int my_cells = 0; + for (int k = 0; k < nr_cells; k++) + if (cells[k].nodeID == nodeID) my_cells += 1; + message("node %i now has %zi parts and %zi gparts in %i cells.", nodeID, + nr_parts, nr_gparts, my_cells); + } + if (e->verbose) message("took %.3f %s.", clocks_from_ticks(getticks() - tic), clocks_getunit()); @@ -565,31 +761,40 @@ void engine_exchange_cells(struct engine *e) { /* Count the number of particles we need to import and re-allocate the buffer if needed. */ - int count_in = 0; + int count_parts_in = 0, count_gparts_in = 0; for (int k = 0; k < nr_proxies; k++) - for (int j = 0; j < e->proxies[k].nr_cells_in; j++) - count_in += e->proxies[k].cells_in[j]->count; - if (count_in > s->size_parts_foreign) { + for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { + count_parts_in += e->proxies[k].cells_in[j]->count; + count_gparts_in += e->proxies[k].cells_in[j]->gcount; + } + if (count_parts_in > s->size_parts_foreign) { if (s->parts_foreign != NULL) free(s->parts_foreign); - s->size_parts_foreign = 1.1 * count_in; + s->size_parts_foreign = 1.1 * count_parts_in; if (posix_memalign((void **)&s->parts_foreign, part_align, sizeof(struct part) * s->size_parts_foreign) != 0) error("Failed to allocate foreign part data."); } + if (count_gparts_in > s->size_gparts_foreign) { + if (s->gparts_foreign != NULL) free(s->gparts_foreign); + s->size_gparts_foreign = 1.1 * count_gparts_in; + if (posix_memalign((void **)&s->gparts_foreign, gpart_align, + sizeof(struct gpart) * s->size_gparts_foreign) != 0) + error("Failed to allocate foreign gpart data."); + } /* Unpack the cells and link to the particle data. */ struct part *parts = s->parts_foreign; + struct gpart *gparts = s->gparts_foreign; for (int k = 0; k < nr_proxies; k++) { for (int j = 0; j < e->proxies[k].nr_cells_in; j++) { - cell_link(e->proxies[k].cells_in[j], parts); + cell_link_parts(e->proxies[k].cells_in[j], parts); + cell_link_gparts(e->proxies[k].cells_in[j], gparts); parts = &parts[e->proxies[k].cells_in[j]->count]; + gparts = &gparts[e->proxies[k].cells_in[j]->gcount]; } } s->nr_parts_foreign = parts - s->parts_foreign; - - /* Is the parts buffer large enough? */ - if (s->nr_parts_foreign > s->size_parts_foreign) - error("Foreign parts buffer too small."); + s->nr_gparts_foreign = gparts - s->gparts_foreign; /* Free the pcell buffer. */ free(pcells); @@ -607,16 +812,24 @@ void engine_exchange_cells(struct engine *e) { * @brief Exchange straying parts with other nodes. * * @param e The #engine. - * @param offset The index in the parts array as of which the foreign parts - *reside. - * @param ind The ID of the foreign #cell. - * @param N The number of stray parts. + * @param offset_parts The index in the parts array as of which the foreign + * parts reside. + * @param ind_part The foreign #cell ID of each part. + * @param Npart The number of stray parts, contains the number of parts received + * on return. + * @param offset_gparts The index in the gparts array as of which the foreign + * parts reside. + * @param ind_gpart The foreign #cell ID of each gpart. + * @param Ngpart The number of stray gparts, contains the number of gparts + * received on return. * - * @return The number of arrived parts copied to parts and xparts. + * Note that this function does not mess-up the linkage between parts and + * gparts, i.e. the received particles have correct linkeage. */ -int engine_exchange_strays(struct engine *e, int offset, size_t *ind, - size_t N) { +void engine_exchange_strays(struct engine *e, size_t offset_parts, + int *ind_part, size_t *Npart, size_t offset_gparts, + int *ind_gpart, size_t *Ngpart) { #ifdef WITH_MPI @@ -626,25 +839,49 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, /* Re-set the proxies. */ for (int k = 0; k < e->nr_proxies; k++) e->proxies[k].nr_parts_out = 0; - /* Put the parts into the corresponding proxies. */ - for (size_t k = 0; k < N; k++) { - const int node_id = e->s->cells[ind[k]].nodeID; + /* Put the parts and gparts into the corresponding proxies. */ + for (size_t k = 0; k < *Npart; k++) { + /* Get the target node and proxy ID. */ + const int node_id = e->s->cells[ind_part[k]].nodeID; if (node_id < 0 || node_id >= e->nr_nodes) error("Bad node ID %i.", node_id); const int pid = e->proxy_ind[node_id]; - if (pid < 0) + if (pid < 0) { error( "Do not have a proxy for the requested nodeID %i for part with " "id=%llu, x=[%e,%e,%e].", - node_id, s->parts[offset + k].id, s->parts[offset + k].x[0], - s->parts[offset + k].x[1], s->parts[offset + k].x[2]); - proxy_parts_load(&e->proxies[pid], &s->parts[offset + k], - &s->xparts[offset + k], 1); + node_id, s->parts[offset_parts + k].id, + s->parts[offset_parts + k].x[0], s->parts[offset_parts + k].x[1], + s->parts[offset_parts + k].x[2]); + } + + /* Re-link the associated gpart with the buffer offset of the part. */ + if (s->parts[offset_parts + k].gpart != NULL) { + s->parts[offset_parts + k].gpart->id = e->proxies[pid].nr_parts_in; + } + + /* Load the part and xpart into the proxy. */ + proxy_parts_load(&e->proxies[pid], &s->parts[offset_parts + k], + &s->xparts[offset_parts + k], 1); + } + for (size_t k = 0; k < *Ngpart; k++) { + const int node_id = e->s->cells[ind_gpart[k]].nodeID; + if (node_id < 0 || node_id >= e->nr_nodes) + error("Bad node ID %i.", node_id); + const int pid = e->proxy_ind[node_id]; + if (pid < 0) + error( + "Do not have a proxy for the requested nodeID %i for part with " + "id=%lli, x=[%e,%e,%e].", + node_id, s->gparts[offset_parts + k].id, + s->gparts[offset_gparts + k].x[0], s->gparts[offset_parts + k].x[1], + s->gparts[offset_gparts + k].x[2]); + proxy_gparts_load(&e->proxies[pid], &s->gparts[offset_gparts + k], 1); } /* Launch the proxies. */ - MPI_Request reqs_in[2 * engine_maxproxies]; - MPI_Request reqs_out[2 * engine_maxproxies]; + MPI_Request reqs_in[3 * engine_maxproxies]; + MPI_Request reqs_out[3 * engine_maxproxies]; for (int k = 0; k < e->nr_proxies; k++) { proxy_parts_exch1(&e->proxies[k]); reqs_in[k] = e->proxies[k].req_parts_count_in; @@ -668,11 +905,18 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, /* Count the total number of incoming particles and make sure we have enough space to accommodate them. */ - size_t count_in = 0; - for (int k = 0; k < e->nr_proxies; k++) count_in += e->proxies[k].nr_parts_in; - if (e->verbose) message("sent out %zi particles, got %zi back.", N, count_in); - if (offset + count_in > s->size_parts) { - s->size_parts = (offset + count_in) * 1.05; + int count_parts_in = 0; + int count_gparts_in = 0; + for (int k = 0; k < e->nr_proxies; k++) { + count_parts_in += e->proxies[k].nr_parts_in; + count_gparts_in += e->proxies[k].nr_gparts_in; + } + if (e->verbose) { + message("sent out %zi/%zi parts/gparts, got %i/%i back.", *Npart, *Ngpart, + count_parts_in, count_gparts_in); + } + if (offset_parts + count_parts_in > s->size_parts) { + s->size_parts = (offset_parts + count_parts_in) * engine_parts_size_grow; struct part *parts_new = NULL; struct xpart *xparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, @@ -680,37 +924,61 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, posix_memalign((void **)&xparts_new, part_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); - memcpy(parts_new, s->parts, sizeof(struct part) * offset); - memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset); + memcpy(parts_new, s->parts, sizeof(struct part) * offset_parts); + memcpy(xparts_new, s->xparts, sizeof(struct xpart) * offset_parts); free(s->parts); free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; } + if (offset_gparts + count_gparts_in > s->size_gparts) { + s->size_gparts = (offset_gparts + count_gparts_in) * engine_parts_size_grow; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * offset_gparts); + free(s->gparts); + s->gparts = gparts_new; + } /* Collect the requests for the particle data from the proxies. */ int nr_in = 0, nr_out = 0; for (int k = 0; k < e->nr_proxies; k++) { if (e->proxies[k].nr_parts_in > 0) { - reqs_in[2 * k] = e->proxies[k].req_parts_in; - reqs_in[2 * k + 1] = e->proxies[k].req_xparts_in; + reqs_in[3 * k] = e->proxies[k].req_parts_in; + reqs_in[3 * k + 1] = e->proxies[k].req_xparts_in; + nr_in += 2; + } else { + reqs_in[3 * k] = reqs_in[3 * k + 1] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_gparts_in > 0) { + reqs_in[3 * k + 2] = e->proxies[k].req_gparts_in; nr_in += 1; - } else - reqs_in[2 * k] = reqs_in[2 * k + 1] = MPI_REQUEST_NULL; + } else { + reqs_in[3 * k + 2] = MPI_REQUEST_NULL; + } if (e->proxies[k].nr_parts_out > 0) { - reqs_out[2 * k] = e->proxies[k].req_parts_out; - reqs_out[2 * k + 1] = e->proxies[k].req_xparts_out; + reqs_out[3 * k] = e->proxies[k].req_parts_out; + reqs_out[3 * k + 1] = e->proxies[k].req_xparts_out; + nr_out += 2; + } else { + reqs_out[3 * k] = reqs_out[3 * k + 1] = MPI_REQUEST_NULL; + } + if (e->proxies[k].nr_gparts_out > 0) { + reqs_out[3 * k + 2] = e->proxies[k].req_gparts_out; nr_out += 1; - } else - reqs_out[2 * k] = reqs_out[2 * k + 1] = MPI_REQUEST_NULL; + } else { + reqs_out[3 * k + 2] = MPI_REQUEST_NULL; + } } /* Wait for each part array to come in and collect the new parts from the proxies. */ - size_t count = 0; - for (int k = 0; k < 2 * (nr_in + nr_out); k++) { - int err = 0, pid = MPI_UNDEFINED; - if ((err = MPI_Waitany(2 * e->nr_proxies, reqs_in, &pid, + int count_parts = 0, count_gparts = 0; + for (int k = 0; k < nr_in; k++) { + int err, pid; + if ((err = MPI_Waitany(3 * e->nr_proxies, reqs_in, &pid, MPI_STATUS_IGNORE)) != MPI_SUCCESS) { char buff[MPI_MAX_ERROR_STRING]; int res; @@ -718,26 +986,46 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, error("MPI_Waitany failed (%s).", buff); } if (pid == MPI_UNDEFINED) break; - // message( "request from proxy %i has arrived." , pid ); - if (reqs_in[pid & ~1] == MPI_REQUEST_NULL && - reqs_in[pid | 1] == MPI_REQUEST_NULL) { + // message( "request from proxy %i has arrived." , pid / 3 ); + pid = 3 * (pid / 3); + + /* If all the requests for a given proxy have arrived... */ + if (reqs_in[pid + 0] == MPI_REQUEST_NULL && + reqs_in[pid + 1] == MPI_REQUEST_NULL && + reqs_in[pid + 2] == MPI_REQUEST_NULL) { + /* Copy the particle data to the part/xpart/gpart arrays. */ struct proxy *p = &e->proxies[pid >> 1]; - memcpy(&s->parts[offset + count], p->parts_in, + memcpy(&s->parts[offset_parts + count_parts], p->parts_in, sizeof(struct part) * p->nr_parts_in); - memcpy(&s->xparts[offset + count], p->xparts_in, + memcpy(&s->xparts[offset_parts + count_parts], p->xparts_in, sizeof(struct xpart) * p->nr_parts_in); + memcpy(&s->gparts[offset_gparts + count_gparts], p->gparts_in, + sizeof(struct gpart) * p->nr_gparts_in); /* for (int k = offset; k < offset + count; k++) message( "received particle %lli, x=[%.3e %.3e %.3e], h=%.3e, from node %i.", s->parts[k].id, s->parts[k].x[0], s->parts[k].x[1], s->parts[k].x[2], s->parts[k].h, p->nodeID); */ - count += p->nr_parts_in; + + /* Re-link the gparts. */ + for (int k = 0; k < p->nr_gparts_in; k++) { + struct gpart *gp = &s->gparts[offset_gparts + count_gparts + k]; + if (gp->id >= 0) { + struct part *p = &s->parts[offset_gparts + count_parts + gp->id]; + gp->part = p; + p->gpart = gp; + } + } + + /* Advance the counters. */ + count_parts += p->nr_parts_in; + count_gparts += p->nr_gparts_in; } } /* Wait for all the sends to have finished too. */ if (nr_out > 0) - if (MPI_Waitall(2 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != + if (MPI_Waitall(3 * e->nr_proxies, reqs_out, MPI_STATUSES_IGNORE) != MPI_SUCCESS) error("MPI_Waitall on sends failed."); @@ -746,11 +1034,11 @@ int engine_exchange_strays(struct engine *e, int offset, size_t *ind, clocks_getunit()); /* Return the number of harvested parts. */ - return count; + *Npart = count_parts; + *Ngpart = count_gparts; #else error("SWIFT was not compiled with MPI support."); - return 0; #endif } @@ -1288,6 +1576,7 @@ int engine_marktasks(struct engine *e) { else if (t->type == task_type_kick) { t->skip = (t->ci->ti_end_min > ti_end); t->ci->updated = 0; + t->ci->g_updated = 0; } /* Drift? */ @@ -1344,6 +1633,7 @@ void engine_print_task_counts(struct engine *e) { printf(" skipped=%i ]\n", counts[task_type_count]); fflush(stdout); message("nr_parts = %zi.", e->s->nr_parts); + message("nr_gparts = %zi.", e->s->nr_gparts); } /** @@ -1473,7 +1763,7 @@ void engine_collect_kick(struct cell *c) { if (c->kick != NULL) return; /* Counters for the different quantities. */ - int updated = 0; + int updated = 0, g_updated = 0; double e_kin = 0.0, e_int = 0.0, e_pot = 0.0; float mom[3] = {0.0f, 0.0f, 0.0f}, ang[3] = {0.0f, 0.0f, 0.0f}; int ti_end_min = max_nr_timesteps, ti_end_max = 0; @@ -1496,6 +1786,7 @@ void engine_collect_kick(struct cell *c) { ti_end_min = min(ti_end_min, cp->ti_end_min); ti_end_max = max(ti_end_max, cp->ti_end_max); updated += cp->updated; + g_updated += cp->g_updated; e_kin += cp->e_kin; e_int += cp->e_int; e_pot += cp->e_pot; @@ -1513,6 +1804,7 @@ void engine_collect_kick(struct cell *c) { c->ti_end_min = ti_end_min; c->ti_end_max = ti_end_max; c->updated = updated; + c->g_updated = g_updated; c->e_kin = e_kin; c->e_int = e_int; c->e_pot = e_pot; @@ -1576,7 +1868,15 @@ void engine_init_particles(struct engine *e) { /* Make sure all particles are ready to go */ /* i.e. clean-up any stupid state in the ICs */ - space_map_cells_pre(s, 1, cell_init_parts, NULL); + if ((e->policy & engine_policy_hydro) == engine_policy_hydro) { + space_map_cells_pre(s, 1, cell_init_parts, NULL); + } + if (((e->policy & engine_policy_self_gravity) == + engine_policy_self_gravity) || + ((e->policy & engine_policy_external_gravity) == + engine_policy_external_gravity)) { + space_map_cells_pre(s, 1, cell_init_gparts, NULL); + } engine_prepare(e); @@ -1643,7 +1943,7 @@ void engine_init_particles(struct engine *e) { */ void engine_step(struct engine *e) { - int updates = 0; + int updates = 0, g_updates = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; double e_pot = 0.0, e_int = 0.0, e_kin = 0.0; float mom[3] = {0.0, 0.0, 0.0}; @@ -1670,6 +1970,7 @@ void engine_step(struct engine *e) { e_int += c->e_int; e_pot += c->e_pot; updates += c->updated; + g_updates += c->g_updated; mom[0] += c->mom[0]; mom[1] += c->mom[1]; mom[2] += c->mom[2]; @@ -1695,18 +1996,20 @@ void engine_step(struct engine *e) { ti_end_max = in_i[0]; } { - double in_d[4], out_d[4]; + double in_d[5], out_d[5]; out_d[0] = updates; - out_d[1] = e_kin; - out_d[2] = e_int; - out_d[3] = e_pot; - if (MPI_Allreduce(out_d, in_d, 4, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) != + out_d[1] = g_updates; + out_d[2] = e_kin; + out_d[3] = e_int; + out_d[4] = e_pot; + if (MPI_Allreduce(out_d, in_d, 5, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) != MPI_SUCCESS) error("Failed to aggregate energies."); updates = in_d[0]; - e_kin = in_d[1]; - e_int = in_d[2]; - e_pot = in_d[3]; + g_updates = in_d[1]; + e_kin = in_d[2]; + e_int = in_d[3]; + e_pot = in_d[4]; } #endif @@ -1731,8 +2034,8 @@ void engine_step(struct engine *e) { if (e->nodeID == 0) { /* Print some information to the screen */ - printf("%d %e %e %d %.3f\n", e->step, e->time, e->timeStep, updates, - e->wallclock_time); + printf("%d %e %e %d %d %.3f\n", e->step, e->time, e->timeStep, updates, + g_updates, e->wallclock_time); fflush(stdout); /* Write some energy statistics */ @@ -1934,7 +2237,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { engine_makeproxies(e); /* Re-allocate the local parts. */ - if (e->nodeID == 0) + if (e->verbose) message("Re-allocating parts array from %zi to %zi.", s->size_parts, (size_t)(s->nr_parts * 1.2)); s->size_parts = s->nr_parts * 1.2; @@ -1942,7 +2245,7 @@ void engine_split(struct engine *e, struct partition *initial_partition) { struct xpart *xparts_new = NULL; if (posix_memalign((void **)&parts_new, part_align, sizeof(struct part) * s->size_parts) != 0 || - posix_memalign((void **)&xparts_new, part_align, + posix_memalign((void **)&xparts_new, xpart_align, sizeof(struct xpart) * s->size_parts) != 0) error("Failed to allocate new part data."); memcpy(parts_new, s->parts, sizeof(struct part) * s->nr_parts); @@ -1951,6 +2254,50 @@ void engine_split(struct engine *e, struct partition *initial_partition) { free(s->xparts); s->parts = parts_new; s->xparts = xparts_new; + + /* Re-link the gparts. */ + for (size_t k = 0; k < s->nr_parts; k++) + if (s->parts[k].gpart != NULL) s->parts[k].gpart->part = &s->parts[k]; + + /* Re-allocate the local gparts. */ + if (e->verbose) + message("Re-allocating gparts array from %zi to %zi.", s->size_gparts, + (size_t)(s->nr_gparts * 1.2)); + s->size_gparts = s->nr_gparts * 1.2; + struct gpart *gparts_new = NULL; + if (posix_memalign((void **)&gparts_new, gpart_align, + sizeof(struct gpart) * s->size_gparts) != 0) + error("Failed to allocate new gpart data."); + memcpy(gparts_new, s->gparts, sizeof(struct gpart) * s->nr_gparts); + free(s->gparts); + s->gparts = gparts_new; + + /* Re-link the parts. */ + for (size_t k = 0; k < s->nr_gparts; k++) + if (s->gparts[k].id > 0) s->gparts[k].part->gpart = &s->gparts[k]; + + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < s->nr_gparts; ++k) { + + if (s->gparts[k].id > 0) { + + if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != s->gparts[k].part->x[0] || + s->gparts[k].x[1] != s->gparts[k].part->x[1] || + s->gparts[k].x[2] != s->gparts[k].part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < s->nr_parts; ++k) { + + if (s->parts[k].gpart != NULL) { + + if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !"); + } + } + #else error("SWIFT was not compiled with MPI support."); #endif @@ -2126,6 +2473,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, /* Print information about the hydro scheme */ if (e->nodeID == 0) message("Hydrodynamic scheme: %s", SPH_IMPLEMENTATION); + if (e->nodeID == 0) message("Hydrodynamic kernel: %s", kernel_name); /* Check we have sensible time bounds */ if (timeBegin >= timeEnd) @@ -2134,10 +2482,12 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, "(t_beg = %e)", timeEnd, timeBegin); - /* Check we have sensible time step bounds */ + /* Check we have sensible time-step values */ if (e->dt_min > e->dt_max) error( - "Minimal time step size must be smaller than maximal time step size "); + "Minimal time-step size (%e) must be smaller than maximal time-step " + "size (%e)", + e->dt_min, e->dt_max); /* Deal with timestep */ e->timeBase = (timeEnd - timeBegin) / max_nr_timesteps; @@ -2183,8 +2533,7 @@ void engine_init(struct engine *e, struct space *s, float dt, int nr_threads, /* Construct types for MPI communications */ #ifdef WITH_MPI - part_create_mpi_type(&e->part_mpi_type); - xpart_create_mpi_type(&e->xpart_mpi_type); + part_create_mpi_types(); #endif /* First of all, init the barrier and lock it. */ diff --git a/src/engine.h b/src/engine.h index 741ae1f553494e435394f529606b4cb794b0e3d2..4d1860b9eed0203bf9bf75711ec6e6549d837fe7 100644 --- a/src/engine.h +++ b/src/engine.h @@ -62,6 +62,8 @@ extern const char *engine_policy_names[]; #define engine_maxtaskspercell 96 #define engine_maxproxies 64 #define engine_tasksreweight 10 +#define engine_parts_size_grow 1.05 +#define engine_redistribute_alloc_margin 1.2 /* The rank of the engine as a global variable (for messages). */ extern int engine_rank; @@ -160,12 +162,6 @@ struct engine { /* Are we talkative ? */ int verbose; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. */ @@ -182,7 +178,9 @@ void engine_init_particles(struct engine *e); void engine_step(struct engine *e); void engine_maketasks(struct engine *e); void engine_split(struct engine *e, struct partition *initial_partition); -int engine_exchange_strays(struct engine *e, int offset, size_t *ind, size_t N); +void engine_exchange_strays(struct engine *e, size_t offset_parts, + int *ind_part, size_t *Npart, size_t offset_gparts, + int *ind_gpart, size_t *Ngpart); void engine_rebuild(struct engine *e); void engine_repartition(struct engine *e); void engine_makeproxies(struct engine *e); diff --git a/src/gravity/Default/gravity.h b/src/gravity/Default/gravity.h index 4458e4012c5ea343295a97903214f732f5b8a5a7..8fcfe6490c539732b885a9c102f011aebb0aaeff 100644 --- a/src/gravity/Default/gravity.h +++ b/src/gravity/Default/gravity.h @@ -22,35 +22,43 @@ /** * @brief Computes the gravity time-step of a given particle * - * @param p Pointer to the particle data - * @param xp Pointer to the extended particle data + * @param gp Pointer to the g-particle data * */ -__attribute__((always_inline)) INLINE static float gravity_compute_timestep( - struct part* p, struct xpart* xp) { +__attribute__((always_inline)) + INLINE static float gravity_compute_timestep(struct gpart* gp) { /* Currently no limit is imposed */ return FLT_MAX; } -__attribute__((always_inline)) INLINE static void gravity_init_gpart(struct gpart* g) { - - /* zero gravity acceleration */ - g->a_grav_external[0] = 0.f; - g->a_grav_external[1] = 0.f; - g->a_grav_external[2] = 0.f; - -} - +/** + * @brief Initialises the g-particles for the first time + * + * This function is called only once just after the ICs have been + * read in to do some conversions. + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_first_init_gpart(struct gpart* gp) {} +/** + * @brief Prepares a g-particle for the gravity calculation + * + * Zeroes all the relevant arrays in preparation for the sums taking place in + * the variaous tasks + * + * @param gp The particle to act upon + */ +__attribute__((always_inline)) + INLINE static void gravity_init_part(struct gpart* gp) { -__attribute__((always_inline)) INLINE static float external_gravity_compute_timestep(struct gpart* g) { - float dtmin = FLT_MAX; -#ifdef EXTERNAL_POTENTIAL_POINTMASS - dtmin = fminf(dtmin, external_gravity_pointmass_timestep(g)); -#endif - return dtmin; + /* Zero the acceleration */ + gp->a_grav[0] = 0.f; + gp->a_grav[1] = 0.f; + gp->a_grav[2] = 0.f; } __attribute__((always_inline)) INLINE static void external_gravity(struct gpart *g) @@ -59,4 +67,12 @@ __attribute__((always_inline)) INLINE static void external_gravity(struct gpart external_gravity_pointmass(g); #endif -} +/** + * @brief Kick the additional variables + * + * @param gp The particle to act upon + * @param dt The time-step for this kick + * @param half_dt The half time-step for this kick + */ +__attribute__((always_inline)) INLINE static void gravity_kick_extra( + struct gpart* gp, float dt, float half_dt) {} diff --git a/src/gravity/Default/gravity_debug.h b/src/gravity/Default/gravity_debug.h index 98e0c40a5700b4da70f27fb0955592bb5d2287c3..654745bfeb70dddba772af9e23797713376377a7 100644 --- a/src/gravity/Default/gravity_debug.h +++ b/src/gravity/Default/gravity_debug.h @@ -24,5 +24,5 @@ __attribute__((always_inline)) "v_full=[%.3e,%.3e,%.3e] \n a=[%.3e,%.3e,%.3e],\n " "mass=%.3e t_begin=%d, t_end=%d\n", p->x[0], p->x[1], p->x[2], p->v_full[0], p->v_full[1], p->v_full[2], - p->a[0], p->a[1], p->a[2], p->mass, p->ti_begin, p->ti_end); + p->a_grav[0], p->a_grav[1], p->a_grav[2], p->mass, p->ti_begin, p->ti_end); } diff --git a/src/gravity/Default/gravity_iact.h b/src/gravity/Default/gravity_iact.h index 56da9039ff0d19825f880f765e598fcd62bff516..a9924dab275dbcf9e689dbb3a3f6627b9a2471e7 100644 --- a/src/gravity/Default/gravity_iact.h +++ b/src/gravity/Default/gravity_iact.h @@ -50,8 +50,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_grav( /* Aggregate the accelerations. */ for (k = 0; k < 3; k++) { w = acc * dx[k]; - pi->a[k] -= w * mj; - pj->a[k] += w * mi; + pi->a_grav[k] -= w * mj; + pj->a_grav[k] += w * mi; } } @@ -101,8 +101,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_vec_grav( ai.v = w.v * mj.v; aj.v = w.v * mi.v; for (j = 0; j < VEC_SIZE; j++) { - pi[j]->a[k] -= ai.f[j]; - pj[j]->a[k] += aj.f[j]; + pi[j]->a_grav[k] -= ai.f[j]; + pj[j]->a_grav[k] += aj.f[j]; } } diff --git a/src/gravity/Default/gravity_part.h b/src/gravity/Default/gravity_part.h index 9e096d5138aa75ca6de946f2d1140500b7d6c355..47aadd19585a71d04443403245cc76e28f46d80b 100644 --- a/src/gravity/Default/gravity_part.h +++ b/src/gravity/Default/gravity_part.h @@ -29,7 +29,7 @@ struct gpart { float v_full[3]; /* Particle acceleration. */ - float a[3]; + float a_grav[3]; /* Gravity acceleration */ float a_grav_external[3]; diff --git a/src/hydro/Gadget2/hydro_iact.h b/src/hydro/Gadget2/hydro_iact.h index d31b6be383b80a2698b63d27308f6fee9b23518f..09f796a8f37a9c015135f4aab3f821c2e862bdc9 100644 --- a/src/hydro/Gadget2/hydro_iact.h +++ b/src/hydro/Gadget2/hydro_iact.h @@ -93,8 +93,8 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( dv[2] = pi->v[2] - pj->v[2]; const float dvdr = dv[0] * dx[0] + dv[1] * dx[1] + dv[2] * dx[2]; - pi->div_v += faci * dvdr; - pj->div_v += facj * dvdr; + pi->div_v -= faci * dvdr; + pj->div_v -= facj * dvdr; /* Compute dv cross r */ curlvr[0] = dv[1] * dx[2] - dv[2] * dx[1]; @@ -211,10 +211,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Balsara term */ const float balsara_i = fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi); + (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); const float balsara_j = fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj); + (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); /* Are the particles moving towards each others ? */ const float omega_ij = fminf(dvdr, 0.f); @@ -309,10 +309,10 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Balsara term */ const float balsara_i = fabsf(pi->div_v) / - (fabsf(pi->div_v) + pi->force.curl_v + 0.0001 * ci / fac_mu / hi); + (fabsf(pi->div_v) + pi->force.curl_v + 0.0001f * ci / fac_mu / hi); const float balsara_j = fabsf(pj->div_v) / - (fabsf(pj->div_v) + pj->force.curl_v + 0.0001 * cj / fac_mu / hj); + (fabsf(pj->div_v) + pj->force.curl_v + 0.0001f * cj / fac_mu / hj); /* Are the particles moving towards each others ? */ const float omega_ij = fminf(dvdr, 0.f); diff --git a/src/hydro/Minimal/hydro_iact.h b/src/hydro/Minimal/hydro_iact.h index 6afb9d8d38a4fc7f1d38b7286720ddb7f3c51ab4..b3b81a9a0dfe41e7bfafe51050d6f7cf7157e31c 100644 --- a/src/hydro/Minimal/hydro_iact.h +++ b/src/hydro/Minimal/hydro_iact.h @@ -16,8 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ -#ifndef SWIFT_RUNNER_IACT_H -#define SWIFT_RUNNER_IACT_H +#ifndef SWIFT_RUNNER_IACT_MINIMAL_H +#define SWIFT_RUNNER_IACT_MINIMAL_H /* Includes. */ #include "const.h" @@ -38,33 +38,31 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r = sqrtf(r2); - float xi, xj; - float h_inv; float wi, wj, wi_dx, wj_dx; - float mi, mj; + + const float r = sqrtf(r2); /* Get the masses. */ - mi = pi->mass; - mj = pj->mass; + const float mi = pi->mass; + const float mj = pj->mass; /* Compute density of pi. */ - h_inv = 1.0 / hi; - xi = r * h_inv; + const float hi_inv = 1.f / hi; + const float xi = r * hi_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; /* Compute density of pj. */ - h_inv = 1.f / hj; - xj = r * h_inv; + const float hj_inv = 1.f / hj; + const float xj = r * hj_inv; kernel_deval(xj, &wj, &wj_dx); pj->rho += mi * wj; - pj->rho_dh -= mi * (3.0 * wj + xj * wj_dx); + pj->rho_dh -= mi * (3.f * wj + xj * wj_dx); pj->density.wcount += wj; pj->density.wcount_dh -= xj * wj_dx; } @@ -76,24 +74,20 @@ __attribute__((always_inline)) INLINE static void runner_iact_density( __attribute__((always_inline)) INLINE static void runner_iact_nonsym_density( float r2, float *dx, float hi, float hj, struct part *pi, struct part *pj) { - float r; - float xi; - float h_inv; float wi, wi_dx; - float mj; /* Get the masses. */ - mj = pj->mass; + const float mj = pj->mass; /* Get r and r inverse. */ - r = sqrtf(r2); + const float r = sqrtf(r2); - h_inv = 1.f / hi; - xi = r * h_inv; + const float h_inv = 1.f / hi; + const float xi = r * h_inv; kernel_deval(xi, &wi, &wi_dx); pi->rho += mj * wi; - pi->rho_dh -= mj * (3.0 * wi + xi * wi_dx); + pi->rho_dh -= mj * (3.f * wi + xi * wi_dx); pi->density.wcount += wi; pi->density.wcount_dh -= xi * wi_dx; } @@ -148,7 +142,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_force( /* Compute sound speeds */ const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); - float v_sig = ci + cj + 3.f * omega_ij; + const float v_sig = ci + cj + 3.f * omega_ij; /* SPH acceleration term */ const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; @@ -225,7 +219,7 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( /* Compute sound speeds */ const float ci = sqrtf(const_hydro_gamma * pressurei / rhoi); const float cj = sqrtf(const_hydro_gamma * pressurej / rhoj); - float v_sig = ci + cj + 3.f * omega_ij; + const float v_sig = ci + cj + 3.f * omega_ij; /* SPH acceleration term */ const float sph_term = (P_over_rho_i * wi_dr + P_over_rho_j * wj_dr) * r_inv; @@ -245,4 +239,4 @@ __attribute__((always_inline)) INLINE static void runner_iact_nonsym_force( pi->force.v_sig = fmaxf(pi->force.v_sig, v_sig); } -#endif /* SWIFT_RUNNER_IACT_H */ +#endif /* SWIFT_RUNNER_IACT_MINIMAL_H */ diff --git a/src/multipole.h b/src/multipole.h index 91ba6df965ce9d3b088d538411b7f0a8555ba0e4..b7c20ddff5c3f1afc00af501a53b9659c8728ce8 100644 --- a/src/multipole.h +++ b/src/multipole.h @@ -127,7 +127,7 @@ __attribute__((always_inline)) INLINE static void multipole_iact_mp( /* Compute the forces on both multipoles. */ #if multipole_order == 1 - for (k = 0; k < 3; k++) p->a[k] += dx[k] * acc; + for (k = 0; k < 3; k++) p->a_grav[k] += dx[k] * acc; #else #error( "Multipoles of order %i not yet implemented." , multipole_order ) #endif diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000000000000000000000000000000000000..06dc819842d54d952704e4e0c40ebec5b561f691 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,265 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/* Config parameters. */ +#include "../config.h" + +/* Some standard headers. */ +/* Needs to be included so that strtok returns char * instead of a int *. */ +#include <string.h> +#include <stdlib.h> + +/* This object's header. */ +#include "parser.h" + +/* Local headers. */ +#include "error.h" + +/* Private functions. */ +static int count_char(char *str, char val); +static void parse_line(FILE *fp, struct swift_params *params); + +/** + * @brief Reads an input file and stores each parameter in a structure. + * + * @param file_name Name of file to be read + * @param params Structure to be populated from file + */ + +void parser_read_file(const char *file_name, struct swift_params *params) { + + FILE *fp; + + params->count = 0; + + /* Open file for reading */ + fp = fopen(file_name, "r"); + + if (fp == NULL) { + error("Error opening parameter file: %s", file_name); + } + + /* Read until the end of the file is reached.*/ + while (!feof(fp)) { + parse_line(fp, params); + } + + fclose(fp); +} + +/** + * @brief Counts the number of times a specific character appears in a string. + * + * @param str String to be checked + * @param val Character to be counted + */ + +static int count_char(char *str, char val) { + + int count = 0; + + /* Check if the line contains the character */ + while (*str) { + if (*str++ == val) ++count; + } + + return count; +} + +/** + * @brief Parses a line from a file and stores any parameters in a structure. + * + * @param fp File pointer to file to be read + * @param params Structure to be populated from file + * + */ + +static void parse_line(FILE *fp, struct swift_params *params) { + + char line[PARSER_MAX_LINE_SIZE]; + char trim_line[PARSER_MAX_LINE_SIZE]; + + /* Read a line of the file */ + if (fgets(line, PARSER_MAX_LINE_SIZE, fp) != NULL) { + + char *token; + /* Remove comments */ + token = strtok(line, PARSER_COMMENT_CHAR); + strcpy(trim_line, token); + + /* Check if the line contains a value */ + if (strchr(trim_line, PARSER_VALUE_CHAR)) { + /* Check for more than one parameter on the same line. */ + if (count_char(trim_line, PARSER_VALUE_CHAR) > 1) { + error("Found more than one parameter in '%s', only one allowed.", line); + } else { + /* Take first token as the parameter name. */ + token = strtok(trim_line, PARSER_VALUE_STRING); + strcpy(params->data[params->count].name, token); + + /* Take second token as the parameter value. */ + token = strtok(NULL, " #\n"); + strcpy(params->data[params->count++].value, token); + } + } + } +} + +/** + * @brief Retrieve integer parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_int(struct swift_params *params, char *name, + int *retParam) { + + char str[128]; + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%d%s", retParam, str) != 1) { + error( + "Tried parsing int '%s' but found '%s' with illegal integer " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return; + } + } + + message("Cannot find '%s' in the structure.", name); +} + +/** + * @brief Retrieve float parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_float(struct swift_params *params, char *name, + float *retParam) { + + char str[128]; + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%f%s", retParam, str) != 1) { + error( + "Tried parsing float '%s' but found '%s' with illegal float " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return; + } + } + + message("Cannot find '%s' in the structure.", name); +} + +/** + * @brief Retrieve double parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_double(struct swift_params *params, char *name, + double *retParam) { + + char str[128]; + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + + /* Check that exactly one number is parsed. */ + if (sscanf(params->data[i].value, "%lf", retParam) != 1) { + error( + "Tried parsing double '%s' but found '%s' with illegal double " + "characters '%s'.", + params->data[i].name, params->data[i].value, str); + } + + return; + } + } + + message("Cannot find '%s' in the structure.", name); +} + +/** + * @brief Retrieve string parameter from structure. + * + * @param params Structure that holds the parameters + * @param name Name of the parameter to be found + * @param retParam Value of the parameter found + * + */ + +void parser_get_param_string(struct swift_params *params, char *name, + char *retParam) { + + for (int i = 0; i < params->count; i++) { + + /*strcmp returns 0 if both strings are the same.*/ + if (!strcmp(name, params->data[i].name)) { + strcpy(retParam, params->data[i].value); + return; + } + } +} + +/** + * @brief Prints the contents of the parameter structure. + * + * @param params Structure that holds the parameters + * + */ + +void parser_print_params(struct swift_params *params) { + + printf("\n--------------------------\n"); + printf("| SWIFT Parameter File |\n"); + printf("--------------------------\n"); + + for (int i = 0; i < params->count; i++) { + printf("Parameter name: %s\n", params->data[i].name); + printf("Parameter value: %s\n", params->data[i].value); + } +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000000000000000000000000000000000000..2fb4148944cd423da016341744cb6d58e222182e --- /dev/null +++ b/src/parser.h @@ -0,0 +1,54 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2016 James Willis (james.s.willis@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_PARSER_H +#define SWIFT_PARSER_H + +#include <stdio.h> + +#define PARSER_MAX_LINE_SIZE 128 +#define PARSER_MAX_NO_OF_PARAMS 512 + +#define PARSER_COMMENT_CHAR "#" +#define PARSER_VALUE_CHAR ':' +#define PARSER_VALUE_STRING ":" +#define PARSER_END_OF_FILE "..." + +struct parameter { + char name[PARSER_MAX_LINE_SIZE]; + char value[PARSER_MAX_LINE_SIZE]; +}; + +struct swift_params { + struct parameter data[PARSER_MAX_NO_OF_PARAMS]; + int count; +}; + +/* Public API. */ +void parser_read_file(const char *file_name, struct swift_params *params); +void parser_print_params(struct swift_params *params); +void parser_get_param_int(struct swift_params *params, char *name, + int *retParam); +void parser_get_param_float(struct swift_params *params, char *name, + float *retParam); +void parser_get_param_double(struct swift_params *params, char *name, + double *retParam); +void parser_get_param_string(struct swift_params *params, char *name, + char *retParam); + +#endif /* SWIFT_PARSER_H */ diff --git a/src/part.c b/src/part.c index 6a99325ef23a7062fafb387fa3f3bd6b2203d057..b89abdde40fe8c7a57d1e9ac9e18fece83ba1f21 100644 --- a/src/part.c +++ b/src/part.c @@ -26,33 +26,21 @@ #endif /* This object's header. */ +#include "error.h" #include "part.h" #ifdef WITH_MPI -/** - * @brief Registers and returns an MPI type for the particles - * - * @param part_type The type container - */ -void part_create_mpi_type(MPI_Datatype* part_type) { - - /* This is not the recommended way of doing this. - One should define the structure field by field - But as long as we don't do serialization via MPI-IO - we don't really care. - Also we would have to modify this function everytime something - is added to the part structure. */ - MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, - part_type); - MPI_Type_commit(part_type); -} +/* MPI data type for the particle transfers */ +MPI_Datatype part_mpi_type; +MPI_Datatype xpart_mpi_type; +MPI_Datatype gpart_mpi_type; +#endif +#ifdef WITH_MPI /** - * @brief Registers and returns an MPI type for the xparticles - * - * @param xpart_type The type container + * @brief Registers MPI particle types. */ -void xpart_create_mpi_type(MPI_Datatype* xpart_type) { +void part_create_mpi_types() { /* This is not the recommended way of doing this. One should define the structure field by field @@ -60,9 +48,20 @@ void xpart_create_mpi_type(MPI_Datatype* xpart_type) { we don't really care. Also we would have to modify this function everytime something is added to the part structure. */ - MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), MPI_BYTE, - xpart_type); - MPI_Type_commit(xpart_type); + if (MPI_Type_contiguous(sizeof(struct part) / sizeof(unsigned char), MPI_BYTE, + &part_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&part_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for parts."); + } + if (MPI_Type_contiguous(sizeof(struct xpart) / sizeof(unsigned char), + MPI_BYTE, &xpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&xpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for xparts."); + } + if (MPI_Type_contiguous(sizeof(struct gpart) / sizeof(unsigned char), + MPI_BYTE, &gpart_mpi_type) != MPI_SUCCESS || + MPI_Type_commit(&gpart_mpi_type) != MPI_SUCCESS) { + error("Failed to create MPI type for gparts."); + } } - #endif diff --git a/src/part.h b/src/part.h index 2117840204dbc262faf0501b3c45a167a092d6de..e76a367448a032dc6fafd982f7c6e62decf15de1 100644 --- a/src/part.h +++ b/src/part.h @@ -35,8 +35,8 @@ /* Some constants. */ #define part_align 64 -#define gpart_align 32 #define xpart_align 32 +#define gpart_align 32 /* Import the right particle definition */ #if defined(MINIMAL_SPH) @@ -54,8 +54,12 @@ #include "./gravity/Default/gravity_part.h" #ifdef WITH_MPI -void part_create_mpi_type(MPI_Datatype* part_type); -void xpart_create_mpi_type(MPI_Datatype* xpart_type); +/* MPI data type for the particle transfers */ +extern MPI_Datatype part_mpi_type; +extern MPI_Datatype xpart_mpi_type; +extern MPI_Datatype gpart_mpi_type; + +void part_create_mpi_types(); #endif #endif /* SWIFT_PART_H */ diff --git a/src/proxy.c b/src/proxy.c index f58847988946c347367a0f172048f8cc96f26914..02263a5653bdcdd2d1bf0a86523ed1a599d4bf21 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -189,20 +189,21 @@ void proxy_parts_exch1(struct proxy *p) { #ifdef WITH_MPI /* Send the number of particles. */ - if (MPI_Isend(&p->nr_parts_out, 1, MPI_INT, p->nodeID, + p->buff_out[0] = p->nr_parts_out; + p->buff_out[1] = p->nr_gparts_out; + if (MPI_Isend(p->buff_out, 2, MPI_INT, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_out) != MPI_SUCCESS) error("Failed to isend nr of parts."); - // message( "isent particle count (%i) from node %i to node %i." , - // p->nr_parts_out , p->mynodeID , p->nodeID ); fflush(stdout); + /* message( "isent particle counts [%i, %i] from node %i to node %i." , + p->buff_out[0], p->buff_out[1], p->mynodeID , p->nodeID ); fflush(stdout); */ /* Send the particle buffers. */ if (p->nr_parts_out > 0) { - if (MPI_Isend(p->parts_out, sizeof(struct part) * p->nr_parts_out, MPI_BYTE, - p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_parts, + if (MPI_Isend(p->parts_out, p->nr_parts_out, part_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, &p->req_parts_out) != MPI_SUCCESS || - MPI_Isend(p->xparts_out, sizeof(struct xpart) * p->nr_parts_out, - MPI_BYTE, p->nodeID, + MPI_Isend(p->xparts_out, p->nr_parts_out, xpart_mpi_type, p->nodeID, p->mynodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_out) != MPI_SUCCESS) error("Failed to isend part data."); @@ -213,14 +214,20 @@ void proxy_parts_exch1(struct proxy *p) { p->parts_out[k].id, p->parts_out[k].x[0], p->parts_out[k].x[1], p->parts_out[k].x[2], p->parts_out[k].h, p->nodeID);*/ } + if (p->nr_gparts_out > 0) { + if (MPI_Isend(p->gparts_out, p->nr_gparts_out, gpart_mpi_type, p->nodeID, + p->mynodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_out) != MPI_SUCCESS) + error("Failed to isend part data."); + // message( "isent gpart data (%i) to node %i." , p->nr_parts_out , + // p->nodeID ); fflush(stdout); + } /* Receive the number of particles. */ - if (MPI_Irecv(&p->nr_parts_in, 1, MPI_INT, p->nodeID, + if (MPI_Irecv(p->buff_in, 2, MPI_INT, p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_count, MPI_COMM_WORLD, &p->req_parts_count_in) != MPI_SUCCESS) error("Failed to irecv nr of parts."); -// message( "irecv particle count on node %i from node %i." , p->mynodeID , -// p->nodeID ); fflush(stdout); #else error("SWIFT was not compiled with MPI support."); @@ -231,6 +238,10 @@ void proxy_parts_exch2(struct proxy *p) { #ifdef WITH_MPI + /* Unpack the incomming parts counts. */ + p->nr_parts_in = p->buff_in[0]; + p->nr_gparts_in = p->buff_in[1]; + /* Is there enough space in the buffer? */ if (p->nr_parts_in > p->size_parts_in) { do { @@ -244,19 +255,36 @@ void proxy_parts_exch2(struct proxy *p) { p->size_parts_in)) == NULL) error("Failed to re-allocate parts_in buffers."); } + if (p->nr_gparts_in > p->size_gparts_in) { + do { + p->size_gparts_in *= proxy_buffgrow; + } while (p->nr_gparts_in > p->size_gparts_in); + free(p->gparts_in); + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to re-allocate gparts_in buffers."); + } /* Receive the particle buffers. */ if (p->nr_parts_in > 0) { - if (MPI_Irecv(p->parts_in, sizeof(struct part) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_parts, - MPI_COMM_WORLD, &p->req_parts_in) != MPI_SUCCESS || - MPI_Irecv(p->xparts_in, sizeof(struct xpart) * p->nr_parts_in, MPI_BYTE, - p->nodeID, p->nodeID * proxy_tag_shift + proxy_tag_xparts, + if (MPI_Irecv(p->parts_in, p->nr_parts_in, part_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_parts, MPI_COMM_WORLD, + &p->req_parts_in) != MPI_SUCCESS || + MPI_Irecv(p->xparts_in, p->nr_parts_in, xpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_xparts, MPI_COMM_WORLD, &p->req_xparts_in) != MPI_SUCCESS) error("Failed to irecv part data."); // message( "irecv particle data (%i) from node %i." , p->nr_parts_in , // p->nodeID ); fflush(stdout); } + if (p->nr_gparts_in > 0) { + if (MPI_Irecv(p->gparts_in, p->nr_gparts_in, gpart_mpi_type, p->nodeID, + p->nodeID * proxy_tag_shift + proxy_tag_gparts, + MPI_COMM_WORLD, &p->req_gparts_in) != MPI_SUCCESS) + error("Failed to irecv gpart data."); + // message( "irecv gpart data (%i) from node %i." , p->nr_gparts_in , + // p->nodeID ); fflush(stdout); + } #else error("SWIFT was not compiled with MPI support."); @@ -303,6 +331,37 @@ void proxy_parts_load(struct proxy *p, const struct part *parts, p->nr_parts_out += N; } +/** + * @brief Load parts onto a proxy for exchange. + * + * @param p The #proxy. + * @param gparts Pointer to an array of #gpart to send. + * @param N The number of parts. + */ + +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N) { + + /* Is there enough space in the buffer? */ + if (p->nr_gparts_out + N > p->size_gparts_out) { + do { + p->size_gparts_out *= proxy_buffgrow; + } while (p->nr_gparts_out + N > p->size_gparts_out); + struct gpart *tp; + if ((tp = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to re-allocate gparts_out buffers."); + memcpy(tp, p->gparts_out, sizeof(struct gpart) * p->nr_gparts_out); + free(p->gparts_out); + p->gparts_out = tp; + } + + /* Copy the parts and xparts data to the buffer. */ + memcpy(&p->gparts_out[p->nr_gparts_out], gparts, sizeof(struct gpart) * N); + + /* Increase the counters. */ + p->nr_gparts_out += N; +} + /** * @brief Initialize the given proxy. * @@ -352,4 +411,20 @@ void proxy_init(struct proxy *p, int mynodeID, int nodeID) { error("Failed to allocate parts_out buffers."); } p->nr_parts_out = 0; + + /* Allocate the gpart send and receive buffers, if needed. */ + if (p->gparts_in == NULL) { + p->size_gparts_in = proxy_buffinit; + if ((p->gparts_in = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_in)) == NULL) + error("Failed to allocate gparts_in buffers."); + } + p->nr_gparts_in = 0; + if (p->gparts_out == NULL) { + p->size_gparts_out = proxy_buffinit; + if ((p->gparts_out = (struct gpart *)malloc(sizeof(struct gpart) * + p->size_gparts_out)) == NULL) + error("Failed to allocate gparts_out buffers."); + } + p->nr_gparts_out = 0; } diff --git a/src/proxy.h b/src/proxy.h index 1a0272dd6c948df0130f733bb573477eeff1d0db..5a747187e05a78a109ce4523ebb3c9d5fe2ad717 100644 --- a/src/proxy.h +++ b/src/proxy.h @@ -32,7 +32,8 @@ #define proxy_tag_count 0 #define proxy_tag_parts 1 #define proxy_tag_xparts 2 -#define proxy_tag_cells 3 +#define proxy_tag_gparts 3 +#define proxy_tag_cells 4 /* Data structure for the proxy. */ struct proxy { @@ -53,14 +54,21 @@ struct proxy { /* The parts and xparts buffers for input and output. */ struct part *parts_in, *parts_out; struct xpart *xparts_in, *xparts_out; + struct gpart *gparts_in, *gparts_out; int size_parts_in, size_parts_out; int nr_parts_in, nr_parts_out; + int size_gparts_in, size_gparts_out; + int nr_gparts_in, nr_gparts_out; + + /* Buffer to hold the incomming/outgoing particle counts. */ + int buff_out[2], buff_in[2]; /* MPI request handles. */ #ifdef WITH_MPI MPI_Request req_parts_count_out, req_parts_count_in; MPI_Request req_parts_out, req_parts_in; MPI_Request req_xparts_out, req_xparts_in; + MPI_Request req_gparts_out, req_gparts_in; MPI_Request req_cells_count_out, req_cells_count_in; MPI_Request req_cells_out, req_cells_in; #endif @@ -70,6 +78,7 @@ struct proxy { void proxy_init(struct proxy *p, int mynodeID, int nodeID); void proxy_parts_load(struct proxy *p, const struct part *parts, const struct xpart *xparts, int N); +void proxy_gparts_load(struct proxy *p, const struct gpart *gparts, int N); void proxy_parts_exch1(struct proxy *p); void proxy_parts_exch2(struct proxy *p); void proxy_addcell_in(struct proxy *p, struct cell *c); diff --git a/src/runner.c b/src/runner.c index cd4ed426f3318bc5aa981d9fa07ca4d17bf1ebfc..3341f3213261ff8d81625e9dc2103148b169bd32 100644 --- a/src/runner.c +++ b/src/runner.c @@ -171,7 +171,7 @@ void runner_dograv_external(struct runner *r, struct cell *c) { /** - * @Brief Sort the entries in ascending order using QuickSort. + * @brief Sort the entries in ascending order using QuickSort. * * @param sort The entries * @param N The number of entries. @@ -559,9 +559,10 @@ void runner_dogsort(struct runner *r, struct cell *c, int flags, int clock) { void runner_doinit(struct runner *r, struct cell *c, int timer) { - struct part *p, *parts = c->parts; - struct gpart *g, *gparts = c->gparts; - const int count = c->count, gcount = c->gcount; + struct part *const parts = c->parts; + struct gpart *const gparts = c->gparts; + const int count = c->count; + const int gcount = c->gcount; const int ti_current = r->e->ti_current; TIMER_TIC; @@ -577,25 +578,25 @@ void runner_doinit(struct runner *r, struct cell *c, int timer) { for (int i = 0; i < count; i++) { /* Get a direct pointer on the part. */ - p = &parts[i]; + struct part *const p = &parts[i]; if (p->ti_end <= ti_current) { /* Get ready for a density calculation */ hydro_init_part(p); } - } + } - /* Loop over the parts in this cell. */ + /* Loop over the gparts in this cell. */ for (int i = 0; i < gcount; i++) { /* Get a direct pointer on the part. */ - g = &gparts[i]; + struct gpart *const gp = &gparts[i]; - if (g->ti_end <= ti_current) { + if (gp->ti_end <= ti_current) { /* Get ready for a density calculation */ - gravity_init_gpart(g); + gravity_init_part(gp); } } } @@ -753,7 +754,7 @@ void runner_doghost(struct runner *r, struct cell *c) { } /** - * @brief Drift particles forward in time + * @brief Drift particles and g-particles forward in time * * @param r The runner thread. * @param c The cell. @@ -765,14 +766,12 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { const int nr_gparts = c->gcount; const double timeBase = r->e->timeBase; const double dt = (r->e->ti_current - r->e->ti_old) * timeBase; - const float ti_old = r->e->ti_old; - const float ti_current = r->e->ti_current; - struct part *restrict p, *restrict parts = c->parts; - struct xpart *restrict xp, *restrict xparts = c->xparts; - struct gpart *restrict g, - *restrict gparts = c->gparts; + const int ti_old = r->e->ti_old; + const int ti_current = r->e->ti_current; + struct part *const parts = c->parts; + struct xpart *const xparts = c->xparts; + struct gpart *const gparts = c->gparts; float dx_max = 0.f, dx2_max = 0.f, h_max = 0.f; - float w; TIMER_TIC #ifdef TASK_VERBOSE @@ -782,12 +781,24 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { /* No children? */ if (!c->split) { - /* Loop over all the particles in the cell */ + /* Loop over all the g-particles in the cell */ + for (int k = 0; k < nr_gparts; ++k) { + + /* Get a handle on the gpart. */ + struct gpart *const gp = &gparts[k]; + + /* Drift... */ + gp->x[0] += gp->v_full[0] * dt; + gp->x[1] += gp->v_full[1] * dt; + gp->x[2] += gp->v_full[2] * dt; + } + + /* Loop over all the particles in the cell (more work for these !) */ for (int k = 0; k < nr_parts; k++) { /* Get a handle on the part. */ - p = &parts[k]; - xp = &xparts[k]; + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; /* Useful quantity */ const float h_inv = 1.0f / p->h; @@ -803,18 +814,18 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { p->v[2] += p->a_hydro[2] * dt; /* Predict smoothing length */ - w = p->h_dt * h_inv * dt; - if (fabsf(w) < 0.2f) - p->h *= approx_expf(w); /* 4th order expansion of exp(w) */ + const float w1 = p->h_dt * h_inv * dt; + if (fabsf(w1) < 0.2f) + p->h *= approx_expf(w1); /* 4th order expansion of exp(w) */ else - p->h *= expf(w); + p->h *= expf(w1); /* Predict density */ - w = -3.0f * p->h_dt * h_inv * dt; - if (fabsf(w) < 0.2f) - p->rho *= approx_expf(w); /* 4th order expansion of exp(w) */ + const float w2 = -3.0f * p->h_dt * h_inv * dt; + if (fabsf(w2) < 0.2f) + p->rho *= approx_expf(w2); /* 4th order expansion of exp(w) */ else - p->rho *= expf(w); + p->rho *= expf(w2); /* Predict the values of the extra fields */ hydro_predict_extra(p, xp, ti_old, ti_current, timeBase); @@ -829,17 +840,6 @@ void runner_dodrift(struct runner *r, struct cell *c, int timer) { h_max = fmaxf(p->h, h_max); } - /* Loop over all gparts in the cell */ - for (int k = 0; k < nr_gparts; k++) { - g = &gparts[k]; - if (g->id == 0) - message(" dt= %f tx= %f tv=%f \n",dt, g->tx, g->tv); - g->x[0] += g->v_full[0] * dt; - g->x[1] += g->v_full[1] * dt; - g->x[2] += g->v_full[2] * dt; - g->tx += dt; - } - /* Now, get the maximal particle motion from its square */ dx_max = sqrtf(dx2_max); } @@ -882,21 +882,17 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { const double timeBase_inv = 1.0 / r->e->timeBase; const int count = c->count; const int gcount = c->gcount; + struct part *const parts = c->parts; + struct xpart *const xparts = c->xparts; + struct gpart *const gparts = c->gparts; const int is_fixdt = (r->e->policy & engine_policy_fixdt) == engine_policy_fixdt; - int new_dti; - int dti_timeline; - - int updated = 0; + int updated = 0, g_updated = 0; int ti_end_min = max_nr_timesteps, ti_end_max = 0; double e_kin = 0.0, e_int = 0.0, e_pot = 0.0, mass = 0.0; float mom[3] = {0.0f, 0.0f, 0.0f}; float ang[3] = {0.0f, 0.0f, 0.0f}; - float x[3], v_full[3]; - struct part *restrict p, *restrict parts = c->parts; - struct xpart *restrict xp, *restrict xparts = c->xparts; - struct gpart *restrict g, *restrict gparts = c->gparts; TIMER_TIC @@ -907,17 +903,79 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* No children? */ if (!c->split) { + /* Loop over the g-particles and kick the active ones. */ + for (int k = 0; k < gcount; k++) { + + /* Get a handle on the part. */ + struct gpart *const gp = &gparts[k]; + + /* If the g-particle has no counterpart and needs to be kicked */ + if (gp->id < 0 && (is_fixdt || gp->ti_end <= ti_current)) { + + /* First, finish the force calculation */ + gravity_end_force(gp); + + /* Now we are ready to compute the next time-step size */ + int new_dti; + + if (is_fixdt) { + + /* Now we have a time step, proceed with the kick */ + new_dti = global_dt_max * timeBase_inv; + + } else { + + /* Compute the next timestep (gravity condition) */ + float new_dt = gravity_compute_timestep(gp); + + /* Limit timestep within the allowed range */ + new_dt = fminf(new_dt, global_dt_max); + new_dt = fmaxf(new_dt, global_dt_min); + + /* Convert to integer time */ + new_dti = new_dt * timeBase_inv; + + /* Recover the current timestep */ + const int current_dti = gp->ti_end - gp->ti_begin; + + /* Limit timestep increase */ + if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); + + /* Put this timestep on the time line */ + int dti_timeline = max_nr_timesteps; + while (new_dti < dti_timeline) dti_timeline /= 2; + + /* Now we have a time step, proceed with the kick */ + new_dti = dti_timeline; + } + + /* Compute the time step for this kick */ + const int ti_start = (gp->ti_begin + gp->ti_end) / 2; + const int ti_end = gp->ti_end + new_dti / 2; + const double dt = (ti_end - ti_start) * timeBase; + const double half_dt = (ti_end - gp->ti_end) * timeBase; + + /* Kick particles in momentum space */ + gp->v_full[0] += gp->a_grav[0] * dt; + gp->v_full[1] += gp->a_grav[1] * dt; + gp->v_full[2] += gp->a_grav[2] * dt; + + /* Extra kick work */ + gravity_kick_extra(gp, dt, half_dt); + + /* Number of updated g-particles */ + g_updated++; + } + } + + /* Now do the hydro ones... */ + /* Loop over the particles and kick the active ones. */ for (int k = 0; k < count; k++) { /* Get a handle on the part. */ - p = &parts[k]; - xp = &xparts[k]; - - const float m = p->mass; - x[0] = p->x[0]; - x[1] = p->x[1]; - x[2] = p->x[2]; + struct part *const p = &parts[k]; + struct xpart *const xp = &xparts[k]; /* If particle needs to be kicked */ if (is_fixdt || p->ti_end <= ti_current) { @@ -927,8 +985,10 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* And do the same of the extra variable */ hydro_end_force(p); + if (p->gpart != NULL) gravity_end_force(p->gpart); /* Now we are ready to compute the next time-step size */ + int new_dti; if (is_fixdt) { @@ -937,9 +997,13 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { } else { - /* Compute the next timestep */ + /* Compute the next timestep (hydro condition) */ const float new_dt_hydro = hydro_compute_timestep(p, xp); - const float new_dt_grav = gravity_compute_timestep(p, xp); + + /* Compute the next timestep (gravity condition) */ + float new_dt_grav = FLT_MAX; + if (p->gpart != NULL) + new_dt_grav = gravity_compute_timestep(p->gpart); float new_dt = fminf(new_dt_hydro, new_dt_grav); @@ -964,7 +1028,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); /* Put this timestep on the time line */ - dti_timeline = max_nr_timesteps; + int dti_timeline = max_nr_timesteps; while (new_dti < dti_timeline) dti_timeline /= 2; /* Now we have a time step, proceed with the kick */ @@ -974,34 +1038,51 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Compute the time step for this kick */ const int ti_start = (p->ti_begin + p->ti_end) / 2; const int ti_end = p->ti_end + new_dti / 2; - const float dt = (ti_end - ti_start) * timeBase; - const float half_dt = (ti_end - p->ti_end) * timeBase; + const double dt = (ti_end - ti_start) * timeBase; + const double half_dt = (ti_end - p->ti_end) * timeBase; /* Move particle forward in time */ p->ti_begin = p->ti_end; p->ti_end = p->ti_begin + new_dti; + /* Get the acceleration */ + float a_tot[3] = {p->a_hydro[0], p->a_hydro[1], p->a_hydro[2]}; + if (p->gpart != NULL) { + a_tot[0] += p->gpart->a_grav[0]; + a_tot[1] += p->gpart->a_grav[1]; + a_tot[1] += p->gpart->a_grav[2]; + } + /* Kick particles in momentum space */ - xp->v_full[0] += p->a_hydro[0] * dt; - xp->v_full[1] += p->a_hydro[1] * dt; - xp->v_full[2] += p->a_hydro[2] * dt; + xp->v_full[0] += a_tot[0] * dt; + xp->v_full[1] += a_tot[1] * dt; + xp->v_full[2] += a_tot[2] * dt; + + if (p->gpart != NULL) { + p->gpart->v_full[0] = xp->v_full[0]; + p->gpart->v_full[1] = xp->v_full[1]; + p->gpart->v_full[2] = xp->v_full[2]; + } - p->v[0] = xp->v_full[0] - half_dt * p->a_hydro[0]; - p->v[1] = xp->v_full[1] - half_dt * p->a_hydro[1]; - p->v[2] = xp->v_full[2] - half_dt * p->a_hydro[2]; + /* Go back by half-step for the hydro velocity */ + p->v[0] = xp->v_full[0] - half_dt * a_tot[0]; + p->v[1] = xp->v_full[1] - half_dt * a_tot[1]; + p->v[2] = xp->v_full[2] - half_dt * a_tot[2]; /* Extra kick work */ hydro_kick_extra(p, xp, dt, half_dt); + if (p->gpart != NULL) gravity_kick_extra(p->gpart, dt, half_dt); /* Number of updated particles */ updated++; + if (p->gpart != NULL) g_updated++; } /* Now collect quantities for statistics */ - v_full[0] = xp->v_full[0]; - v_full[1] = xp->v_full[1]; - v_full[2] = xp->v_full[2]; + const double x[3] = {p->x[0], p->x[1], p->x[2]}; + const float v_full[3] = {xp->v_full[0], xp->v_full[1], xp->v_full[2]}; + const float m = p->mass; /* Collect mass */ mass += m; @@ -1026,75 +1107,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { ti_end_min = min(p->ti_end, ti_end_min); ti_end_max = max(p->ti_end, ti_end_max); } - /* For the moment we loop of gpart particles comppletely independently - this should be changed */ - - for(int k = 0; k < gcount; k++) { - g = &gparts[k]; - - x[0] = g->x[0]; - x[1] = g->x[1]; - x[2] = g->x[2]; - - /* If particle needs to be kicked */ - if (is_fixdt || g->ti_end <= ti_current) { - - if (is_fixdt) { - - /* Now we have a time step, proceed with the kick */ - new_dti = global_dt_max * timeBase_inv; - - } - else { - /* need to calculate gravity step - todo */ - const float new_dt_external = external_gravity_compute_timestep(g); - - float new_dt = fminf(new_dt_external, global_dt_max); - new_dt = fmaxf(new_dt_external, global_dt_min); - - /* Convert to integer time */ - new_dti = new_dt * timeBase_inv; - - /* Recover the current timestep */ - const int current_dti = g->ti_end - g->ti_begin; - - /* Limit timestep increase */ - if (current_dti > 0) new_dti = min(new_dti, 2 * current_dti); - - /* Put this timestep on the time line */ - dti_timeline = max_nr_timesteps; - while (new_dti < dti_timeline) dti_timeline /= 2; - - /* Now we have a time step, proceed with the kick */ - new_dti = dti_timeline; - } - - /* Compute the time step for this kick */ - const int ti_start = (g->ti_begin + g->ti_end) / 2; - const int ti_end = g->ti_end + new_dti / 2; - const float dt = (ti_end - ti_start) * timeBase; - - /* Move particle forward in time */ - g->ti_begin = g->ti_end; - g->ti_end = g->ti_begin + new_dti; - - if(g->id == 0) - message(" dt= %f tx= %f tv=%f \n",dt, g->tx, g->tv); - - /* Kick particles in momentum space */ - g->v_full[0] += g->a_grav_external[0] * dt; - g->v_full[1] += g->a_grav_external[1] * dt; - g->v_full[2] += g->a_grav_external[2] * dt; - g->tv += dt; - - /* Minimal time for next end of time-step */ - ti_end_min = min(g->ti_end, ti_end_min); - ti_end_max = max(g->ti_end, ti_end_max); - /* Number of updated particles */ - updated++; - - } - } } /* Otherwise, aggregate data from children. */ @@ -1103,13 +1116,14 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Loop over the progeny. */ for (int k = 0; k < 8; k++) if (c->progeny[k] != NULL) { - struct cell *cp = c->progeny[k]; + struct cell *const cp = c->progeny[k]; /* Recurse */ runner_dokick(r, cp, 0); /* And aggregate */ updated += cp->updated; + g_updated += cp->g_updated; e_kin += cp->e_kin; e_int += cp->e_int; e_pot += cp->e_pot; @@ -1127,6 +1141,7 @@ void runner_dokick(struct runner *r, struct cell *c, int timer) { /* Store the values. */ c->updated = updated; + c->g_updated = g_updated; c->e_kin = e_kin; c->e_int = e_int; c->e_pot = e_pot; diff --git a/src/runner_doiact.h b/src/runner_doiact.h index cf5d56e94169b44e6cd2974a3422a0bc5e4610ac..de339db6133fcc829bdc6ee0ce9e537b68982422 100644 --- a/src/runner_doiact.h +++ b/src/runner_doiact.h @@ -1235,7 +1235,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj) { #else /* Does pi need to be updated too? */ - if (pi->dt <= dt_step) { + if (pi->ti_end <= ti_current) { /* Add this interaction to the symmetric queue. */ r2q2[icount2] = r2; diff --git a/src/runner_doiact_grav.h b/src/runner_doiact_grav.h index f374339da75e31b39a5295fcd8bbc23c34d8d67d..02626295a49f314fef840bc044a476f5c9cf332d 100644 --- a/src/runner_doiact_grav.h +++ b/src/runner_doiact_grav.h @@ -267,9 +267,9 @@ void runner_dograv_down(struct runner *r, struct cell *c) { /* Apply the multipole acceleration to all gparts. */ for (int k = 0; k < c->gcount; k++) { struct gpart *p = &c->gparts[k]; - p->a[0] += m->a[0]; - p->a[1] += m->a[1]; - p->a[2] += m->a[2]; + p->a_grav[0] += m->a[0]; + p->a_grav[1] += m->a[1]; + p->a_grav[2] += m->a[2]; } } } @@ -594,5 +594,4 @@ void runner_dosub_grav(struct runner *r, struct cell *ci, struct cell *cj, if (gettimer) TIMER_TOC(timer_dosub_grav); } - #endif /* SWIFT_RUNNER_DOIACT_GRAV_H */ diff --git a/src/scheduler.c b/src/scheduler.c index c7267197ad67abdd7983e8ea6c819016d31faf63..38a1cd8c663307e0c0378d8bec2e0cd3d8f37fa8 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -1096,7 +1096,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI - err = MPI_Irecv(t->ci->parts, t->ci->count, s->part_mpi_type, + err = MPI_Irecv(t->ci->parts, t->ci->count, part_mpi_type, t->ci->nodeID, t->flags, MPI_COMM_WORLD, &t->req); if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit irecv for particle data."); @@ -1111,7 +1111,7 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_send: #ifdef WITH_MPI - err = MPI_Isend(t->ci->parts, t->ci->count, s->part_mpi_type, + err = MPI_Isend(t->ci->parts, t->ci->count, part_mpi_type, t->cj->nodeID, t->flags, MPI_COMM_WORLD, &t->req); if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit isend for particle data."); @@ -1352,12 +1352,6 @@ void scheduler_init(struct scheduler *s, struct space *space, int nr_tasks, s->tasks = NULL; s->tasks_ind = NULL; scheduler_reset(s, nr_tasks); - -/* Construct types for MPI communications */ -#ifdef WITH_MPI - part_create_mpi_type(&s->part_mpi_type); - xpart_create_mpi_type(&s->xpart_mpi_type); -#endif } /** diff --git a/src/scheduler.h b/src/scheduler.h index 9c2a5bd7307817bd536930200ad08e7e37502e08..64c694aea295c13810a20b626055fc6c15eb0af8 100644 --- a/src/scheduler.h +++ b/src/scheduler.h @@ -100,12 +100,6 @@ struct scheduler { /* The node we are working on. */ int nodeID; - -#ifdef WITH_MPI - /* MPI data type for the particle transfers */ - MPI_Datatype part_mpi_type; - MPI_Datatype xpart_mpi_type; -#endif }; /* Function prototypes. */ diff --git a/src/space.c b/src/space.c index 1db5f6bc238364943de3ce951713cd9efa2fc929..353bab9eda5b775deb60623c721aad88998d1f58 100644 --- a/src/space.c +++ b/src/space.c @@ -312,7 +312,7 @@ void space_regrid(struct space *s, double cell_max, int verbose) { void space_rebuild(struct space *s, double cell_max, int verbose) { - ticks tic = getticks(); + const ticks tic = getticks(); /* Be verbose about this. */ message( "re)building space..." ); fflush(stdout); @@ -324,23 +324,15 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { int nr_gparts = s->nr_gparts; struct cell *restrict cells = s->cells; - double ih[3], dim[3]; - int cdim[3]; - ih[0] = s->ih[0]; - ih[1] = s->ih[1]; - ih[2] = s->ih[2]; - dim[0] = s->dim[0]; - dim[1] = s->dim[1]; - dim[2] = s->dim[2]; - cdim[0] = s->cdim[0]; - cdim[1] = s->cdim[1]; - cdim[2] = s->cdim[2]; + const double ih[3] = {s->ih[0], s->ih[1], s->ih[2]}; + const double dim[3] = {s->dim[0], s->dim[1], s->dim[2]}; + const int cdim[3] = {s->cdim[0], s->cdim[1], s->cdim[2]}; /* Run through the particles and get their cell index. */ // tic = getticks(); const size_t ind_size = s->size_parts; - size_t *ind; - if ((ind = (size_t *)malloc(sizeof(size_t) * ind_size)) == NULL) + int *ind; + if ((ind = (int *)malloc(sizeof(int) * ind_size)) == NULL) error("Failed to allocate temporary particle indices."); for (int k = 0; k < nr_parts; k++) { struct part *restrict p = &s->parts[k]; @@ -353,37 +345,91 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cells[ind[k]].count++; } + // message( "getting particle indices took %.3f %s." , + // clocks_from_ticks(getticks() - tic), clocks_getunit()): + + /* Run through the gravity particles and get their cell index. */ + // tic = getticks(); + const size_t gind_size = s->size_gparts; + int *gind; + if ((gind = (int *)malloc(sizeof(int) * gind_size)) == NULL) + error("Failed to allocate temporary g-particle indices."); + for (int k = 0; k < nr_gparts; k++) { + struct gpart *restrict gp = &s->gparts[k]; + for (int j = 0; j < 3; j++) + if (gp->x[j] < 0.0) + gp->x[j] += dim[j]; + else if (gp->x[j] >= dim[j]) + gp->x[j] -= dim[j]; + gind[k] = + cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); + cells[gind[k]].gcount++; + } // message( "getting particle indices took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()): +// clocks_from_ticks(getticks() - tic), clocks_getunit()); #ifdef WITH_MPI /* Move non-local parts to the end of the list. */ - const int nodeID = s->e->nodeID; + const int local_nodeID = s->e->nodeID; for (int k = 0; k < nr_parts; k++) - if (cells[ind[k]].nodeID != nodeID) { + if (cells[ind[k]].nodeID != local_nodeID) { cells[ind[k]].count -= 1; nr_parts -= 1; - struct part tp = s->parts[k]; + const struct part tp = s->parts[k]; s->parts[k] = s->parts[nr_parts]; s->parts[nr_parts] = tp; - struct xpart txp = s->xparts[k]; + if (s->parts[k].gpart != NULL) { + s->parts[k].gpart->part = &s->parts[k]; + } + if (s->parts[nr_parts].gpart != NULL) { + s->parts[nr_parts].gpart->part = &s->parts[nr_parts]; + } + const struct xpart txp = s->xparts[k]; s->xparts[k] = s->xparts[nr_parts]; s->xparts[nr_parts] = txp; - int t = ind[k]; + const int t = ind[k]; ind[k] = ind[nr_parts]; ind[nr_parts] = t; } + /* Move non-local gparts to the end of the list. */ + for (int k = 0; k < nr_gparts; k++) + if (cells[gind[k]].nodeID != local_nodeID) { + cells[gind[k]].gcount -= 1; + nr_gparts -= 1; + const struct gpart tp = s->gparts[k]; + s->gparts[k] = s->gparts[nr_gparts]; + s->gparts[nr_gparts] = tp; + if (s->gparts[k].id > 0) { + s->gparts[k].part->gpart = &s->gparts[k]; + } + if (s->gparts[nr_gparts].id > 0) { + s->gparts[nr_gparts].part->gpart = &s->gparts[nr_gparts]; + } + const int t = gind[k]; + gind[k] = gind[nr_gparts]; + gind[nr_gparts] = t; + } + /* Exchange the strays, note that this potentially re-allocates the parts arrays. */ - s->nr_parts = - nr_parts + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], - s->nr_parts - nr_parts); + /* TODO: This function also exchanges gparts, but this is shorted-out + until they are fully implemented. */ + size_t nr_parts_exchanged = s->nr_parts - nr_parts; + size_t nr_gparts_exchanged = s->nr_gparts - nr_gparts; + engine_exchange_strays(s->e, nr_parts, &ind[nr_parts], &nr_parts_exchanged, + nr_gparts, &gind[nr_gparts], &nr_gparts_exchanged); + + /* Add post-processing, i.e. re-linking/creating of gparts here. */ + + /* Set the new particle counts. */ + s->nr_parts = nr_parts + nr_parts_exchanged; + s->nr_gparts = nr_gparts + nr_gparts_exchanged; /* Re-allocate the index array if needed.. */ if (s->nr_parts > ind_size) { - size_t *ind_new; - if ((ind_new = (size_t *)malloc(sizeof(size_t) * s->nr_parts)) == NULL) + int *ind_new; + if ((ind_new = (int *)malloc(sizeof(int) * s->nr_parts)) == NULL) error("Failed to allocate temporary particle indices."); memcpy(ind_new, ind, sizeof(size_t) * nr_parts); free(ind); @@ -392,7 +438,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* Assign each particle to its cell. */ for (int k = nr_parts; k < s->nr_parts; k++) { - struct part *p = &s->parts[k]; + const struct part *const p = &s->parts[k]; ind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); cells[ind[k]].count += 1; @@ -423,65 +469,24 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* We no longer need the indices as of here. */ free(ind); - /* Run through the gravity particles and get their cell index. */ - // tic = getticks(); - const size_t gind_size = s->size_gparts; - size_t *gind; - if ((gind = (size_t *)malloc(sizeof(size_t) * gind_size)) == NULL) - error("Failed to allocate temporary g-particle indices."); - for (int k = 0; k < nr_gparts; k++) { - struct gpart *gp = &s->gparts[k]; - for (int j = 0; j < 3; j++) - if (gp->x[j] < 0.0) - gp->x[j] += dim[j]; - else if (gp->x[j] >= dim[j]) - gp->x[j] -= dim[j]; - gind[k] = - cell_getid(cdim, gp->x[0] * ih[0], gp->x[1] * ih[1], gp->x[2] * ih[2]); - cells[gind[k]].gcount++; - } -// message( "getting particle indices took %.3f %s." , -// clocks_from_ticks(getticks() - tic), clocks_getunit()); - #ifdef WITH_MPI - /* Move non-local gparts to the end of the list. */ - for (int k = 0; k < nr_gparts; k++) - if (cells[ind[k]].nodeID != nodeID) { - cells[ind[k]].gcount -= 1; - nr_gparts -= 1; - struct gpart tp = s->gparts[k]; - s->gparts[k] = s->gparts[nr_gparts]; - s->gparts[nr_gparts] = tp; - int t = ind[k]; - ind[k] = ind[nr_gparts]; - ind[nr_gparts] = t; - } - - /* Exchange the strays, note that this potentially re-allocates - the parts arrays. */ - // s->nr_gparts = - // nr_gparts + engine_exchange_strays(s->e, nr_gparts, &ind[nr_gparts], - // s->nr_gparts - nr_gparts); - if (nr_gparts > 0) - error("Need to implement the exchange of strays for the gparts"); - /* Re-allocate the index array if needed.. */ if (s->nr_gparts > gind_size) { - size_t *gind_new; - if ((gind_new = (size_t *)malloc(sizeof(size_t) * s->nr_gparts)) == NULL) + int *gind_new; + if ((gind_new = (int *)malloc(sizeof(int) * s->nr_gparts)) == NULL) error("Failed to allocate temporary g-particle indices."); - memcpy(gind_new, gind, sizeof(size_t) * nr_gparts); + memcpy(gind_new, gind, sizeof(int) * nr_gparts); free(gind); gind = gind_new; } /* Assign each particle to its cell. */ for (int k = nr_gparts; k < s->nr_gparts; k++) { - struct gpart *p = &s->gparts[k]; + const struct gpart *const p = &s->gparts[k]; gind[k] = cell_getid(cdim, p->x[0] * ih[0], p->x[1] * ih[1], p->x[2] * ih[2]); - cells[gind[k]].count += 1; + cells[gind[k]].gcount += 1; /* if ( cells[ ind[k] ].nodeID != nodeID ) error( "Received part that does not belong to me (nodeID=%i)." , cells[ ind[k] ].nodeID ); */ @@ -501,6 +506,28 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { /* We no longer need the indices as of here. */ free(gind); + /* Verify that the links are correct */ + /* MATTHIEU: To be commented out once we are happy */ + for (size_t k = 0; k < nr_gparts; ++k) { + + if (s->gparts[k].id > 0) { + + if (s->gparts[k].part->gpart != &s->gparts[k]) error("Linking problem !"); + + if (s->gparts[k].x[0] != s->gparts[k].part->x[0] || + s->gparts[k].x[1] != s->gparts[k].part->x[1] || + s->gparts[k].x[2] != s->gparts[k].part->x[2]) + error("Linked particles are not at the same position !"); + } + } + for (size_t k = 0; k < nr_parts; ++k) { + + if (s->parts[k].gpart != NULL) { + + if (s->parts[k].gpart->part != &s->parts[k]) error("Linking problem !"); + } + } + /* Hook the cells up to the parts. */ // tic = getticks(); struct part *finger = s->parts; @@ -536,7 +563,7 @@ void space_rebuild(struct space *s, double cell_max, int verbose) { */ void space_split(struct space *s, struct cell *cells, int verbose) { - ticks tic = getticks(); + const ticks tic = getticks(); for (int k = 0; k < s->nr_cells; k++) scheduler_addtask(&s->e->sched, task_type_split_cell, task_subtype_none, k, @@ -560,7 +587,7 @@ void space_split(struct space *s, struct cell *cells, int verbose) { * @param verbose Are we talkative ? */ -void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, +void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose) { ticks tic = getticks(); @@ -608,7 +635,7 @@ void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, void space_do_parts_sort() { /* Pointers to the sorting data. */ - size_t *ind = space_sort_struct.ind; + int *ind = space_sort_struct.ind; struct part *parts = space_sort_struct.parts; struct xpart *xparts = space_sort_struct.xparts; @@ -730,7 +757,7 @@ void space_do_parts_sort() { } /* main loop. */ } -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, +void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min, int max) { struct qstack { diff --git a/src/space.h b/src/space.h index 91485ff7e2ebe9da8ab927748589ae9f71320803..e761595838ae78b0d8a67cca676cfa59f3f700f6 100644 --- a/src/space.h +++ b/src/space.h @@ -64,9 +64,6 @@ struct space { /* The minimum and maximum cutoff radii. */ double h_max, cell_min; - /* Current time step for particles. */ - float dt_step; - /* Current maximum displacement for particles. */ float dx_max; @@ -106,6 +103,8 @@ struct space { /* Buffers for parts that we will receive from foreign cells. */ struct part *parts_foreign; size_t nr_parts_foreign, size_parts_foreign; + struct gpart *gparts_foreign; + size_t nr_gparts_foreign, size_gparts_foreign; }; /* Interval stack necessary for parallel particle sorting. */ @@ -117,7 +116,7 @@ struct qstack { struct parallel_sort { struct part *parts; struct xpart *xparts; - size_t *ind; + int *ind; struct qstack *stack; unsigned int stack_size; volatile unsigned int first, last, waiting; @@ -125,9 +124,9 @@ struct parallel_sort { extern struct parallel_sort space_sort_struct; /* function prototypes. */ -void space_parts_sort(struct space *s, size_t *ind, size_t N, int min, int max, +void space_parts_sort(struct space *s, int *ind, size_t N, int min, int max, int verbose); -void space_gparts_sort(struct gpart *gparts, size_t *ind, size_t N, int min, +void space_gparts_sort(struct gpart *gparts, int *ind, size_t N, int min, int max); struct cell *space_getcell(struct space *s); int space_getsid(struct space *s, struct cell **ci, struct cell **cj, diff --git a/src/swift.h b/src/swift.h index 9ab090dccd195ff4927d3e614e446b36d273f824..e568a28c888295affc9ec45b6d059d34f5b4bf04 100644 --- a/src/swift.h +++ b/src/swift.h @@ -27,7 +27,6 @@ #include "cell.h" #include "clocks.h" #include "const.h" -#include "const.h" #include "cycle.h" #include "debug.h" #include "engine.h" @@ -38,7 +37,9 @@ #include "map.h" #include "multipole.h" #include "parallel_io.h" +#include "parser.h" #include "part.h" +#include "partition.h" #include "queue.h" #include "runner.h" #include "scheduler.h" @@ -47,9 +48,8 @@ #include "space.h" #include "task.h" #include "timers.h" -#include "units.h" #include "tools.h" -#include "partition.h" +#include "units.h" #include "version.h" #endif /* SWIFT_SWIFT_H */ diff --git a/src/tools.c b/src/tools.c index 5feba7759f730faea1f38ceb9835f2076bc37a56..d25b7401a1e0515c650333b41193d54b5e155d39 100644 --- a/src/tools.c +++ b/src/tools.c @@ -236,6 +236,53 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { } } +void self_all_density(struct runner *r, struct cell *ci) { + float r2, hi, hj, hig2, hjg2, dxi[3]; //, dxj[3]; + struct part *pi, *pj; + + /* Implements a double-for loop and checks every interaction */ + for (int i = 0; i < ci->count; ++i) { + + pi = &ci->parts[i]; + hi = pi->h; + hig2 = hi * hi * kernel_gamma2; + + for (int j = i + 1; j < ci->count; ++j) { + + pj = &ci->parts[j]; + hj = pj->h; + hjg2 = hj * hj * kernel_gamma2; + + if (pi == pj) continue; + + /* Pairwise distance */ + r2 = 0.0f; + for (int k = 0; k < 3; k++) { + dxi[k] = ci->parts[i].x[k] - ci->parts[j].x[k]; + r2 += dxi[k] * dxi[k]; + } + + /* Hit or miss? */ + if (r2 < hig2) { + + /* Interact */ + runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj); + } + + /* Hit or miss? */ + if (r2 < hjg2) { + + dxi[0] = -dxi[0]; + dxi[1] = -dxi[1]; + dxi[2] = -dxi[2]; + + /* Interact */ + runner_iact_nonsym_density(r2, dxi, hj, hi, pj, pi); + } + } + } +} + void pairs_single_grav(double *dim, long long int pid, struct gpart *__restrict__ parts, int N, int periodic) { @@ -253,9 +300,9 @@ void pairs_single_grav(double *dim, long long int pid, break; if (k == N) error("Part not found."); pi = parts[k]; - pi.a[0] = 0.0f; - pi.a[1] = 0.0f; - pi.a[2] = 0.0f; + pi.a_grav[0] = 0.0f; + pi.a_grav[1] = 0.0f; + pi.a_grav[2] = 0.0f; /* Loop over all particle pairs. */ for (k = 0; k < N; k++) { @@ -273,15 +320,15 @@ void pairs_single_grav(double *dim, long long int pid, } r2 = fdx[0] * fdx[0] + fdx[1] * fdx[1] + fdx[2] * fdx[2]; runner_iact_grav(r2, fdx, &pi, &pj); - a[0] += pi.a[0]; - a[1] += pi.a[1]; - a[2] += pi.a[2]; - aabs[0] += fabsf(pi.a[0]); - aabs[1] += fabsf(pi.a[1]); - aabs[2] += fabsf(pi.a[2]); - pi.a[0] = 0.0f; - pi.a[1] = 0.0f; - pi.a[2] = 0.0f; + a[0] += pi.a_grav[0]; + a[1] += pi.a_grav[1]; + a[2] += pi.a_grav[2]; + aabs[0] += fabsf(pi.a_grav[0]); + aabs[1] += fabsf(pi.a_grav[1]); + aabs[2] += fabsf(pi.a_grav[2]); + pi.a_grav[0] = 0.0f; + pi.a_grav[1] = 0.0f; + pi.a_grav[2] = 0.0f; } /* Dump the result. */ diff --git a/src/tools.h b/src/tools.h index 59646291bda46a7dd0f5a34e158e3e0a6f21d3ca..ccffc77ceb8a967fd40c3737651ba75d529eee0f 100644 --- a/src/tools.h +++ b/src/tools.h @@ -33,6 +33,7 @@ void pairs_single_density(double *dim, long long int pid, struct part *__restrict__ parts, int N, int periodic); void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj); +void self_all_density(struct runner *r, struct cell *ci); void pairs_n2(double *dim, struct part *__restrict__ parts, int N, int periodic); diff --git a/tests/Makefile.am b/tests/Makefile.am index f0bfbefd3c7f4591134d1707c4ac9bf63278e855..d66282059d874f345437d779d59ec3edb08e47cb 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -21,10 +21,12 @@ AM_CFLAGS = -I../src $(HDF5_CPPFLAGS) -DTIMER AM_LDFLAGS = ../src/.libs/libswiftsim.a $(HDF5_LDFLAGS) $(HDF5_LIBS) # List of programs and scripts to run in the test suite -TESTS = testGreetings testReading.sh testSingle testTimeIntegration +TESTS = testGreetings testReading.sh testSingle testPair.sh testPairPerturbed.sh \ + test27cells.sh test27cellsPerturbed.sh testParser.sh # List of test programs to compile -check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration testSPHStep testVectorize +check_PROGRAMS = testGreetings testReading testSingle testTimeIntegration \ + testSPHStep testPair test27cells testParser # Sources for the individual programs testGreetings_SOURCES = testGreetings.c @@ -37,7 +39,13 @@ testSPHStep_SOURCES = testSPHStep.c testSingle_SOURCES = testSingle.c -testVectorize_SOURCES = testVectorize.c +testPair_SOURCES = testPair.c + +test27cells_SOURCES = test27cells.c + +testParser_SOURCES = testParser.c # Files necessary for distribution -EXTRA_DIST = testReading.sh makeInput.py +EXTRA_DIST = testReading.sh makeInput.py testPair.sh testPairPerturbed.sh \ + test27cells.sh test27cellsPerturbed.sh tolerance.dat testParser.sh \ + testParserInput.yaml diff --git a/tests/difffloat.py b/tests/difffloat.py new file mode 100644 index 0000000000000000000000000000000000000000..bbb7c95a1e77e04bbe21bec6dc6c5d529cd77c70 --- /dev/null +++ b/tests/difffloat.py @@ -0,0 +1,103 @@ +############################################################################### + # This file is part of SWIFT. + # Copyright (c) 2016 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + # + # This program is free software: you can redistribute it and/or modify + # it under the terms of the GNU Lesser General Public License as published + # by the Free Software Foundation, either version 3 of the License, or + # (at your option) any later version. + # + # This program is distributed in the hope that it will be useful, + # but WITHOUT ANY WARRANTY; without even the implied warranty of + # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + # GNU General Public License for more details. + # + # You should have received a copy of the GNU Lesser General Public License + # along with this program. If not, see <http://www.gnu.org/licenses/>. + # + ############################################################################## + +from numpy import * +import sys + +abs_tol = 1e-7 +rel_tol = 1e-7 + +# Compares the content of two ASCII tables of floats line by line and +# reports all differences beyond the given tolerances +# Comparisons are done both in absolute and relative terms + +# Individual tolerances for each column can be provided in a file + +file1 = sys.argv[1] +file2 = sys.argv[2] +fileTol = "" + +if len(sys.argv) == 4: + fileTol = sys.argv[3] + +data1 = loadtxt(file1) +data2 = loadtxt(file2) +if fileTol != "": + dataTol = loadtxt(fileTol) + n_linesTol = shape(dataTol)[0] + n_columnsTol = shape(dataTol)[1] + + +if shape(data1) != shape(data2): + print "Non-matching array sizes in the files", file1, "and", file2, "." + sys.exit(1) + +n_lines = shape(data1)[0] +n_columns = shape(data1)[1] + +if fileTol != "": + if n_linesTol != 2: + print "Incorrect number of lines in tolerance file '%s'."%fileTol + if n_columnsTol != n_columns: + print "Incorrect number of columns in tolerance file '%s'."%fileTol + +if fileTol == "": + print "Absolute difference tolerance:", abs_tol + print "Relative difference tolerance:", rel_tol + absTol = ones(n_columns) * abs_tol + relTol = ones(n_columns) * rel_tol +else: + print "Tolerances read from file" + absTol = dataTol[0,:] + relTol = dataTol[1,:] + +error = False +for i in range(n_lines): + for j in range(n_columns): + + abs_diff = abs(data1[i,j] - data2[i,j]) + + sum = abs(data1[i,j] + data2[i,j]) + if sum > 0: + rel_diff = abs(data1[i,j] - data2[i,j]) / sum + else: + rel_diff = 0. + + if( abs_diff > absTol[j]): + print "Absolute difference larger than tolerance (%e) on line %d, column %d:"%(absTol[j], i,j) + print "%10s: a = %e"%("File 1", data1[i,j]) + print "%10s: b = %e"%("File 2", data2[i,j]) + print "%10s: |a-b| = %e"%("Difference", abs_diff) + print "" + error = True + + if( rel_diff > relTol[j]): + print "Relative difference larger than tolerance (%e) on line %d, column %d:"%(relTol[j], i,j) + print "%10s: a = %e"%("File 1", data1[i,j]) + print "%10s: b = %e"%("File 2", data2[i,j]) + print "%10s: |a-b|/|a+b| = %e"%("Difference", rel_diff) + print "" + error = True + + +if error: + exit(1) +else: + print "No differences found" + exit(0) diff --git a/tests/test27cells.c b/tests/test27cells.c new file mode 100644 index 0000000000000000000000000000000000000000..74c38996a81056b10633bf2bbf18cc7cff7e8f0d --- /dev/null +++ b/tests/test27cells.c @@ -0,0 +1,367 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include "swift.h" + +/** + * Returns a random number (uniformly distributed) in [a,b[ + */ +double random_uniform(double a, double b) { + return (rand() / (double)RAND_MAX) * (b - a) + a; +} + +/* n is both particles per axis and box size: + * particles are generated on a mesh with unit spacing + */ +struct cell *make_cell(size_t n, double *offset, double size, double h, + double density, long long *partId, double pert) { + const size_t count = n * n * n; + const double volume = size * size * size; + struct cell *cell = malloc(sizeof(struct cell)); + bzero(cell, sizeof(struct cell)); + + if (posix_memalign((void **)&cell->parts, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(cell->parts, count * sizeof(struct part)); + + /* Construct the parts */ + struct part *part = cell->parts; + for (size_t x = 0; x < n; ++x) { + for (size_t y = 0; y < n; ++y) { + for (size_t z = 0; z < n; ++z) { + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + // part->v[0] = part->x[0] - 1.5; + // part->v[1] = part->x[1] - 1.5; + // part->v[2] = part->x[2] - 1.5; + part->v[0] = random_uniform(-0.05, 0.05); + part->v[1] = random_uniform(-0.05, 0.05); + part->v[2] = random_uniform(-0.05, 0.05); + part->h = size * h / (float)n; + part->id = ++(*partId); + part->mass = density * volume / count; + part->ti_begin = 0; + part->ti_end = 1; + ++part; + } + } + } + + /* Cell properties */ + cell->split = 0; + cell->h_max = h; + cell->count = count; + cell->dx_max = 0.; + cell->h[0] = size; + cell->h[1] = size; + cell->h[2] = size; + cell->loc[0] = offset[0]; + cell->loc[1] = offset[1]; + cell->loc[2] = offset[2]; + + cell->ti_end_min = 1; + cell->ti_end_max = 1; + + cell->sorted = 0; + cell->sort = NULL; + cell->sortsize = 0; + runner_dosort(NULL, cell, 0x1FFF, 0); + + return cell; +} + +void clean_up(struct cell *ci) { + free(ci->parts); + free(ci->sort); + free(ci); +} + +/** + * @brief Initializes all particles field to be ready for a density calculation + */ +void zero_particle_fields(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + c->parts[pid].rho = 0.f; + c->parts[pid].rho_dh = 0.f; + hydro_init_part(&c->parts[pid]); + } +} + +/** + * @brief Ends the loop by adding the appropriate coefficients + */ +void end_calculation(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + hydro_end_density(&c->parts[pid], 1); + } +} + +/** + * @brief Dump all the particles to a file + */ +void dump_particle_fields(char *fileName, struct cell *main_cell, + struct cell **cells) { + + FILE *file = fopen(fileName, "w"); + + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " + "%13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", + "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); + + fprintf(file, "# Main cell --------------------------------------------\n"); + + /* Write main cell */ + for (size_t pid = 0; pid < main_cell->count; pid++) { + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + main_cell->parts[pid].id, main_cell->parts[pid].x[0], + main_cell->parts[pid].x[1], main_cell->parts[pid].x[2], + main_cell->parts[pid].v[0], main_cell->parts[pid].v[1], + main_cell->parts[pid].v[2], main_cell->parts[pid].rho, + main_cell->parts[pid].rho_dh, main_cell->parts[pid].density.wcount, + main_cell->parts[pid].density.wcount_dh, +#ifdef GADGET2_SPH + main_cell->parts[pid].div_v, main_cell->parts[pid].density.rot_v[0], + main_cell->parts[pid].density.rot_v[1], + main_cell->parts[pid].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + + /* Write all other cells */ + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + + struct cell *cj = cells[i * 9 + j * 3 + k]; + if (cj == main_cell) continue; + + fprintf(file, + "# Offset: [%2d %2d %2d] -----------------------------------\n", + i - 1, j - 1, k - 1); + + for (size_t pjd = 0; pjd < cj->count; pjd++) { + fprintf( + file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1], + cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1], + cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh, + cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, +#ifdef GADGET2_SPH + cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], + cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + } + } + } + fclose(file); +} + +/* Just a forward declaration... */ +void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); +void runner_doself1_density(struct runner *r, struct cell *ci); + +/* And go... */ +int main(int argc, char *argv[]) { + + size_t runs = 0, particles = 0; + double h = 1.12575, size = 1., rho = 1.; + double perturbation = 0.; + char outputFileNameExtension[200] = ""; + char outputFileName[200] = ""; + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + /* Get some randomness going */ + srand(0); + + char c; + while ((c = getopt(argc, argv, "m:s:h:p:r:t:d:f:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 's': + sscanf(optarg, "%lf", &size); + break; + case 'p': + sscanf(optarg, "%zu", &particles); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; + case 'm': + sscanf(optarg, "%lf", &rho); + break; + case 'f': + strcpy(outputFileNameExtension, optarg); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || particles == 0 || runs == 0) { + printf( + "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" + "\nGenerates a cell pair, filled with particles on a Cartesian grid." + "\nThese are then interacted using runner_dopair1_density." + "\n\nOptions:" + "\n-h DISTANCE=1.1255 - Smoothing length" + "\n-m rho - Physical density in the cell" + "\n-s size - Physical size of the cell" + "\n-d pert - Perturbation to apply to the particles [0,1[" + "\n-f fileName - Part of the file name used to save the dumps\n", + argv[0]); + exit(1); + } + + /* Help users... */ + message("Smoothing length: h = %f", h); + message("Neighbour target: N = %f", kernel_nwneigh); + + /* Build the infrastructure */ + struct space space; + space.periodic = 0; + space.h_max = h; + + struct engine engine; + engine.s = &space; + engine.time = 0.1f; + engine.ti_current = 1; + + struct runner runner; + runner.e = &engine; + + /* Construct some cells */ + struct cell *cells[27]; + struct cell *main_cell; + static long long partId = 0; + for (int i = 0; i < 3; ++i) { + for (int j = 0; j < 3; ++j) { + for (int k = 0; k < 3; ++k) { + + double offset[3] = {i * size, j * size, k * size}; + cells[i * 9 + j * 3 + k] = + make_cell(particles, offset, size, h, rho, &partId, perturbation); + } + } + } + + /* Store the main cell for future use */ + main_cell = cells[13]; + + ticks time = 0; + for (size_t i = 0; i < runs; ++i) { + + /* Zero the fields */ + for (int j = 0; j < 27; ++j) zero_particle_fields(cells[j]); + + const ticks tic = getticks(); + + /* Run all the pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) + runner_dopair1_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + runner_doself1_density(&runner, main_cell); + + const ticks toc = getticks(); + time += toc - tic; + + /* Let's get physical ! */ + end_calculation(main_cell); + + /* Dump if necessary */ + if (i % 50 == 0) { + sprintf(outputFileName, "swift_dopair_27_%s.dat", + outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + } + } + + /* Output timing */ + message("SWIFT calculation took : %15lli ticks.", time / runs); + + /* Now perform a brute-force version for accuracy tests */ + + /* Zero the fields */ + for (int i = 0; i < 27; ++i) zero_particle_fields(cells[i]); + + const ticks tic = getticks(); + + /* Run all the brute-force pairs */ + for (int j = 0; j < 27; ++j) + if (cells[j] != main_cell) pairs_all_density(&runner, main_cell, cells[j]); + + /* And now the self-interaction */ + self_all_density(&runner, main_cell); + + const ticks toc = getticks(); + + /* Let's get physical ! */ + end_calculation(main_cell); + + /* Dump */ + sprintf(outputFileName, "brute_force_27_%s.dat", outputFileNameExtension); + dump_particle_fields(outputFileName, main_cell, cells); + + /* Output timing */ + message("Brute force calculation took : %15lli ticks.", toc - tic); + + /* Clean things to make the sanitizer happy ... */ + for (int i = 0; i < 27; ++i) clean_up(cells[i]); + + return 0; +} diff --git a/tests/test27cells.sh b/tests/test27cells.sh new file mode 100755 index 0000000000000000000000000000000000000000..09d2513bd3ef404c7bf434948af7f10306c98ede --- /dev/null +++ b/tests/test27cells.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_27_standard.dat swift_dopair_27_standard.dat + +./test27cells -p 6 -r 1 -d 0 -f standard + +python difffloat.py brute_force_27_standard.dat swift_dopair_27_standard.dat tolerance.dat + +exit $? diff --git a/tests/test27cellsPerturbed.sh b/tests/test27cellsPerturbed.sh new file mode 100755 index 0000000000000000000000000000000000000000..73d2933984d38f7dcc992f07ec2e016f3544b636 --- /dev/null +++ b/tests/test27cellsPerturbed.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat + +./test27cells -p 6 -r 1 -d 0.1 -f perturbed + +python difffloat.py brute_force_27_perturbed.dat swift_dopair_27_perturbed.dat tolerance.dat + +exit $? diff --git a/tests/testPair.c b/tests/testPair.c new file mode 100644 index 0000000000000000000000000000000000000000..23ce4eb3de460f4e17b7b6f81cb39a628f3d100f --- /dev/null +++ b/tests/testPair.c @@ -0,0 +1,305 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2015 Matthieu Schaller (matthieu.schaller@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include <fenv.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include "swift.h" + +/** + * Returns a random number (uniformly distributed) in [a,b[ + */ +double random_uniform(double a, double b) { + return (rand() / (double)RAND_MAX) * (b - a) + a; +} + +/* n is both particles per axis and box size: + * particles are generated on a mesh with unit spacing + */ +struct cell *make_cell(size_t n, double *offset, double size, double h, + double density, unsigned long long *partId, + double pert) { + const size_t count = n * n * n; + const double volume = size * size * size; + struct cell *cell = malloc(sizeof(struct cell)); + bzero(cell, sizeof(struct cell)); + + if (posix_memalign((void **)&cell->parts, part_align, + count * sizeof(struct part)) != 0) { + error("couldn't allocate particles, no. of particles: %d", (int)count); + } + bzero(cell->parts, count * sizeof(struct part)); + + /* Construct the parts */ + struct part *part = cell->parts; + for (size_t x = 0; x < n; ++x) { + for (size_t y = 0; y < n; ++y) { + for (size_t z = 0; z < n; ++z) { + part->x[0] = + offset[0] + + size * (x + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[1] = + offset[1] + + size * (y + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + part->x[2] = + offset[2] + + size * (z + 0.5 + random_uniform(-0.5, 0.5) * pert) / (float)n; + // part->v[0] = part->x[0] - 1.5; + // part->v[1] = part->x[1] - 1.5; + // part->v[2] = part->x[2] - 1.5; + part->v[0] = random_uniform(-0.05, 0.05); + part->v[1] = random_uniform(-0.05, 0.05); + part->v[2] = random_uniform(-0.05, 0.05); + part->h = size * h / (float)n; + part->id = ++(*partId); + part->mass = density * volume / count; + part->ti_begin = 0; + part->ti_end = 1; + ++part; + } + } + } + + /* Cell properties */ + cell->split = 0; + cell->h_max = h; + cell->count = count; + cell->dx_max = 0.; + cell->h[0] = n; + cell->h[1] = n; + cell->h[2] = n; + cell->loc[0] = offset[0]; + cell->loc[1] = offset[1]; + cell->loc[2] = offset[2]; + + cell->ti_end_min = 1; + cell->ti_end_max = 1; + + cell->sorted = 0; + cell->sort = NULL; + cell->sortsize = 0; + runner_dosort(NULL, cell, 0x1FFF, 0); + + return cell; +} + +void clean_up(struct cell *ci) { + free(ci->parts); + free(ci->sort); + free(ci); +} + +/** + * @brief Initializes all particles field to be ready for a density calculation + */ +void zero_particle_fields(struct cell *c) { + + for (size_t pid = 0; pid < c->count; pid++) { + c->parts[pid].rho = 0.f; + c->parts[pid].rho_dh = 0.f; + hydro_init_part(&c->parts[pid]); + } +} + +/** + * @brief Dump all the particles to a file + */ +void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { + + FILE *file = fopen(fileName, "w"); + + /* Write header */ + fprintf(file, + "# %4s %10s %10s %10s %10s %10s %10s %13s %13s %13s %13s %13s " + "%13s %13s %13s\n", + "ID", "pos_x", "pos_y", "pos_z", "v_x", "v_y", "v_z", "rho", "rho_dh", + "wcount", "wcount_dh", "div_v", "curl_vx", "curl_vy", "curl_vz"); + + fprintf(file, "# ci --------------------------------------------\n"); + + for (size_t pid = 0; pid < ci->count; pid++) { + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + ci->parts[pid].id, ci->parts[pid].x[0], ci->parts[pid].x[1], + ci->parts[pid].x[2], ci->parts[pid].v[0], ci->parts[pid].v[1], + ci->parts[pid].v[2], ci->parts[pid].rho, ci->parts[pid].rho_dh, + ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh, +#ifdef GADGET2_SPH + ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0], + ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + + fprintf(file, "# cj --------------------------------------------\n"); + + for (size_t pjd = 0; pjd < cj->count; pjd++) { + fprintf(file, + "%6llu %10f %10f %10f %10f %10f %10f %13e %13e %13e %13e %13e " + "%13e %13e %13e\n", + cj->parts[pjd].id, cj->parts[pjd].x[0], cj->parts[pjd].x[1], + cj->parts[pjd].x[2], cj->parts[pjd].v[0], cj->parts[pjd].v[1], + cj->parts[pjd].v[2], cj->parts[pjd].rho, cj->parts[pjd].rho_dh, + cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, +#ifdef GADGET2_SPH + cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], + cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2] +#else + 0., 0., 0., 0. +#endif + ); + } + + fclose(file); +} + +/* Just a forward declaration... */ +void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); + +int main(int argc, char *argv[]) { + size_t particles = 0, runs = 0, volume, type = 0; + double offset[3] = {0, 0, 0}, h = 1.1255, size = 1., rho = 1.; + double perturbation = 0.1; + struct cell *ci, *cj; + struct space space; + struct engine engine; + struct runner runner; + char c; + static unsigned long long partId = 0; + char outputFileNameExtension[200] = ""; + char outputFileName[200] = ""; + ticks tic, toc, time; + + /* Initialize CPU frequency, this also starts time. */ + unsigned long long cpufreq = 0; + clocks_set_cpufreq(cpufreq); + + srand(0); + + while ((c = getopt(argc, argv, "h:p:r:t:d:f:")) != -1) { + switch (c) { + case 'h': + sscanf(optarg, "%lf", &h); + break; + case 'p': + sscanf(optarg, "%zu", &particles); + break; + case 'r': + sscanf(optarg, "%zu", &runs); + break; + case 't': + sscanf(optarg, "%zu", &type); + break; + case 'd': + sscanf(optarg, "%lf", &perturbation); + break; + case 'f': + strcpy(outputFileNameExtension, optarg); + break; + case '?': + error("Unknown option."); + break; + } + } + + if (h < 0 || particles == 0 || runs == 0 || type > 2) { + printf( + "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" + "\nGenerates a cell pair, filled with particles on a Cartesian grid." + "\nThese are then interacted using runner_dopair1_density." + "\n\nOptions:" + "\n-t TYPE=0 - cells share face (0), edge (1) or corner (2)" + "\n-h DISTANCE=1.1255 - smoothing length" + "\n-d pert - perturbation to apply to the particles [0,1[" + "\n-f fileName - part of the file name used to save the dumps\n", + argv[0]); + exit(1); + } + + space.periodic = 0; + space.h_max = h; + + engine.s = &space; + engine.time = 0.1f; + engine.ti_current = 1; + runner.e = &engine; + + volume = particles * particles * particles; + message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part)); + + ci = make_cell(particles, offset, size, h, rho, &partId, perturbation); + for (size_t i = 0; i < type + 1; ++i) offset[i] = 1.; + cj = make_cell(particles, offset, size, h, rho, &partId, perturbation); + + time = 0; + for (size_t i = 0; i < runs; ++i) { + + /* Zero the fields */ + zero_particle_fields(ci); + zero_particle_fields(cj); + + tic = getticks(); + + /* Run the test */ + runner_dopair1_density(&runner, ci, cj); + + toc = getticks(); + time += toc - tic; + + /* Dump if necessary */ + if (i % 50 == 0) { + sprintf(outputFileName, "swift_dopair_%s.dat", outputFileNameExtension); + dump_particle_fields(outputFileName, ci, cj); + } + } + + /* Output timing */ + message("SWIFT calculation took %lli ticks.", time / runs); + + /* Now perform a brute-force version for accuracy tests */ + + /* Zero the fields */ + zero_particle_fields(ci); + zero_particle_fields(cj); + + tic = getticks(); + + /* Run the brute-force test */ + pairs_all_density(&runner, ci, cj); + + toc = getticks(); + + /* Dump */ + sprintf(outputFileName, "brute_force_%s.dat", outputFileNameExtension); + dump_particle_fields(outputFileName, ci, cj); + + /* Output timing */ + message("Brute force calculation took %lli ticks.", toc - tic); + + /* Clean things to make the sanitizer happy ... */ + clean_up(ci); + clean_up(cj); + + return 0; +} diff --git a/tests/testPair.sh b/tests/testPair.sh new file mode 100755 index 0000000000000000000000000000000000000000..f6f505e56a2c7a5c3cff0ec04bd871278634193c --- /dev/null +++ b/tests/testPair.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_standard.dat swift_dopair_standard.dat + +./testPair -p 6 -r 1 -d 0 -f standard + +python difffloat.py brute_force_standard.dat swift_dopair_standard.dat tolerance.dat + +exit $? diff --git a/tests/testPairPerturbed.sh b/tests/testPairPerturbed.sh new file mode 100755 index 0000000000000000000000000000000000000000..544ba1b032da8426c065dcfb2ce3ee554c5e76a1 --- /dev/null +++ b/tests/testPairPerturbed.sh @@ -0,0 +1,8 @@ +#!/bin/bash +rm brute_force_perturbed.dat swift_dopair_perturbed.dat + +./testPair -p 6 -r 1 -d 0.1 -f perturbed + +python difffloat.py brute_force_perturbed.dat swift_dopair_perturbed.dat tolerance.dat + +exit $? diff --git a/tests/testParser.c b/tests/testParser.c new file mode 100644 index 0000000000000000000000000000000000000000..a4b8789fca056fef659bca78eae9d0effb2ceb66 --- /dev/null +++ b/tests/testParser.c @@ -0,0 +1,67 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (C) 2016 James Willis (james.s.willis@durham.ac.uk). + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +#include "parser.h" +#include <assert.h> +#include <string.h> +#include <math.h> + +int main(int argc, char *argv[]) { + + const char *input_file = argv[1]; + + /* Create a structure to read file into. */ + struct swift_params param_file; + + /* Create variables that will be set from the parameter file. */ + int no_of_threads = 0; + int no_of_time_steps = 0; + float max_h = 0.0f; + double start_time = 0.0; + char ic_file[PARSER_MAX_LINE_SIZE]; + + /* Read the parameter file. */ + parser_read_file(input_file, ¶m_file); + + /* Print the contents of the structure. */ + parser_print_params(¶m_file); + + /* Retrieve parameters and store them in variables defined above. + * Have to specify the name of the parameter as it appears in the + * input file: testParserInput.yaml.*/ + parser_get_param_int(¶m_file, "no_of_threads", &no_of_threads); + parser_get_param_int(¶m_file, "no_of_time_steps", &no_of_time_steps); + parser_get_param_float(¶m_file, "max_h", &max_h); + parser_get_param_double(¶m_file, "start_time", &start_time); + parser_get_param_string(¶m_file, "ic_file", ic_file); + + /* Print the variables to check their values are correct. */ + printf( + "no_of_threads: %d, no_of_time_steps: %d, max_h: %f, start_time: %lf, " + "ic_file: %s\n", + no_of_threads, no_of_time_steps, max_h, start_time, ic_file); + + assert(no_of_threads == 16); + assert(no_of_time_steps == 10); + assert(fabs(max_h - 1.1255) < 0.00001); + assert(fabs(start_time - 1.23456789) < 0.00001); + assert(strcmp(ic_file, "ic_file.ini") == 0); /*strcmp returns 0 if correct.*/ + + return 0; +} diff --git a/tests/testParser.sh b/tests/testParser.sh new file mode 100755 index 0000000000000000000000000000000000000000..3dad7f386f792ff2beb6e94eb093bad4085023a4 --- /dev/null +++ b/tests/testParser.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +./testParser testParserInput.yaml diff --git a/tests/testParserInput.yaml b/tests/testParserInput.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d695e6a8ddd327e31224f36a6e34767ea8d36408 --- /dev/null +++ b/tests/testParserInput.yaml @@ -0,0 +1,9 @@ +--- +no_of_threads: 16 # The number of threads that will be used. +no_of_time_steps: 10 +max_h: 1.1255 +start_time: 1.23456789 +#Input file +ic_file: ic_file.ini + +... diff --git a/tests/testReading.c b/tests/testReading.c index d2a2a766171a85ace486914f0f39a987d9d8c3d3..9dda4c7bad75d35a8a93e0c2acb0619409a91afd 100644 --- a/tests/testReading.c +++ b/tests/testReading.c @@ -22,7 +22,7 @@ int main() { - int Ngas = -1, Ngpart = -1; + size_t Ngas = 0, Ngpart = 0; int periodic = -1; int i, j, k, n; double dim[3]; diff --git a/tests/testSPHStep.c b/tests/testSPHStep.c index 984b8ea867250d0bda1bc14d2600279a27321b2c..223078ecb637e64d94e37cdf8c0f60a86bdd5ff7 100644 --- a/tests/testSPHStep.c +++ b/tests/testSPHStep.c @@ -77,6 +77,10 @@ struct cell *make_cell(size_t N, float cellSize, int offset[3], int id_offset) { #ifdef DEFAULT_SPH +/* Just a forward declaration... */ +void runner_doself1_density(struct runner *r, struct cell *ci); +void runner_doself2_force(struct runner *r, struct cell *ci); + /* Run a full time step integration for one cell */ int main() { @@ -132,7 +136,7 @@ int main() { /* Initialise the particles */ for (j = 0; j < 27; ++j) { - runner_doinit(&r, cells[j]); + runner_doinit(&r, cells[j], 0); } /* Compute density */ @@ -145,7 +149,7 @@ int main() { runner_doself2_force(&r, ci); runner_dokick(&r, ci, 1); - message("t_end=%f", p->t_end); + message("ti_end=%d", p->ti_end); free(ci->parts); free(ci->xparts); diff --git a/tests/testSingle.c b/tests/testSingle.c index c85b77ff1c5b2285c33fa7787bbd53deab463039..8771fba0c1912905d3936562fa9dad0223d89220 100644 --- a/tests/testSingle.c +++ b/tests/testSingle.c @@ -91,8 +91,8 @@ int main(int argc, char *argv[]) { p2.force.POrho2 = p2.u * (const_hydro_gamma - 1.0f) / p2.rho; /* Dump a header. */ - printParticle_single(&p1); - printParticle_single(&p2); + //printParticle_single(&p1, NULL); + //printParticle_single(&p2, NULL); printf("# r a_1 udt_1 a_2 udt_2\n"); /* Loop over the different radii. */ @@ -103,9 +103,9 @@ int main(int argc, char *argv[]) { r2 = dx[0] * dx[0]; /* Clear the particle fields. */ - p1.a[0] = 0.0f; + p1.a_hydro[0] = 0.0f; p1.force.u_dt = 0.0f; - p2.a[0] = 0.0f; + p2.a_hydro[0] = 0.0f; p2.force.u_dt = 0.0f; /* Interact the particles. */ @@ -130,8 +130,8 @@ int main(int argc, char *argv[]) { /* Output the results. */ printf( - "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], p1.a[0], - p1.a[1], p1.a[2], p1.force.u_dt, + "%.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e %.3e\n", -dx[0], + p1.a_hydro[0], p1.a_hydro[1], p1.a_hydro[2], p1.force.u_dt, /// -dx[0] , p1.rho , p1.density.wcount , p2.rho , p2.density.wcount , w, dwdx, gradw[0], gradw[1], gradw[2]); diff --git a/tests/testVectorize.c b/tests/testVectorize.c deleted file mode 100644 index a18b6e8af5ac3f7b94bd7be3bdf8fd21e49681ff..0000000000000000000000000000000000000000 --- a/tests/testVectorize.c +++ /dev/null @@ -1,212 +0,0 @@ -#include <fenv.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <unistd.h> -#include "swift.h" - -/* n is both particles per axis and box size: - * particles are generated on a mesh with unit spacing - */ -struct cell *make_cell(size_t n, double *offset, double h, - unsigned long long *partId) { - size_t count = n * n * n; - struct cell *cell = malloc(sizeof *cell); - struct part *part; - size_t x, y, z, size; - - size = count * sizeof(struct part); - if (posix_memalign((void **)&cell->parts, part_align, size) != 0) { - error("couldn't allocate particles, no. of particles: %d", (int)count); - } - - part = cell->parts; - for (x = 0; x < n; ++x) { - for (y = 0; y < n; ++y) { - for (z = 0; z < n; ++z) { - // Add .5 for symmetry: 0.5, 1.5, 2.5 vs. 0, 1, 2 - part->x[0] = x + offset[0] + 0.5; - part->x[1] = y + offset[1] + 0.5; - part->x[2] = z + offset[2] + 0.5; - part->v[0] = 1.0f; - part->v[1] = 1.0f; - part->v[2] = 1.0f; - part->h = h; - part->id = ++(*partId); - part->mass = 1.0f; - part->ti_begin = 0; - part->ti_end = 1; - ++part; - } - } - } - - cell->split = 0; - cell->h_max = h; - cell->count = count; - cell->dx_max = 1.; - cell->h[0] = n; - cell->h[1] = n; - cell->h[2] = n; - - cell->sort = malloc(13 * count * sizeof *cell->sort); - runner_dosort(NULL, cell, 0x1FFF, 0); - - return cell; -} - -void clean_up(struct cell *ci) { - free(ci->parts); - free(ci->sort); - free(ci); -} - -/** - * @brief Initializes all particles field to be ready for a density calculation - */ -void zero_particle_fields(struct cell *c) { - - for (size_t pid = 0; pid < c->count; pid++) { - c->parts[pid].rho = 0.f; - c->parts[pid].rho_dh = 0.f; - hydro_init_part(&c->parts[pid]); - } -} - -/** - * @brief Dump all the particles to a file - */ -void dump_particle_fields(char *fileName, struct cell *ci, struct cell *cj) { - - FILE *file = fopen(fileName, "w"); - - fprintf(file, - "# ID rho rho_dh wcount wcount_dh div_v curl_v:[x y z]\n"); - - for (size_t pid = 0; pid < ci->count; pid++) { - fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", ci->parts[pid].id, - ci->parts[pid].rho, ci->parts[pid].rho_dh, - ci->parts[pid].density.wcount, ci->parts[pid].density.wcount_dh, - ci->parts[pid].div_v, ci->parts[pid].density.rot_v[0], - ci->parts[pid].density.rot_v[1], ci->parts[pid].density.rot_v[2]); - } - - fprintf(file, "# -----------------------------------\n"); - - for (size_t pjd = 0; pjd < cj->count; pjd++) { - fprintf(file, "%6llu %f %f %f %f %f %f %f %f\n", cj->parts[pjd].id, - cj->parts[pjd].rho, cj->parts[pjd].rho_dh, - cj->parts[pjd].density.wcount, cj->parts[pjd].density.wcount_dh, - cj->parts[pjd].div_v, cj->parts[pjd].density.rot_v[0], - cj->parts[pjd].density.rot_v[1], cj->parts[pjd].density.rot_v[2]); - } - - fclose(file); -} - -/* Just a forward declaration... */ -void runner_dopair1_density(struct runner *r, struct cell *ci, struct cell *cj); - -int main(int argc, char *argv[]) { - size_t particles = 0, runs = 0, volume, type = 0; - double offset[3] = {0, 0, 0}, h = 1.1255; // * DIM/PARTS_PER_AXIS == * 1 - struct cell *ci, *cj; - struct space space; - struct engine engine; - struct runner runner; - char c; - static unsigned long long partId = 0; - ticks tic, toc, time; - - while ((c = getopt(argc, argv, "h:p:r:t:")) != -1) { - switch (c) { - case 'h': - sscanf(optarg, "%lf", &h); - break; - case 'p': - sscanf(optarg, "%zu", &particles); - break; - case 'r': - sscanf(optarg, "%zu", &runs); - break; - case 't': - sscanf(optarg, "%zu", &type); - break; - } - } - - if (h < 0 || particles == 0 || runs == 0 || type > 2) { - printf( - "\nUsage: %s -p PARTICLES_PER_AXIS -r NUMBER_OF_RUNS [OPTIONS...]\n" - "\nGenerates a cell pair, filled with particles on a Cartesian grid." - "\nThese are then interacted using runner_dopair1_density." - "\n\nOptions:" - "\n-t TYPE=0 - cells share face (0), edge (1) or corner (2)" - "\n-h DISTANCE=1.1255 - smoothing length\n", - argv[0]); - exit(1); - } - - volume = particles * particles * particles; - message("particles: %zu B\npositions: 0 B", 2 * volume * sizeof(struct part)); - - ci = make_cell(particles, offset, h, &partId); - for (size_t i = 0; i < type + 1; ++i) offset[i] = particles; - cj = make_cell(particles, offset, h, &partId); - - for (int i = 0; i < 3; ++i) { - space.h_max = h; - space.dt_step = 0.1; - } - - engine.s = &space; - engine.time = 0.1f; - runner.e = &engine; - - time = 0; - for (size_t i = 0; i < runs; ++i) { - - /* Zero the fields */ - zero_particle_fields(ci); - zero_particle_fields(cj); - - tic = getticks(); - - /* Run the test */ - runner_dopair1_density(&runner, ci, cj); - - toc = getticks(); - time += toc - tic; - - /* Dump if necessary */ - if (i % 50 == 0) dump_particle_fields("swift_dopair.dat", ci, cj); - } - - /* Output timing */ - message("SWIFT calculation took %lli ticks.", time / runs); - - /* Now perform a brute-force version for accuracy tests */ - - /* Zero the fields */ - zero_particle_fields(ci); - zero_particle_fields(cj); - - tic = getticks(); - - /* Run the test */ - pairs_all_density(&runner, ci, cj); - - toc = getticks(); - - /* Dump */ - dump_particle_fields("brute_force.dat", ci, cj); - - /* Output timing */ - message("Brute force calculation took %lli ticks.", toc - tic); - - /* Clean things to make the sanitizer happy ... */ - clean_up(ci); - clean_up(cj); - - return 0; -} diff --git a/tests/tolerance.dat b/tests/tolerance.dat new file mode 100644 index 0000000000000000000000000000000000000000..48de4383eab6214812183be25d3036a324ccbc27 --- /dev/null +++ b/tests/tolerance.dat @@ -0,0 +1,3 @@ +# ID pos_x pos_y pos_z v_x v_y v_z rho rho_dh wcount wcount_dh div_v curl_vx curl_vy curl_vz + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-5 1e-5 2e-5 3e-4 1e-5 1e-5 1e-5 1e-5 + 0 1e-6 1e-6 1e-6 1e-6 1e-6 1e-6 1e-5 1.2e-5 1e-5 1e-5 1e-4 1e-4 1e-4 1e-4