diff --git a/configure.ac b/configure.ac index 8d189c1210abf48304ca39b0fc6450323091eb7e..2fa871e22f08065527904eaa5838ac3afe725220 100644 --- a/configure.ac +++ b/configure.ac @@ -381,6 +381,7 @@ fi # Check whether we have any of the ARM v8.1 tick timers AX_ASM_ARM_PMCCNTR AX_ASM_ARM_CNTVCT + # See if we want memuse reporting. AC_ARG_ENABLE([memuse-reports], [AS_HELP_STRING([--enable-memuse-reports], @@ -393,6 +394,18 @@ if test "$enable_memuse_reports" = "yes"; then AC_DEFINE([SWIFT_MEMUSE_REPORTS],1,[Enable memory usage reports]) fi +# See if we want mpi reporting. +AC_ARG_ENABLE([mpiuse-reports], + [AS_HELP_STRING([--enable-mpiuse-reports], + [Output reports about MPI tasks requests@<:@yes/no@:>@] + )], + [enable_mpiuse_reports="$enableval"], + [enable_mpiuse_reports="no"] +) +if test "$enable_mpiuse_reports" = "yes"; then + AC_DEFINE([SWIFT_MPIUSE_REPORTS],1,[Enable MPI task reports]) +fi + # Define HAVE_POSIX_MEMALIGN if it works. AX_FUNC_POSIX_MEMALIGN diff --git a/doc/RTD/source/AnalysisTools/index.rst b/doc/RTD/source/AnalysisTools/index.rst index 8b4467f5f36a5e07f0b5446f4f590b2643990731..52783719b593ce08ea20a8a2c32ec140750da2ba 100644 --- a/doc/RTD/source/AnalysisTools/index.rst +++ b/doc/RTD/source/AnalysisTools/index.rst @@ -31,7 +31,7 @@ To solve this problem, you will need to either access them through an existing s or install ``npm`` and then run the following commands .. code-block:: bash - + npm install http-server -g http-server . @@ -78,3 +78,38 @@ the step, and the total memory still in use per label. Note this includes memory still active from previous steps and the total memory is also continued from the previous dump. +MPI task communication reports +------------------------------ + +When SWIFT is configured using the ``--enable-mpiuse-reports`` flag it will +log all asynchronous MPI communications made to send particle updates +between nodes to support the tasks. + +The output files are called ``mpiuse_report-rank<m>-step<n>.dat``, i.e. 
one +per rank per step. These have a line for each request for communication, either +an MPI_Irecv or MPI_Isend and a line for the subsequent completion (successful +MPI_Test). + +Each line of the logs contains the following information: + +.. code-block:: none + + stic: ticks since the start of this step + etic: ticks since the start of the simulation + dtic: ticks that the request was active + step: current step + rank: current rank + otherrank: rank that the request was sent to or expected from + type itype: task type as string and enum + subtype isubtype: task subtype as string and enum + activation: 1 if record for the start of a request, 0 if request completion + tag: MPI tag of the request + size: size, in bytes, of the request + sum: sum, in bytes, of all requests that are currently not logged as complete + +The stic values should be synchronized between ranks as all ranks have a +barrier in place to make sure they start the step together, so should be +suitable for matching between ranks. The unique keys to associate records +between ranks (so that the MPI_Isend and MPI_Irecv pairs can be identified) +are "otherrank/rank/subtype/tag/size" and "rank/otherrank/subtype/tag/size" +for send and recv respectively. When matching ignore step0. diff --git a/examples/main.c b/examples/main.c index d0fec1a2a2b62b69293f6a36aeb32a7ca04e1672..233ea703f40e32c89b28cdc29441e272d8e0444d 100644 --- a/examples/main.c +++ b/examples/main.c @@ -1231,6 +1231,15 @@ int main(int argc, char *argv[]) { } #endif + /* Dump MPI requests if collected. */ +#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) + { + char dumpfile[40]; + snprintf(dumpfile, 40, "mpiuse_report-rank%d-step%d.dat", engine_rank, 0); + mpiuse_log_dump(dumpfile, clocks_start_ticks); + } +#endif + /* Main simulation loop */ /* ==================== */ int force_stop = 0, resubmit = 0; @@ -1299,6 +1308,16 @@ int main(int argc, char *argv[]) { } #endif + /* Dump MPI requests if collected. 
*/ +#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) + { + char dumpfile[40]; + snprintf(dumpfile, 40, "mpiuse_report-rank%d-step%d.dat", engine_rank, + j + 1); + mpiuse_log_dump(dumpfile, e.tic_step); + } +#endif // WITH_MPI + #ifdef SWIFT_DEBUG_THREADPOOL /* Dump the task data using the given frequency. */ if (dump_threadpool && (dump_threadpool == 1 || j % dump_threadpool == 1)) { diff --git a/src/Makefile.am b/src/Makefile.am index 665aa4b24c94162fb8f772edd346f3c95a1d7ddb..c2a3a7755a188de6c907afe99e5ae1c4dfc5f3a8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -50,11 +50,12 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \ chemistry.h chemistry_io.h chemistry_struct.h cosmology.h restart.h space_getsid.h utilities.h \ mesh_gravity.h cbrt.h exp10.h velociraptor_interface.h swift_velociraptor_part.h outputlist.h \ logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h fof.h fof_struct.h fof_io.h \ - star_formation_struct.h star_formation.h \ + star_formation_struct.h star_formation.h star_formation_iact.h \ star_formation_logger.h star_formation_logger_struct.h \ pressure_floor.h pressure_floor_struct.h pressure_floor_iact.h \ - velociraptor_struct.h velociraptor_io.h random.h memuse.h black_holes.h black_holes_io.h \ - black_holes_properties.h black_holes_struct.h feedback.h feedback_struct.h feedback_properties.h + velociraptor_struct.h velociraptor_io.h random.h memuse.h mpiuse.h memuse_rnodes.h \ + black_holes.h black_holes_io.h black_holes_properties.h black_holes_struct.h \ + feedback.h feedback_struct.h feedback_properties.h # source files for EAGLE cooling EAGLE_COOLING_SOURCES = @@ -84,7 +85,7 @@ AM_SOURCES = space.c runner_main.c runner_doiact_hydro.c runner_doiact_grav.c \ part_type.c xmf.c gravity_properties.c gravity.c \ collectgroup.c hydro_space.c equation_of_state.c \ chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \ - outputlist.c velociraptor_dummy.c 
logger_io.c memuse.c fof.c \ + outputlist.c velociraptor_dummy.c logger_io.c memuse.c mpiuse.c memuse_rnodes.c fof.c \ hashmap.c pressure_floor.c \ $(EAGLE_COOLING_SOURCES) $(EAGLE_FEEDBACK_SOURCES) @@ -182,11 +183,11 @@ nobase_noinst_HEADERS = align.h approx_math.h atomic.h barrier.h cycle.h error.h potential/isothermal/potential.h potential/disc_patch/potential.h \ potential/sine_wave/potential.h \ star_formation/none/star_formation.h star_formation/none/star_formation_struct.h \ - star_formation/none/star_formation_io.h \ + star_formation/none/star_formation_io.h star_formation/none/star_formation_iact.h \ star_formation/EAGLE/star_formation.h star_formation/EAGLE/star_formation_struct.h \ - star_formation/EAGLE/star_formation_io.h \ + star_formation/EAGLE/star_formation_io.h star_formation/EAGLE/star_formation_iact.h \ star_formation/GEAR/star_formation.h star_formation/GEAR/star_formation_struct.h \ - star_formation/GEAR/star_formation_io.h \ + star_formation/GEAR/star_formation_io.h star_formation/GEAR/star_formation_iact.h \ star_formation/EAGLE/star_formation_logger.h star_formation/EAGLE/star_formation_logger_struct.h \ star_formation/GEAR/star_formation_logger.h star_formation/GEAR/star_formation_logger_struct.h \ star_formation/none/star_formation_logger.h star_formation/none/star_formation_logger_struct.h \ diff --git a/src/engine.c b/src/engine.c index d0011f4a294fccb212337f13a8eaec752d4f155e..b17bed977c84b2d316da2034fbfa450187a12275 100644 --- a/src/engine.c +++ b/src/engine.c @@ -2159,6 +2159,11 @@ void engine_step(struct engine *e) { struct clocks_time time1, time2; clocks_gettime(&time1); +#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) + /* We may want to compare times across ranks, so make sure all steps start + * at the same time, just different ticks. 
*/ + MPI_Barrier(MPI_COMM_WORLD); +#endif e->tic_step = getticks(); if (e->nodeID == 0) { diff --git a/src/engine_redistribute.c b/src/engine_redistribute.c index 3132ad2665c67cd244ae1ec9ece75726788c1506..e2c3ea056e57e5097785a9014e4e0d2500d52a52 100644 --- a/src/engine_redistribute.c +++ b/src/engine_redistribute.c @@ -29,7 +29,6 @@ #include "memswap.h" #ifdef WITH_MPI - /** * Do the exchange of one type of particles with all the other nodes. * @@ -44,6 +43,8 @@ * @param mpi_type the MPI_Datatype for these particles. * @param nr_nodes the number of nodes to exchange with. * @param nodeID the id of this node. + * @param syncredist whether to use slower more memory friendly synchronous + * exchanges. * * @result new particle data constructed from all the exchanges with the * given alignment. @@ -51,7 +52,7 @@ static void *engine_do_redistribute(const char *label, int *counts, char *parts, size_t new_nr_parts, size_t sizeofparts, size_t alignsize, MPI_Datatype mpi_type, - int nr_nodes, int nodeID) { + int nr_nodes, int nodeID, int syncredist) { /* Allocate a new particle array with some extra margin */ char *parts_new = NULL; @@ -60,100 +61,178 @@ static void *engine_do_redistribute(const char *label, int *counts, char *parts, sizeofparts * new_nr_parts * engine_redistribute_alloc_margin) != 0) error("Failed to allocate new particle data."); - /* Prepare MPI requests for the asynchronous communications */ - MPI_Request *reqs; - if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) == - NULL) - error("Failed to allocate MPI request list."); + if (syncredist) { + + /* Slow synchronous redistribute,. */ + size_t offset_send = 0, offset_recv = 0; - /* Only send and receive only "chunk" particles per request. So we need to - * loop as many times as necessary here. Make 2Gb/sizeofparts so we only - * send 2Gb packets. */ - const int chunk = INT_MAX / sizeofparts; - int sent = 0; - int recvd = 0; + /* Only send and receive only "chunk" particles per request. 
+ * Fixing the message size to 2GB. */ + const int chunk = INT_MAX / sizeofparts; + int res = 0; + for (int k = 0; k < nr_nodes; k++) { + int kk = k; + + /* Rank 0 decides the index of sending node */ + MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD); + + int ind_recv = kk * nr_nodes + nodeID; + + if (nodeID == kk) { + + /* Send out our particles. */ + offset_send = 0; + for (int j = 0; j < nr_nodes; j++) { + + int ind_send = kk * nr_nodes + j; + + /* Just copy our own parts */ + if (counts[ind_send] > 0) { + if (j == nodeID) { + memcpy(&parts_new[offset_recv * sizeofparts], + &parts[offset_send * sizeofparts], + sizeofparts * counts[ind_recv]); + offset_send += counts[ind_send]; + offset_recv += counts[ind_recv]; + } else { + for (int i = 0, n = 0; i < counts[ind_send]; n++) { + + /* Count and index, with chunk parts at most. */ + size_t sendc = min(chunk, counts[ind_send] - i); + size_t sendo = offset_send + i; + + res = MPI_Send(&parts[sendo * sizeofparts], sendc, mpi_type, j, + n, MPI_COMM_WORLD); + if (res != MPI_SUCCESS) { + mpi_error(res, "Failed to send parts to node %i from %i.", j, + nodeID); + } + i += sendc; + } + offset_send += counts[ind_send]; + } + } + } + } else { + /* Listen for sends from kk. */ + if (counts[ind_recv] > 0) { + for (int i = 0, n = 0; i < counts[ind_recv]; n++) { + /* Count and index, with +chunk parts at most. */ + size_t recvc = min(chunk, counts[ind_recv] - i); + size_t recvo = offset_recv + i; + + MPI_Status status; + res = MPI_Recv(&parts_new[recvo * sizeofparts], recvc, mpi_type, kk, + n, MPI_COMM_WORLD, &status); + if (res != MPI_SUCCESS) { + mpi_error(res, "Failed to recv of parts from node %i to %i.", kk, + nodeID); + } + i += recvc; + } + offset_recv += counts[ind_recv]; + } + } + } - int activenodes = 1; - while (activenodes) { + } else { + /* Asynchronous redistribute, can take a lot of memory. 
*/ - for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; + /* Prepare MPI requests for the asynchronous communications */ + MPI_Request *reqs; + if ((reqs = (MPI_Request *)malloc(sizeof(MPI_Request) * 2 * nr_nodes)) == + NULL) + error("Failed to allocate MPI request list."); - /* Emit the sends and recvs for the data. */ - size_t offset_send = sent; - size_t offset_recv = recvd; - activenodes = 0; + /* Only send and receive only "chunk" particles per request. So we need to + * loop as many times as necessary here. Make 2Gb/sizeofparts so we only + * send 2Gb packets. */ + const int chunk = INT_MAX / sizeofparts; + int sent = 0; + int recvd = 0; - for (int k = 0; k < nr_nodes; k++) { + int activenodes = 1; + while (activenodes) { - /* Indices in the count arrays of the node of interest */ - const int ind_send = nodeID * nr_nodes + k; - const int ind_recv = k * nr_nodes + nodeID; + for (int k = 0; k < 2 * nr_nodes; k++) reqs[k] = MPI_REQUEST_NULL; - /* Are we sending any data this loop? */ - int sending = counts[ind_send] - sent; - if (sending > 0) { - activenodes++; - if (sending > chunk) sending = chunk; + /* Emit the sends and recvs for the data. */ + size_t offset_send = sent; + size_t offset_recv = recvd; + activenodes = 0; - /* If the send and receive is local then just copy. */ - if (k == nodeID) { - int receiving = counts[ind_recv] - recvd; - if (receiving > chunk) receiving = chunk; - memcpy(&parts_new[offset_recv * sizeofparts], - &parts[offset_send * sizeofparts], sizeofparts * receiving); - } else { - /* Otherwise send it. */ - int res = - MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, k, - ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]); - if (res != MPI_SUCCESS) - mpi_error(res, "Failed to isend parts to node %i.", k); - } - } + for (int k = 0; k < nr_nodes; k++) { - /* If we're sending to this node, then move past it to next. 
*/ - if (counts[ind_send] > 0) offset_send += counts[ind_send]; + /* Indices in the count arrays of the node of interest */ + const int ind_send = nodeID * nr_nodes + k; + const int ind_recv = k * nr_nodes + nodeID; - /* Are we receiving any data from this node? Note already done if coming - * from this node. */ - if (k != nodeID) { - int receiving = counts[ind_recv] - recvd; - if (receiving > 0) { + /* Are we sending any data this loop? */ + int sending = counts[ind_send] - sent; + if (sending > 0) { activenodes++; - if (receiving > chunk) receiving = chunk; - int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], receiving, - mpi_type, k, ind_recv, MPI_COMM_WORLD, - &reqs[2 * k + 1]); - if (res != MPI_SUCCESS) - mpi_error(res, "Failed to emit irecv of parts from node %i.", k); + if (sending > chunk) sending = chunk; + + /* If the send and receive is local then just copy. */ + if (k == nodeID) { + int receiving = counts[ind_recv] - recvd; + if (receiving > chunk) receiving = chunk; + memcpy(&parts_new[offset_recv * sizeofparts], + &parts[offset_send * sizeofparts], sizeofparts * receiving); + } else { + /* Otherwise send it. */ + int res = + MPI_Isend(&parts[offset_send * sizeofparts], sending, mpi_type, + k, ind_send, MPI_COMM_WORLD, &reqs[2 * k + 0]); + if (res != MPI_SUCCESS) + mpi_error(res, "Failed to isend parts to node %i.", k); + } } - } - /* If we're receiving from this node, then move past it to next. */ - if (counts[ind_recv] > 0) offset_recv += counts[ind_recv]; - } + /* If we're sending to this node, then move past it to next. */ + if (counts[ind_send] > 0) offset_send += counts[ind_send]; + + /* Are we receiving any data from this node? Note already done if coming + * from this node. 
*/ + if (k != nodeID) { + int receiving = counts[ind_recv] - recvd; + if (receiving > 0) { + activenodes++; + if (receiving > chunk) receiving = chunk; + int res = MPI_Irecv(&parts_new[offset_recv * sizeofparts], + receiving, mpi_type, k, ind_recv, + MPI_COMM_WORLD, &reqs[2 * k + 1]); + if (res != MPI_SUCCESS) + mpi_error(res, "Failed to emit irecv of parts from node %i.", k); + } + } + + /* If we're receiving from this node, then move past it to next. */ + if (counts[ind_recv] > 0) offset_recv += counts[ind_recv]; + } - /* Wait for all the sends and recvs to tumble in. */ - MPI_Status stats[2 * nr_nodes]; - int res; - if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { - for (int k = 0; k < 2 * nr_nodes; k++) { - char buff[MPI_MAX_ERROR_STRING]; - MPI_Error_string(stats[k].MPI_ERROR, buff, &res); - message("request from source %i, tag %i has error '%s'.", - stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); + /* Wait for all the sends and recvs to tumble in. */ + MPI_Status stats[2 * nr_nodes]; + int res; + if ((res = MPI_Waitall(2 * nr_nodes, reqs, stats)) != MPI_SUCCESS) { + for (int k = 0; k < 2 * nr_nodes; k++) { + char buff[MPI_MAX_ERROR_STRING]; + MPI_Error_string(stats[k].MPI_ERROR, buff, &res); + message("request from source %i, tag %i has error '%s'.", + stats[k].MPI_SOURCE, stats[k].MPI_TAG, buff); + } + error("Failed during waitall for part data."); } - error("Failed during waitall for part data."); + + /* Move to next chunks. */ + sent += chunk; + recvd += chunk; } - /* Move to next chunks. */ - sent += chunk; - recvd += chunk; + /* Free temps. */ + free(reqs); } - /* Free temps. */ - free(reqs); - /* And return new memory. */ return parts_new; } @@ -430,7 +509,8 @@ static void engine_redistribute_relink_mapper(void *map_data, int num_elements, * 3) The particles to send are placed in a temporary buffer in which the * part-gpart links are preserved. * 4) Each node allocates enough space for the new particles. 
- * 5) (Asynchronous) communications are issued to transfer the data. + * 5) Asynchronous or synchronous communications are issued to transfer the + * data. * * * @param e The #engine. @@ -895,7 +975,7 @@ void engine_redistribute(struct engine *e) { /* SPH particles. */ void *new_parts = engine_do_redistribute( "parts", counts, (char *)s->parts, nr_parts_new, sizeof(struct part), - part_align, part_mpi_type, nr_nodes, nodeID); + part_align, part_mpi_type, nr_nodes, nodeID, e->syncredist); swift_free("parts", s->parts); s->parts = (struct part *)new_parts; s->nr_parts = nr_parts_new; @@ -904,32 +984,35 @@ void engine_redistribute(struct engine *e) { /* Extra SPH particle properties. */ new_parts = engine_do_redistribute( "xparts", counts, (char *)s->xparts, nr_parts_new, sizeof(struct xpart), - xpart_align, xpart_mpi_type, nr_nodes, nodeID); + xpart_align, xpart_mpi_type, nr_nodes, nodeID, e->syncredist); swift_free("xparts", s->xparts); s->xparts = (struct xpart *)new_parts; /* Gravity particles. */ - new_parts = engine_do_redistribute( - "gparts", g_counts, (char *)s->gparts, nr_gparts_new, - sizeof(struct gpart), gpart_align, gpart_mpi_type, nr_nodes, nodeID); + new_parts = + engine_do_redistribute("gparts", g_counts, (char *)s->gparts, + nr_gparts_new, sizeof(struct gpart), gpart_align, + gpart_mpi_type, nr_nodes, nodeID, e->syncredist); swift_free("gparts", s->gparts); s->gparts = (struct gpart *)new_parts; s->nr_gparts = nr_gparts_new; s->size_gparts = engine_redistribute_alloc_margin * nr_gparts_new; /* Star particles. 
*/ - new_parts = engine_do_redistribute( - "sparts", s_counts, (char *)s->sparts, nr_sparts_new, - sizeof(struct spart), spart_align, spart_mpi_type, nr_nodes, nodeID); + new_parts = + engine_do_redistribute("sparts", s_counts, (char *)s->sparts, + nr_sparts_new, sizeof(struct spart), spart_align, + spart_mpi_type, nr_nodes, nodeID, e->syncredist); swift_free("sparts", s->sparts); s->sparts = (struct spart *)new_parts; s->nr_sparts = nr_sparts_new; s->size_sparts = engine_redistribute_alloc_margin * nr_sparts_new; /* Black holes particles. */ - new_parts = engine_do_redistribute( - "bparts", b_counts, (char *)s->bparts, nr_bparts_new, - sizeof(struct bpart), bpart_align, bpart_mpi_type, nr_nodes, nodeID); + new_parts = + engine_do_redistribute("bparts", b_counts, (char *)s->bparts, + nr_bparts_new, sizeof(struct bpart), bpart_align, + bpart_mpi_type, nr_nodes, nodeID, e->syncredist); swift_free("bparts", s->bparts); s->bparts = (struct bpart *)new_parts; s->nr_bparts = nr_bparts_new; diff --git a/src/memuse.c b/src/memuse.c index 77344cc6cec56544a6e19cc6a71ca2375aa1916c..00a2a5f879a994d96e0747eba55c56a3161a6b86 100644 --- a/src/memuse.c +++ b/src/memuse.c @@ -27,8 +27,10 @@ #include "../config.h" /* Standard includes. */ +#include <stdint.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <sys/types.h> #include <unistd.h> @@ -39,6 +41,8 @@ #include "atomic.h" #include "clocks.h" #include "engine.h" +#include "error.h" +#include "memuse_rnodes.h" #ifdef SWIFT_MEMUSE_REPORTS @@ -101,254 +105,10 @@ struct memuse_labelled_item { size_t count; }; -/* A radix node, this has a single byte key and a pointer to some related - * resource. It also holds a sorted list of children, if any. */ -struct memuse_rnode { - - /* Byte key of this node. */ - uint8_t keypart; - - /* Value of this node, if set. */ - void *ptr; - - /* Sorted pointers to children of this node. 
*/ - struct memuse_rnode **children; - unsigned int count; -}; - /* Persistent radix trie root node. Holds active logs between dumps. */ static struct memuse_rnode *memuse_rnode_root; static int memuse_rnode_root_init = 1; -#ifdef MEMUSE_RNODE_DUMP -/** - * @brief Dump a representation of the radix tree rooted at a node to stdout. - * - * @param depth the depth of the node in the tree, root is 0. - * @param node the node at which to start dumping. - * @param full if not zero then nodes that are not storing a value - * are also reported. - */ -static void memuse_rnode_dump(int depth, struct memuse_rnode *node, int full) { - - /* Value of the full key, to this depth. Assumes full key is a pointer, - * so uncomment when using strings. */ - static union { - // uint8_t key[MEMUSE_MAXLABLEN]; - // char ptr[MEMUSE_MAXLABLEN]; - uint8_t key[sizeof(uintptr_t)]; - void *ptr; - } keyparts = {0}; - - /* Record keypart at this depth. Root has no keypart. */ - if (depth != 0) keyparts.key[depth - 1] = node->keypart; - - // if (node->ptr != NULL || full) { - // keyparts.key[depth] = '\0'; - // - // /* Gather children's keys if full. */ - // char fullkey[MEMUSE_MAXLABLEN]; - // if (full) { - // for (size_t k = 0; k < node->count; k++) { - // fullkey[k] = node->children[k]->keypart; - // } - // fullkey[node->count] = '\0'; - // printf("dump @ depth: %d keypart: %d key: %s value: %p fullkey: %s\n", - // depth, node->keypart, keyparts.ptr, node->ptr, fullkey); - // } else { - // printf("dump @ depth: %d keypart: %d key: %s value: %p\n", depth, - // node->keypart, keyparts.ptr, node->ptr); - // } - //} - - if (node->ptr != NULL || full) { - printf("dump @ depth: %d keypart: %d key: %p value: %p\n", depth, - node->keypart, keyparts.ptr, node->ptr); - } - - /* Recurse to all children. */ - for (size_t k = 0; k < node->count; k++) { - memuse_rnode_dump(depth + 1, node->children[k], full); - } -} -#endif - -/** - * @brief Return the position of a keypart for a list of children. 
- * If not found returns where it would be inserted. - * - * @param keypart the keypart to locate. - * @param children the list of sorted children. - * @param count the number of children - * - * @return the index of key or where it should be inserted. - */ -static unsigned int memuse_rnode_bsearch(uint8_t keypart, - struct memuse_rnode **children, - unsigned int count) { - - /* Search for lower bound. */ - unsigned int lower = 0; - unsigned int upper = count; - while (lower < upper) { - unsigned int middle = (upper + lower) / 2; - if (keypart > children[middle]->keypart) - lower = middle + 1; - else - upper = middle; - } - return lower; -} - -/** - * @brief Insert a child, if needed, into a list of children. Assumes - * we have sufficient room. - * - * @param child the child to insert, if needed. - * @param children the list of sorted children. - * @param count the number of children - */ -static void memuse_rnode_binsert_child(struct memuse_rnode *child, - struct memuse_rnode **children, - unsigned int *count) { - unsigned int pos = 0; - if (*count > 0) { - - /* Find the child or insertion point. */ - pos = memuse_rnode_bsearch(child->keypart, children, *count); - - /* If not found move all children to make a space, unless we're inserting - * after the end. */ - if (pos < *count && children[pos]->keypart != child->keypart) { - memmove(&children[pos + 1], &children[pos], - (*count - pos) * sizeof(struct memuse_rnode *)); - } - } - - /* Insert new child */ - children[pos] = child; - *count += 1; -} - -/** - * @brief Add a child rnode to an rnode. Making sure we have room and keeping - * the sort order. - * - * @param node the parent node. 
- * @param child the node to add to the parent, - */ -static void memuse_rnode_add_child(struct memuse_rnode *node, - struct memuse_rnode *child) { - - /* Extend the children list to include a new entry .*/ - void *mem = realloc(node->children, - (node->count + 1) * sizeof(struct memuse_rnode *)); - if (mem == NULL) error("Failed to reallocate rnodes\n"); - node->children = mem; - - /* Insert the new child. */ - memuse_rnode_binsert_child(child, node->children, &node->count); -} - -/** - * @brief Find a child of a node with the given key part. - * - * @param node the node to search. - * @param keypart the key part of the child. - * @return NULL if not found. - */ -static struct memuse_rnode *memuse_rnode_lookup(const struct memuse_rnode *node, - uint8_t keypart) { - - /* Locate the key, or where it would be inserted. */ - if (node->count > 0) { - unsigned int index = - memuse_rnode_bsearch(keypart, node->children, node->count); - if (index < node->count && keypart == node->children[index]->keypart) { - return node->children[index]; - } - } - return NULL; -} - -/** - * @brief insert a child into a node's children list and add a pointer, iff - * this is the destination node for the given key. - * - * @param node the parent node. - * @param depth the depth of the parent node. - * @param key the full key of the eventual leaf node. - * @param keylen the numbers of bytes in the full key. - * @param value pointer that will be stored as the value of the leaf node. - */ -static void memuse_rnode_insert_child(struct memuse_rnode *node, uint8_t depth, - uint8_t *key, uint8_t keylen, - void *value) { - - /* Check if keypart this already exists at this level and add new child if - * not. */ - uint8_t keypart = key[depth]; - struct memuse_rnode *child = memuse_rnode_lookup(node, keypart); - if (child == NULL) { - child = calloc(1, sizeof(struct memuse_rnode)); - child->keypart = keypart; - memuse_rnode_add_child(node, child); - } - - /* Are we at the lowest level yet? 
*/ - depth++; - if (depth == keylen) { - /* Our destination node. */ - -#if SWIFT_DEBUG_CHECKS - if (child->ptr != NULL) - message("Overwriting rnode value: %p with %p", child->ptr, value); -#endif - child->ptr = value; - return; - } - - /* Down we go to the next level. */ - memuse_rnode_insert_child(child, depth, key, keylen, value); - return; -} - -/** - * @brief Find a child node for the given full key. - * - * @param node the current parent node. - * @param depth the depth of the parent node, 0 for first call. - * @param key the full key of the expected child node. - * @param keylen the number of bytes in the key. - */ -static struct memuse_rnode *memuse_rnode_find_child(struct memuse_rnode *node, - uint8_t depth, uint8_t *key, - uint8_t keylen) { - uint8_t keypart = key[depth]; - struct memuse_rnode *child = NULL; - if (node->count > 0) child = memuse_rnode_lookup(node, keypart); - if (child != NULL && (depth + 1) < keylen) { - return memuse_rnode_find_child(child, depth + 1, key, keylen); - } - return child; -} - -/** - * @brief Free all resources associated with a node. - * - * @param node the rnode. - */ -static void memuse_rnode_cleanup(struct memuse_rnode *node) { - - if (!node) return; - - for (size_t k = 0; k < node->count; k++) { - memuse_rnode_cleanup(node->children[k]); - free(node->children[k]); - } - if (node->count > 0) free(node->children); -} - /** * @brief reallocate the entries log if space is needed. */ diff --git a/src/memuse_rnodes.c b/src/memuse_rnodes.c new file mode 100644 index 0000000000000000000000000000000000000000..9078e8e78fa3c58ac9db8fdad112a79016890438 --- /dev/null +++ b/src/memuse_rnodes.c @@ -0,0 +1,270 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Peter W. 
Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/** + * @file memuse_rnodes.c + * @brief file of routines used for radix nodes in memory loggers. + */ + +/* Config parameters. */ +#include "../config.h" + +/* Standard includes. */ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +/* Local defines. */ +#include "memuse_rnodes.h" + +/* Local includes. */ +#include "atomic.h" +#include "clocks.h" +#include "error.h" + +/** + * @brief Return the position of a keypart for a list of children. + * If not found returns where it would be inserted. + * + * @param keypart the keypart to locate. + * @param children the list of sorted children. + * @param count the number of children + * + * @return the index of key or where it should be inserted. + */ +static unsigned int memuse_rnode_bsearch(uint8_t keypart, + struct memuse_rnode **children, + unsigned int count) { + + /* Search for lower bound. 
*/ + unsigned int lower = 0; + unsigned int upper = count; + while (lower < upper) { + unsigned int middle = (upper + lower) / 2; + if (keypart > children[middle]->keypart) + lower = middle + 1; + else + upper = middle; + } + return lower; +} + +/** + * @brief Insert a child, if needed, into a list of children. Assumes + * we have sufficient room. + * + * @param child the child to insert, if needed. + * @param children the list of sorted children. + * @param count the number of children + */ +static void memuse_rnode_binsert_child(struct memuse_rnode *child, + struct memuse_rnode **children, + unsigned int *count) { + unsigned int pos = 0; + if (*count > 0) { + + /* Find the child or insertion point. */ + pos = memuse_rnode_bsearch(child->keypart, children, *count); + + /* If not found move all children to make a space, unless we're inserting + * after the end. */ + if (pos < *count && children[pos]->keypart != child->keypart) { + memmove(&children[pos + 1], &children[pos], + (*count - pos) * sizeof(struct memuse_rnode *)); + } + } + + /* Insert new child */ + children[pos] = child; + *count += 1; +} + +/** + * @brief Add a child rnode to an rnode. Making sure we have room and keeping + * the sort order. + * + * @param node the parent node. + * @param child the node to add to the parent, + */ +static void memuse_rnode_add_child(struct memuse_rnode *node, + struct memuse_rnode *child) { + + /* Extend the children list to include a new entry .*/ + void *mem = realloc(node->children, + (node->count + 1) * sizeof(struct memuse_rnode *)); + if (mem == NULL) error("Failed to reallocate rnodes\n"); + node->children = (struct memuse_rnode **)mem; + + /* Insert the new child. */ + memuse_rnode_binsert_child(child, node->children, &node->count); +} + +/** + * @brief Find a child of a node with the given key part. + * + * @param node the node to search. + * @param keypart the key part of the child. + * @return NULL if not found. 
+ */ +static struct memuse_rnode *memuse_rnode_lookup(const struct memuse_rnode *node, + uint8_t keypart) { + + /* Locate the key, or where it would be inserted. */ + if (node->count > 0) { + unsigned int index = + memuse_rnode_bsearch(keypart, node->children, node->count); + if (index < node->count && keypart == node->children[index]->keypart) { + return node->children[index]; + } + } + return NULL; +} + +/** + * @brief insert a child into a node's children list and add a pointer, iff + * this is the destination node for the given key. + * + * @param node the parent node. + * @param depth the depth of the parent node. + * @param key the full key of the eventual leaf node. + * @param keylen the numbers of bytes in the full key. + * @param value pointer that will be stored as the value of the leaf node. + */ +void memuse_rnode_insert_child(struct memuse_rnode *node, uint8_t depth, + uint8_t *key, uint8_t keylen, void *value) { + + /* Check if keypart this already exists at this level and add new child if + * not. */ + uint8_t keypart = key[depth]; + struct memuse_rnode *child = memuse_rnode_lookup(node, keypart); + if (child == NULL) { + child = (struct memuse_rnode *)calloc(1, sizeof(struct memuse_rnode)); + child->keypart = keypart; + memuse_rnode_add_child(node, child); + } + + /* Are we at the lowest level yet? */ + depth++; + if (depth == keylen) { + /* Our destination node. */ + +#if SWIFT_DEBUG_CHECKS + if (child->ptr != NULL) + message("Overwriting rnode value: %p with %p", child->ptr, value); +#endif + child->ptr = value; + return; + } + + /* Down we go to the next level. */ + memuse_rnode_insert_child(child, depth, key, keylen, value); + return; +} + +/** + * @brief Find a child node for the given full key. + * + * @param node the current parent node. + * @param depth the depth of the parent node, 0 for first call. + * @param key the full key of the expected child node. + * @param keylen the number of bytes in the key. 
+ */ +struct memuse_rnode *memuse_rnode_find_child(struct memuse_rnode *node, + uint8_t depth, uint8_t *key, + uint8_t keylen) { + uint8_t keypart = key[depth]; + struct memuse_rnode *child = NULL; + if (node->count > 0) child = memuse_rnode_lookup(node, keypart); + if (child != NULL && (depth + 1) < keylen) { + return memuse_rnode_find_child(child, depth + 1, key, keylen); + } + return child; +} + +/** + * @brief Free all resources associated with a node. + * + * @param node the rnode. + */ +void memuse_rnode_cleanup(struct memuse_rnode *node) { + + if (!node) return; + + for (size_t k = 0; k < node->count; k++) { + memuse_rnode_cleanup(node->children[k]); + free(node->children[k]); + } + if (node->count > 0) free(node->children); +} + +/** + * @brief Dump a representation of the radix tree rooted at a node to stdout. + * + * Debugging code. + * + * @param depth the depth of the node in the tree, root is 0. + * @param node the node at which to start dumping. + * @param full if not zero then nodes that are not storing a value + * are also reported. + */ +void memuse_rnode_dump(int depth, struct memuse_rnode *node, int full) { + + /* Value of the full key, to this depth. Assumes full key is a pointer, + * so uncomment when using strings. */ + static union { + // uint8_t key[MEMUSE_MAXLABLEN]; + // char ptr[MEMUSE_MAXLABLEN]; + uint8_t key[sizeof(uintptr_t)]; + void *ptr; + } keyparts = {0}; + + /* Record keypart at this depth. Root has no keypart. */ + if (depth != 0) keyparts.key[depth - 1] = node->keypart; + + // if (node->ptr != NULL || full) { + // keyparts.key[depth] = '\0'; + // + // /* Gather children's keys if full. 
*/ + // char fullkey[MEMUSE_MAXLABLEN]; + // if (full) { + // for (size_t k = 0; k < node->count; k++) { + // fullkey[k] = node->children[k]->keypart; + // } + // fullkey[node->count] = '\0'; + // printf("dump @ depth: %d keypart: %d key: %s value: %p fullkey: %s\n", + // depth, node->keypart, keyparts.ptr, node->ptr, fullkey); + // } else { + // printf("dump @ depth: %d keypart: %d key: %s value: %p\n", depth, + // node->keypart, keyparts.ptr, node->ptr); + // } + //} + + if (node->ptr != NULL || full) { + printf("dump @ depth: %d keypart: %d key: %p value: %p\n", depth, + node->keypart, keyparts.ptr, node->ptr); + } + + /* Recurse to all children. */ + for (size_t k = 0; k < node->count; k++) { + memuse_rnode_dump(depth + 1, node->children[k], full); + } +} diff --git a/src/memuse_rnodes.h b/src/memuse_rnodes.h new file mode 100644 index 0000000000000000000000000000000000000000..41f24a98ad60396aec06d3170d478834428007ce --- /dev/null +++ b/src/memuse_rnodes.h @@ -0,0 +1,51 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_MEMUSE_RNODE_H +#define SWIFT_MEMUSE_RNODE_H + +/* Config parameters. */ +#include "../config.h" + +/* Includes. */ +#include <stdlib.h> + +/* A radix node, this has a single byte key and a pointer to some related + * resource. It also holds a sorted list of children, if any. */ +struct memuse_rnode { + + /* Byte key of this node. */ + uint8_t keypart; + + /* Value of this node, if set. */ + void *ptr; + + /* Sorted pointers to children of this node. */ + struct memuse_rnode **children; + unsigned int count; +}; + +void memuse_rnode_dump(int depth, struct memuse_rnode *node, int full); +void memuse_rnode_insert_child(struct memuse_rnode *node, uint8_t depth, + uint8_t *key, uint8_t keylen, void *value); +struct memuse_rnode *memuse_rnode_find_child(struct memuse_rnode *node, + uint8_t depth, uint8_t *key, + uint8_t keylen); +void memuse_rnode_cleanup(struct memuse_rnode *node); + +#endif /* SWIFT_MEMUSE_RNODE_H */ diff --git a/src/mpiuse.c b/src/mpiuse.c new file mode 100644 index 0000000000000000000000000000000000000000..f32d9a069e680e0cffc110e288b816ca6475b276 --- /dev/null +++ b/src/mpiuse.c @@ -0,0 +1,354 @@ +/* This file is part of SWIFT. + * Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ + +/** + * @file mpiuse.c + * @brief file of routines to report about MPI tasks used in SWIFT. + */ +/* Config parameters. */ +#include "../config.h" + +#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) + +/* Standard includes. */ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +/* Local defines. */ +#include "mpiuse.h" + +/* Local includes. */ +#include "atomic.h" +#include "clocks.h" +#include "engine.h" +#include "error.h" +#include "memuse_rnodes.h" + +/* The initial size and increment of the log entries buffer. */ +#define MPIUSE_INITLOG 1000000 + +/* A megabyte for conversions. */ +#define MEGABYTE 1048576.0 + +/* Also recorded in logger. */ +extern int engine_rank; +extern int engine_current_step; + +/* Entry for logger of MPI send and recv requests in a step. */ +struct mpiuse_log_entry { + + /* Type and subtype of MPI task. */ + int type; + int subtype; + + /* Step of action. */ + int step; + + /* Whether an activation, send or recv, or if handoff completed. Not the + * same as delivered, need to match across ranks to see that. */ + int activation; + + /* Memory of the request. */ + size_t size; + + /* Pointer to the request associated with the call. Needs to be + * unique and the same for the request and its successful test. */ + union { + void *ptr; + uint8_t vptr[sizeof(uintptr_t)]; /* For rnode keys. */ + }; + + /* Ticks at time of this action. */ + ticks tic; + + /* Time taken for handoff of this action. */ + ticks acttic; + + /* Whether request is still active, i.e. successful test not seen. */ + int active; + + /* Rank of the other side of the communication. */ + int otherrank; + + /* The tag. */ + int tag; +}; + +/* The log of activations and handoffs. All volatile as accessed from threads + * that use the value to synchronise.
*/ +static struct mpiuse_log_entry *volatile mpiuse_log = NULL; +static volatile size_t mpiuse_log_size = 0; +static volatile size_t mpiuse_log_count = 0; +static volatile size_t mpiuse_log_done = 0; + +/** + * @brief reallocate the entries log if space is needed. + */ +static void mpiuse_log_reallocate(size_t ind) { + + if (ind == 0) { + + /* Need to perform initialization. Be generous. */ + if ((mpiuse_log = (struct mpiuse_log_entry *)malloc( + sizeof(struct mpiuse_log_entry) * MPIUSE_INITLOG)) == NULL) + error("Failed to allocate MPI use log."); + + /* Last action. */ + mpiuse_log_size = MPIUSE_INITLOG; + + } else { + struct mpiuse_log_entry *new_log; + if ((new_log = (struct mpiuse_log_entry *)malloc( + sizeof(struct mpiuse_log_entry) * + (mpiuse_log_size + MPIUSE_INITLOG))) == NULL) + error("Failed to re-allocate MPI use log."); + + /* Wait for all writes to the old buffer to complete. */ + while (mpiuse_log_done < mpiuse_log_size) + ; + + /* Copy to new buffer. */ + memcpy(new_log, mpiuse_log, + sizeof(struct mpiuse_log_entry) * mpiuse_log_size); + free(mpiuse_log); + mpiuse_log = new_log; + + /* Last action, releases waiting threads. */ + atomic_add(&mpiuse_log_size, MPIUSE_INITLOG); + } +} + +/** + * @brief Log an MPI request or handoff. + * + * @param type the task type (send or recv). + * @param subtype the task subtype. + * @param ptr pointer to the MPI request. + * @param activation if zero this is a successful MPI_Test, not an MPI_Isend or + * MPI_Irecv. + * @param size the size in bytes of memory to be transferred or received. + * 0 for a deactivation. + * @param otherrank other rank associated with the transfer. + * @param tag the MPI tag. + */ +void mpiuse_log_allocation(int type, int subtype, void *ptr, int activation, + size_t size, int otherrank, int tag) { + + size_t ind = atomic_inc(&mpiuse_log_count); + + /* If we are at the current size we need more space. */ + if (ind == mpiuse_log_size) mpiuse_log_reallocate(ind); + + /* Other threads wait for space.
*/ + while (ind > mpiuse_log_size) + ; + + /* Record the log. */ + mpiuse_log[ind].step = engine_current_step; + mpiuse_log[ind].type = type; + mpiuse_log[ind].subtype = subtype; + mpiuse_log[ind].activation = activation; + mpiuse_log[ind].size = size; + mpiuse_log[ind].ptr = ptr; + mpiuse_log[ind].otherrank = otherrank; + mpiuse_log[ind].tag = tag; + mpiuse_log[ind].tic = getticks(); + mpiuse_log[ind].acttic = 0; + mpiuse_log[ind].active = 1; + atomic_inc(&mpiuse_log_done); +} + +/** + * @brief dump the log to a file and reset, if anything to dump. + * + * @param filename name of file for log dump. + * @param stepticks the clock ticks at the start of step, if dumping a step, + * otherwise some locally relative time that might help + * synchronize across ranks. + */ +void mpiuse_log_dump(const char *filename, ticks stepticks) { + + /* Skip if nothing logged this step. */ + if (mpiuse_log_count == 0) return; + + // ticks tic = getticks(); + + /* Create the radix tree root node. */ + struct memuse_rnode *memuse_rnode_root = + (struct memuse_rnode *)calloc(1, sizeof(struct memuse_rnode)); + + /* Stop any new logs from being processed while we are dumping. */ + size_t log_count = mpiuse_log_count; + + /* Open the output file. */ + FILE *fd; + if ((fd = fopen(filename, "w")) == NULL) { + message("Failed to create MPI use log file '%s', logs not dumped.", + filename); + return; + } + + /* Write a header. */ + fprintf(fd, + "# stic etic dtic step rank otherrank type itype subtype isubtype " + "activation tag size sum\n"); + + size_t mpiuse_current = 0; + size_t mpiuse_max = 0; + double mpiuse_sum = 0; + size_t mpiuse_actcount = 0; + for (size_t k = 0; k < log_count; k++) { + + /* Check if this address has already been recorded. */ + struct memuse_rnode *child = memuse_rnode_find_child( + memuse_rnode_root, 0, mpiuse_log[k].vptr, sizeof(uintptr_t)); + + if (child != NULL && child->ptr != NULL) { + + /* Should be the handoff. Check that. 
*/ + if (mpiuse_log[k].activation) { + + /* Used twice, this is an error, but just complain as not fatal. */ +#if SWIFT_DEBUG_CHECKS + message( + "Used the same MPI request address twice " + "(%s/%s: %d->%d: %zd/%d)", + taskID_names[mpiuse_log[k].type], + subtaskID_names[mpiuse_log[k].subtype], engine_rank, + mpiuse_log[k].otherrank, mpiuse_log[k].size, mpiuse_log[k].tag); +#endif + continue; + } + + /* Free, update the missing fields, size of request is removed. */ + struct mpiuse_log_entry *oldlog = (struct mpiuse_log_entry *)child->ptr; + mpiuse_log[k].size = -oldlog->size; + mpiuse_log[k].otherrank = oldlog->otherrank; + mpiuse_log[k].tag = oldlog->tag; + + /* Time taken to handoff. */ + mpiuse_log[k].acttic = mpiuse_log[k].tic - oldlog->tic; + + /* And deactivate this key. */ + child->ptr = NULL; + + /* And mark this as handed off. */ + mpiuse_log[k].active = 0; + oldlog->active = 0; + + } else if (child == NULL && mpiuse_log[k].activation) { + + /* Not found, so new send/recv which we store the log against the + * address. */ + memuse_rnode_insert_child(memuse_rnode_root, 0, mpiuse_log[k].vptr, + sizeof(uintptr_t), &mpiuse_log[k]); + + } else if (child == NULL && !mpiuse_log[k].activation) { + + /* Unmatched handoff, not OK, but not fatal. */ +#if SWIFT_DEBUG_CHECKS + if (mpiuse_log[k].ptr != NULL) { + message("Unmatched MPI_Test found: (%s/%s: %d->%d: %zd/%d)", + taskID_names[mpiuse_log[k].type], + subtaskID_names[mpiuse_log[k].subtype], engine_rank, + mpiuse_log[k].otherrank, mpiuse_log[k].size, mpiuse_log[k].tag); + } +#endif + continue; + } else if (mpiuse_log[k].activation) { + + /* Must be previously released request with the same address, so we + * store. 
*/ + memuse_rnode_insert_child(memuse_rnode_root, 0, mpiuse_log[k].vptr, + sizeof(uintptr_t), &mpiuse_log[k]); + + } else { + message("Weird MPI log record found: (%s/%s: %d->%d: %zd/%d/%d/%p)", + taskID_names[mpiuse_log[k].type], + subtaskID_names[mpiuse_log[k].subtype], engine_rank, + mpiuse_log[k].otherrank, mpiuse_log[k].size, mpiuse_log[k].tag, + mpiuse_log[k].activation, mpiuse_log[k].ptr); + continue; + } + + /* Sum of memory in flight. */ + mpiuse_current += mpiuse_log[k].size; + + /* Gather for stats report. */ + if (mpiuse_log[k].activation) { + if (mpiuse_log[k].size > mpiuse_max) mpiuse_max = mpiuse_log[k].size; + mpiuse_sum += (double)mpiuse_log[k].size; + mpiuse_actcount++; + } + + /* And output. */ + fprintf(fd, "%lld %lld %lld %d %d %d %s %d %s %d %d %d %zd %zd\n", + mpiuse_log[k].tic - stepticks, + mpiuse_log[k].tic - clocks_start_ticks, mpiuse_log[k].acttic, + mpiuse_log[k].step, engine_rank, mpiuse_log[k].otherrank, + taskID_names[mpiuse_log[k].type], mpiuse_log[k].type, + subtaskID_names[mpiuse_log[k].subtype], mpiuse_log[k].subtype, + mpiuse_log[k].activation, mpiuse_log[k].tag, mpiuse_log[k].size, + mpiuse_current); + } + +#ifdef MEMUSE_RNODE_DUMP + /* Debug dump of tree. */ + // memuse_rnode_dump(0, memuse_rnode_root, 0); +#endif + + /* Write our statistics. */ + fprintf(fd, "##\n"); + fprintf(fd, "## Number of requests: %zd\n", mpiuse_actcount); + fprintf(fd, "## Maximum request size: %.4f (MB)\n", mpiuse_max / MEGABYTE); + fprintf(fd, "## Sum of all requests: %.4f (MB)\n", mpiuse_sum / MEGABYTE); + fprintf(fd, "## Mean of all requests: %.4f (MB)\n", + mpiuse_sum / (double)mpiuse_actcount / MEGABYTE); + fprintf(fd, "##\n"); + + /* Now check any still active logs, these are errors all should match. 
*/ + if (mpiuse_current != 0) { + message("Some MPI requests have not been completed"); + for (size_t k = 0; k < log_count; k++) { + if (mpiuse_log[k].active) + message("%s/%s: %d->%d: %zd/%d)", taskID_names[mpiuse_log[k].type], + subtaskID_names[mpiuse_log[k].subtype], engine_rank, + mpiuse_log[k].otherrank, mpiuse_log[k].size, mpiuse_log[k].tag); + } + } + + /* Finished with the rnodes. */ + memuse_rnode_cleanup(memuse_rnode_root); + + /* Clear the log. We expect this to clear step to step, unlike memory. */ + mpiuse_log_count = 0; + mpiuse_log_done = 0; + + /* Close the file. */ + fflush(fd); + fclose(fd); + + // message("took %.3f %s.", clocks_from_ticks(getticks() - tic), + // clocks_getunit()); +} + +#endif /* defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) */ diff --git a/src/mpiuse.h b/src/mpiuse.h new file mode 100644 index 0000000000000000000000000000000000000000..20c6d05e1cc2d6b3995fbfc1dc2f69809ed093c9 --- /dev/null +++ b/src/mpiuse.h @@ -0,0 +1,44 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_MPIUSE_H +#define SWIFT_MPIUSE_H + +/* Config parameters. 
*/ +#include "../config.h" + +/* Local includes. */ +#include "cycle.h" + +/* Includes. */ +#include <stdlib.h> + +/* API. */ +#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) +void mpiuse_log_dump(const char *filename, ticks stepticks); +void mpiuse_log_allocation(int type, int subtype, void *ptr, int activation, + size_t size, int otherrank, int tag); +#else + +/* No-op when not reporting. */ +#define mpiuse_log_allocation(type, subtype, ptr, activation, size, otherrank, \ + tag) \ + ; +#endif /* defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) */ + +#endif /* SWIFT_MPIUSE_H */ diff --git a/src/runner_doiact_functions_hydro.h b/src/runner_doiact_functions_hydro.h index c324c759b5acc9db75cf0849d0e417b2141978f4..fcbd72af443c63cfa63f1a00b80372408fa2dbde 100644 --- a/src/runner_doiact_functions_hydro.h +++ b/src/runner_doiact_functions_hydro.h @@ -115,6 +115,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } if (r2 < hjg2 && pj_active) { @@ -127,6 +128,7 @@ void DOPAIR1_NAIVE(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } @@ -227,6 +229,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (pi_active) { @@ -234,6 +237,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == 
TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (pj_active) { @@ -245,6 +249,7 @@ void DOPAIR2_NAIVE(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } @@ -333,6 +338,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (doi) { @@ -340,6 +346,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (doj) { @@ -351,6 +358,7 @@ void DOSELF1_NAIVE(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } /* loop over the parts in cj. 
*/ @@ -438,6 +446,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (doi) { @@ -445,6 +454,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (doj) { @@ -456,6 +466,7 @@ void DOSELF2_NAIVE(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } /* loop over the parts in cj. */ @@ -542,6 +553,7 @@ void DOPAIR_SUBSET_NAIVE(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, pj->h, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, pj->h, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, pj->h, pi, pj, a, H); #endif } } /* loop over the parts in cj. */ @@ -635,6 +647,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } } /* loop over the parts in cj. 
*/ @@ -691,6 +704,7 @@ void DOPAIR_SUBSET(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } } /* loop over the parts in cj. */ @@ -833,6 +847,7 @@ void DOSELF_SUBSET(struct runner *r, struct cell *restrict ci, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } } /* loop over the parts in cj. */ @@ -997,6 +1012,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } } /* loop over the parts in cj. */ @@ -1085,6 +1101,7 @@ void DOPAIR1(struct runner *r, struct cell *ci, struct cell *cj, const int sid, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } /* loop over the parts in ci. */ @@ -1385,6 +1402,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } /* loop over the active parts in cj. 
*/ @@ -1456,12 +1474,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } } @@ -1565,6 +1585,7 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } } /* loop over the active parts in ci. 
*/ @@ -1638,12 +1659,14 @@ void DOPAIR2(struct runner *r, struct cell *ci, struct cell *cj, const int sid, #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } else { IACT_NONSYM(r2, dx, hj, hi, pj, pi, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } @@ -1833,6 +1856,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } /* loop over all other particles. */ @@ -1885,6 +1909,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (doi) { @@ -1892,6 +1917,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else if (doj) { @@ -1902,6 +1928,7 @@ void DOSELF1(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } @@ 
-2029,6 +2056,7 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, hi, pj, pi, a, H); #endif } } /* loop over all other particles. */ @@ -2076,12 +2104,14 @@ void DOSELF2(struct runner *r, struct cell *restrict c) { #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } else { IACT_NONSYM(r2, dx, hi, hj, pi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) runner_iact_nonsym_chemistry(r2, dx, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, hj, pi, pj, a, H); #endif } } diff --git a/src/runner_doiact_functions_stars.h b/src/runner_doiact_functions_stars.h index b0d731857e9b4b0474e47c3ac3fca540eecb1cbb..d452fba01b38de09d71ca4a121dcd5d92388bc08 100644 --- a/src/runner_doiact_functions_stars.h +++ b/src/runner_doiact_functions_stars.h @@ -58,7 +58,9 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { const int count = c->hydro.count; struct spart *restrict sparts = c->stars.parts; struct part *restrict parts = c->hydro.parts; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xparts = c->hydro.xparts; +#endif /* Loop over the sparts in ci. */ for (int sid = 0; sid < scount; sid++) { @@ -83,7 +85,9 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts[pjd]; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xpj = &xparts[pjd]; +#endif const float hj = pj->h; /* Early abort? 
*/ @@ -105,7 +109,7 @@ void DOSELF1_STARS(struct runner *r, struct cell *c, int timer) { if (r2 < hig2) { IACT_STARS(r2, dx, hi, hj, si, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, NULL, cosmo, ti_current); #elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, @@ -153,7 +157,9 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, const int count_j = cj->hydro.count; struct spart *restrict sparts_i = ci->stars.parts; struct part *restrict parts_j = cj->hydro.parts; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xparts_j = cj->hydro.xparts; +#endif /* Get the relative distance between the pairs, wrapping. */ double shift[3] = {0.0, 0.0, 0.0}; @@ -187,7 +193,9 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xpj = &xparts_j[pjd]; +#endif const float hj = pj->h; /* Skip inhibited particles. 
*/ @@ -210,7 +218,7 @@ void DO_NONSYM_PAIR1_STARS_NAIVE(struct runner *r, struct cell *restrict ci, IACT_STARS(r2, dx, hi, hj, si, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, xpj, cosmo, + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, si, pj, NULL, cosmo, ti_current); #elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, si, pj, xpj, cosmo, @@ -286,7 +294,9 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, const int count_j = cj->hydro.count; struct spart *restrict sparts_i = ci->stars.parts; struct part *restrict parts_j = cj->hydro.parts; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xparts_j = cj->hydro.xparts; +#endif const double dj_min = sort_j[0].d; const float dx_max_rshift = (ci->stars.dx_max_sort + cj->hydro.dx_max_sort) - rshift; @@ -326,7 +336,9 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, /* Recover pj */ struct part *pj = &parts_j[sort_j[pjd].i]; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *xpj = &xparts_j[sort_j[pjd].i]; +#endif /* Skip inhibited particles. 
*/ if (part_is_inhibited(pj, e)) continue; @@ -379,7 +391,7 @@ void DO_SYM_PAIR1_STARS(struct runner *r, struct cell *ci, struct cell *cj, IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, NULL, cosmo, ti_current); #elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, @@ -574,7 +586,9 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, const int count_j = cj->hydro.count; struct part *restrict parts_j = cj->hydro.parts; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xparts_j = cj->hydro.xparts; +#endif /* Early abort? */ if (count_j == 0) return; @@ -604,7 +618,9 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; +#endif /* Skip inhibited particles. */ if (part_is_inhibited(pj, e)) continue; @@ -632,7 +648,7 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, NULL, cosmo, ti_current); #elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, @@ -664,7 +680,9 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[sort_j[pjd].i]; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xpj = &xparts_j[sort_j[pjd].i]; +#endif /* Skip inhibited particles. 
*/ if (part_is_inhibited(pj, e)) continue; @@ -692,7 +710,7 @@ void DOPAIR1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, NULL, cosmo, ti_current); #elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, @@ -737,7 +755,9 @@ void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, const int count_j = cj->hydro.count; struct part *restrict parts_j = cj->hydro.parts; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xparts_j = cj->hydro.xparts; +#endif /* Early abort? */ if (count_j == 0) return; @@ -764,7 +784,9 @@ void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xpj = &xparts_j[pjd]; +#endif /* Skip inhibited particles */ if (part_is_inhibited(pj, e)) continue; @@ -789,8 +811,8 @@ void DOPAIR1_SUBSET_STARS_NAIVE(struct runner *r, struct cell *restrict ci, IACT_STARS(r2, dx, hi, hj, spi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, xpj, cosmo, - ti_current); + runner_iact_nonsym_feedback_density(r2, dx, hi, hj, spi, pj, NULL, + cosmo, ti_current); #elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) runner_iact_nonsym_feedback_apply(r2, dx, hi, hj, spi, pj, xpj, cosmo, ti_current); @@ -828,7 +850,9 @@ void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, const int count_i = ci->hydro.count; struct part *restrict parts_j = ci->hydro.parts; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xparts_j = ci->hydro.xparts; +#endif /* Early abort? 
*/ if (count_i == 0) return; @@ -854,7 +878,9 @@ void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, /* Get a pointer to the jth particle. */ struct part *restrict pj = &parts_j[pjd]; +#if (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) struct xpart *restrict xpj = &xparts_j[pjd]; +#endif /* Early abort? */ if (part_is_inhibited(pj, e)) continue; @@ -876,7 +902,7 @@ void DOSELF1_SUBSET_STARS(struct runner *r, struct cell *restrict ci, if (r2 < hig2) { IACT_STARS(r2, dx, hi, pj->h, spi, pj, a, H); #if (FUNCTION_TASK_LOOP == TASK_LOOP_DENSITY) - runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, xpj, + runner_iact_nonsym_feedback_density(r2, dx, hi, pj->h, spi, pj, NULL, cosmo, ti_current); #elif (FUNCTION_TASK_LOOP == TASK_LOOP_FEEDBACK) runner_iact_nonsym_feedback_apply(r2, dx, hi, pj->h, spi, pj, xpj, diff --git a/src/runner_doiact_hydro.c b/src/runner_doiact_hydro.c index 480ea59f0a536aa340b7e4d8f838bef3a0cca072..4638513c718257e99b39b1c9b17368e6a2b4bfa2 100644 --- a/src/runner_doiact_hydro.c +++ b/src/runner_doiact_hydro.c @@ -30,6 +30,7 @@ #include "runner.h" #include "runner_doiact_hydro_vec.h" #include "space_getsid.h" +#include "star_formation_iact.h" #include "timers.h" /* Import the density loop functions. 
*/ diff --git a/src/runner_ghost.c b/src/runner_ghost.c index 2c1e8cd7190858014f7914e293b5ffdadbdc2707..1d6fec0885aa41532b912d12795b5e455e2dc8d5 100644 --- a/src/runner_ghost.c +++ b/src/runner_ghost.c @@ -34,6 +34,7 @@ #include "pressure_floor.h" #include "pressure_floor_iact.h" #include "space_getsid.h" +#include "star_formation.h" #include "stars.h" #include "timers.h" #include "tracers.h" @@ -943,6 +944,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { const struct hydro_space *hs = &s->hs; const struct cosmology *cosmo = e->cosmology; const struct chemistry_global_data *chemistry = e->chemistry; + const struct star_formation *star_formation = e->star_formation; const int with_cosmology = (e->policy & engine_policy_cosmology); @@ -1040,6 +1042,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { hydro_end_density(p, cosmo); chemistry_end_density(p, chemistry, cosmo); pressure_floor_end_density(p, cosmo); + star_formation_end_density(p, star_formation, cosmo); /* Compute one step of the Newton-Raphson scheme */ const float n_sum = p->density.wcount * h_old_dim; @@ -1189,6 +1192,7 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { hydro_init_part(p, hs); chemistry_init_part(p, chemistry); pressure_floor_init_part(p, xp); + star_formation_init_part(p, star_formation); tracers_after_init(p, xp, e->internal_units, e->physical_constants, with_cosmology, e->cosmology, e->hydro_properties, e->cooling_func, e->time); @@ -1211,6 +1215,8 @@ void runner_do_ghost(struct runner *r, struct cell *c, int timer) { hydro_part_has_no_neighbours(p, xp, cosmo); chemistry_part_has_no_neighbours(p, xp, chemistry, cosmo); pressure_floor_part_has_no_neighbours(p, xp, cosmo); + star_formation_part_has_no_neighbours(p, xp, star_formation, + cosmo); } } else { diff --git a/src/scheduler.c b/src/scheduler.c index 1fad63fd7141db2aad486aaaa7e4dc877a8aa3b8..e08b0225d15b434649c0ad8f56bbdf3494b76285 100644 --- a/src/scheduler.c +++ 
b/src/scheduler.c @@ -47,6 +47,7 @@ #include "intrinsics.h" #include "kernel_hydro.h" #include "memuse.h" +#include "mpiuse.h" #include "queue.h" #include "sort_part.h" #include "space.h" @@ -1694,281 +1695,234 @@ void scheduler_enqueue(struct scheduler *s, struct task *t) { break; case task_type_recv: #ifdef WITH_MPI + { + size_t size = 0; /* Size in bytes. */ + size_t count = 0; /* Number of elements to receive */ + MPI_Datatype type = MPI_BYTE; /* Type of the elements */ + void *buff = NULL; /* Buffer to accept elements */ + if (t->subtype == task_subtype_tend_part) { - t->buff = (struct pcell_step_hydro *)malloc( - sizeof(struct pcell_step_hydro) * t->ci->mpi.pcell_size); - err = MPI_Irecv( - t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro), - MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = size = + t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro); + buff = t->buff = malloc(count); + } else if (t->subtype == task_subtype_tend_gpart) { - t->buff = (struct pcell_step_grav *)malloc( - sizeof(struct pcell_step_grav) * t->ci->mpi.pcell_size); - err = MPI_Irecv( - t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav), - MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = size = t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav); + buff = t->buff = malloc(count); + } else if (t->subtype == task_subtype_tend_spart) { - t->buff = (struct pcell_step_stars *)malloc( - sizeof(struct pcell_step_stars) * t->ci->mpi.pcell_size); - err = MPI_Irecv( - t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars), - MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = size = + t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars); + buff = t->buff = malloc(count); + } else if (t->subtype == task_subtype_tend_bpart) { - t->buff = (struct pcell_step_black_holes *)malloc( - sizeof(struct pcell_step_black_holes) * 
t->ci->mpi.pcell_size); - err = MPI_Irecv( - t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step_black_holes), - MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = size = + t->ci->mpi.pcell_size * sizeof(struct pcell_step_black_holes); + buff = t->buff = malloc(count); + } else if (t->subtype == task_subtype_part_swallow) { - t->buff = (struct black_holes_part_data *)malloc( - sizeof(struct black_holes_part_data) * t->ci->hydro.count); - err = MPI_Irecv( - t->buff, - t->ci->hydro.count * sizeof(struct black_holes_part_data), - MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = size = + t->ci->hydro.count * sizeof(struct black_holes_part_data); + buff = t->buff = malloc(count); + } else if (t->subtype == task_subtype_bpart_merger) { - t->buff = (struct black_holes_bpart_data *)malloc( - sizeof(struct black_holes_bpart_data) * t->ci->black_holes.count); - err = MPI_Irecv( - t->buff, - t->ci->black_holes.count * sizeof(struct black_holes_bpart_data), - MPI_BYTE, t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + count = size = + sizeof(struct black_holes_bpart_data) * t->ci->black_holes.count; + buff = t->buff = malloc(count); + } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { - err = MPI_Irecv(t->ci->hydro.parts, t->ci->hydro.count, part_mpi_type, - t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = t->ci->hydro.count; + size = count * sizeof(struct part); + type = part_mpi_type; + buff = t->ci->hydro.parts; + } else if (t->subtype == task_subtype_gpart) { - err = MPI_Irecv(t->ci->grav.parts, t->ci->grav.count, gpart_mpi_type, - t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = t->ci->grav.count; + size = count * sizeof(struct gpart); + type = gpart_mpi_type; + buff = t->ci->grav.parts; + } else if (t->subtype == task_subtype_spart) { - err = 
MPI_Irecv(t->ci->stars.parts, t->ci->stars.count, - spart_mpi_type, t->ci->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + + count = t->ci->stars.count; + size = count * sizeof(struct spart); + type = spart_mpi_type; + buff = t->ci->stars.parts; + } else if (t->subtype == task_subtype_bpart_rho || t->subtype == task_subtype_bpart_swallow || t->subtype == task_subtype_bpart_feedback) { - err = MPI_Irecv(t->ci->black_holes.parts, t->ci->black_holes.count, - bpart_mpi_type, t->ci->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + + count = t->ci->black_holes.count; + size = count * sizeof(struct bpart); + type = bpart_mpi_type; + buff = t->ci->black_holes.parts; + } else if (t->subtype == task_subtype_multipole) { - t->buff = (struct gravity_tensors *)malloc( - sizeof(struct gravity_tensors) * t->ci->mpi.pcell_size); - err = MPI_Irecv(t->buff, t->ci->mpi.pcell_size, multipole_mpi_type, - t->ci->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = t->ci->mpi.pcell_size; + size = count * sizeof(struct gravity_tensors); + type = multipole_mpi_type; + buff = t->buff = malloc(size); + } else if (t->subtype == task_subtype_sf_counts) { - t->buff = (struct pcell_sf *)malloc(sizeof(struct pcell_sf) * - t->ci->mpi.pcell_size); - err = MPI_Irecv(t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_sf), - MPI_BYTE, t->ci->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + + count = size = t->ci->mpi.pcell_size * sizeof(struct pcell_sf); + buff = t->buff = malloc(count); + } else { error("Unknown communication sub-type"); } + + err = MPI_Irecv(buff, count, type, t->ci->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); + if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit irecv for particle data."); } + + /* And log, if logging enabled. 
*/ + mpiuse_log_allocation(t->type, t->subtype, &t->req, 1, size, + t->ci->nodeID, t->flags); + qid = 1 % s->nr_queues; + } #else error("SWIFT was not compiled with MPI support."); #endif - break; + break; case task_type_send: #ifdef WITH_MPI + { + size_t size = 0; /* Size in bytes. */ + size_t count = 0; /* Number of elements to send */ + MPI_Datatype type = MPI_BYTE; /* Type of the elements */ + void *buff = NULL; /* Buffer to send */ + if (t->subtype == task_subtype_tend_part) { - t->buff = (struct pcell_step_hydro *)malloc( - sizeof(struct pcell_step_hydro) * t->ci->mpi.pcell_size); - cell_pack_end_step_hydro(t->ci, (struct pcell_step_hydro *)t->buff); - - if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro)) > - s->mpi_message_limit) { - err = MPI_Isend( - t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } else { - err = MPI_Issend( - t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } + + size = count = + t->ci->mpi.pcell_size * sizeof(struct pcell_step_hydro); + buff = t->buff = malloc(size); + cell_pack_end_step_hydro(t->ci, (struct pcell_step_hydro *)buff); + } else if (t->subtype == task_subtype_tend_gpart) { - t->buff = (struct pcell_step_grav *)malloc( - sizeof(struct pcell_step_grav) * t->ci->mpi.pcell_size); - cell_pack_end_step_grav(t->ci, (struct pcell_step_grav *)t->buff); - - if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav)) > - s->mpi_message_limit) { - err = MPI_Isend( - t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } else { - err = MPI_Issend( - t->buff, t->ci->mpi.pcell_size * sizeof(struct pcell_step_grav), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } + + size = count = t->ci->mpi.pcell_size * 
sizeof(struct pcell_step_grav); + buff = t->buff = malloc(size); + cell_pack_end_step_grav(t->ci, (struct pcell_step_grav *)buff); + } else if (t->subtype == task_subtype_tend_spart) { - t->buff = (struct pcell_step_stars *)malloc( - sizeof(struct pcell_step_stars) * t->ci->mpi.pcell_size); - cell_pack_end_step_stars(t->ci, (struct pcell_step_stars *)t->buff); - - if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars)) > - s->mpi_message_limit) { - err = MPI_Isend( - t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } else { - err = MPI_Issend( - t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } + + size = count = + t->ci->mpi.pcell_size * sizeof(struct pcell_step_stars); + buff = t->buff = malloc(size); + cell_pack_end_step_stars(t->ci, (struct pcell_step_stars *)buff); + } else if (t->subtype == task_subtype_tend_bpart) { - t->buff = (struct pcell_step_black_holes *)malloc( - sizeof(struct pcell_step_black_holes) * t->ci->mpi.pcell_size); - cell_pack_end_step_black_holes( - t->ci, (struct pcell_step_black_holes *)t->buff); - - if ((t->ci->mpi.pcell_size * sizeof(struct pcell_step_black_holes)) > - s->mpi_message_limit) { - err = MPI_Isend( - t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step_black_holes), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } else { - err = MPI_Issend( - t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_step_black_holes), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } + + size = count = + t->ci->mpi.pcell_size * sizeof(struct pcell_step_black_holes); + buff = t->buff = malloc(size); + cell_pack_end_step_black_holes(t->ci, + (struct pcell_step_black_holes *)buff); + } else if (t->subtype == task_subtype_part_swallow) { - t->buff = (struct 
black_holes_part_data *)malloc( - sizeof(struct black_holes_part_data) * t->ci->hydro.count); - cell_pack_part_swallow(t->ci, - (struct black_holes_part_data *)t->buff); - - if (t->ci->hydro.count * sizeof(struct black_holes_part_data) > - s->mpi_message_limit) { - err = MPI_Isend( - t->buff, - t->ci->hydro.count * sizeof(struct black_holes_part_data), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } else { - err = MPI_Issend( - t->buff, - t->ci->hydro.count * sizeof(struct black_holes_part_data), - MPI_BYTE, t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); - } + + size = count = + t->ci->hydro.count * sizeof(struct black_holes_part_data); + buff = t->buff = malloc(size); + cell_pack_part_swallow(t->ci, (struct black_holes_part_data *)buff); + } else if (t->subtype == task_subtype_bpart_merger) { - t->buff = (struct black_holes_bpart_data *)malloc( - sizeof(struct black_holes_bpart_data) * t->ci->black_holes.count); + + size = count = + sizeof(struct black_holes_bpart_data) * t->ci->black_holes.count; + buff = t->buff = malloc(size); cell_pack_bpart_swallow(t->ci, (struct black_holes_bpart_data *)t->buff); - if (t->ci->black_holes.count * sizeof(struct black_holes_bpart_data) > - s->mpi_message_limit) { - err = MPI_Isend(t->buff, - t->ci->black_holes.count * - sizeof(struct black_holes_bpart_data), - MPI_BYTE, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); - } else { - err = MPI_Issend(t->buff, - t->ci->black_holes.count * - sizeof(struct black_holes_bpart_data), - MPI_BYTE, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); - } - } else if (t->subtype == task_subtype_xv || t->subtype == task_subtype_rho || t->subtype == task_subtype_gradient) { - if ((t->ci->hydro.count * sizeof(struct part)) > s->mpi_message_limit) - err = MPI_Isend(t->ci->hydro.parts, t->ci->hydro.count, - part_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); - else - err = 
MPI_Issend(t->ci->hydro.parts, t->ci->hydro.count, - part_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + + count = t->ci->hydro.count; + size = count * sizeof(struct part); + type = part_mpi_type; + buff = t->ci->hydro.parts; + } else if (t->subtype == task_subtype_gpart) { - if ((t->ci->grav.count * sizeof(struct gpart)) > s->mpi_message_limit) - err = MPI_Isend(t->ci->grav.parts, t->ci->grav.count, - gpart_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); - else - err = MPI_Issend(t->ci->grav.parts, t->ci->grav.count, - gpart_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + + count = t->ci->grav.count; + size = count * sizeof(struct gpart); + type = gpart_mpi_type; + buff = t->ci->grav.parts; + } else if (t->subtype == task_subtype_spart) { - if ((t->ci->stars.count * sizeof(struct spart)) > - s->mpi_message_limit) - err = MPI_Isend(t->ci->stars.parts, t->ci->stars.count, - spart_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); - else - err = MPI_Issend(t->ci->stars.parts, t->ci->stars.count, - spart_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + + count = t->ci->stars.count; + size = count * sizeof(struct spart); + type = spart_mpi_type; + buff = t->ci->stars.parts; + } else if (t->subtype == task_subtype_bpart_rho || t->subtype == task_subtype_bpart_swallow || t->subtype == task_subtype_bpart_feedback) { - if ((t->ci->black_holes.count * sizeof(struct bpart)) > - s->mpi_message_limit) - err = MPI_Isend(t->ci->black_holes.parts, t->ci->black_holes.count, - bpart_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); - else - err = MPI_Issend(t->ci->black_holes.parts, t->ci->black_holes.count, - bpart_mpi_type, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + + count = t->ci->black_holes.count; + size = count * sizeof(struct bpart); + type = bpart_mpi_type; + buff = 
t->ci->black_holes.parts; + } else if (t->subtype == task_subtype_multipole) { - t->buff = (struct gravity_tensors *)malloc( - sizeof(struct gravity_tensors) * t->ci->mpi.pcell_size); - cell_pack_multipoles(t->ci, (struct gravity_tensors *)t->buff); - err = MPI_Isend(t->buff, t->ci->mpi.pcell_size, multipole_mpi_type, - t->cj->nodeID, t->flags, subtaskMPI_comms[t->subtype], - &t->req); + + count = t->ci->mpi.pcell_size; + size = count * sizeof(struct gravity_tensors); + type = multipole_mpi_type; + buff = t->buff = malloc(size); + cell_pack_multipoles(t->ci, (struct gravity_tensors *)buff); + } else if (t->subtype == task_subtype_sf_counts) { - t->buff = (struct pcell_sf *)malloc(sizeof(struct pcell_sf) * - t->ci->mpi.pcell_size); + + size = count = t->ci->mpi.pcell_size * sizeof(struct pcell_sf); + buff = t->buff = malloc(size); cell_pack_sf_counts(t->ci, (struct pcell_sf *)t->buff); - err = MPI_Isend(t->buff, - t->ci->mpi.pcell_size * sizeof(struct pcell_sf), - MPI_BYTE, t->cj->nodeID, t->flags, - subtaskMPI_comms[t->subtype], &t->req); + } else { error("Unknown communication sub-type"); } + + if (size > s->mpi_message_limit) { + err = MPI_Isend(buff, count, type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); + } else { + err = MPI_Issend(buff, count, type, t->cj->nodeID, t->flags, + subtaskMPI_comms[t->subtype], &t->req); + } + if (err != MPI_SUCCESS) { mpi_error(err, "Failed to emit isend for particle data."); } + + /* And log, if logging enabled. 
*/ + mpiuse_log_allocation(t->type, t->subtype, &t->req, 1, size, + t->cj->nodeID, t->flags); + qid = 0; + } #else error("SWIFT was not compiled with MPI support."); #endif - break; + break; default: qid = -1; } diff --git a/src/space.c b/src/space.c index eb498035d7c912f331870cfb0bb8bf84ad1559c4..30aa7f8a7d400bfd58cbd9639ba3eac53014931e 100644 --- a/src/space.c +++ b/src/space.c @@ -59,6 +59,7 @@ #include "pressure_floor.h" #include "restart.h" #include "sort_part.h" +#include "star_formation.h" #include "star_formation_logger.h" #include "stars.h" #include "threadpool.h" @@ -4041,6 +4042,7 @@ void space_first_init_parts_mapper(void *restrict map_data, int count, const int with_gravity = e->policy & engine_policy_self_gravity; const struct chemistry_global_data *chemistry = e->chemistry; + const struct star_formation *star_formation = e->star_formation; const struct cooling_function_data *cool_func = e->cooling_func; /* Check that the smoothing lengths are non-zero */ @@ -4094,6 +4096,10 @@ void space_first_init_parts_mapper(void *restrict map_data, int count, /* Also initialise the pressure floor */ pressure_floor_first_init_part(phys_const, us, cosmo, &p[k], &xp[k]); + /* Also initialise the star formation */ + star_formation_first_init_part(phys_const, us, cosmo, star_formation, &p[k], + &xp[k]); + /* And the cooling */ cooling_first_init_part(phys_const, us, cosmo, cool_func, &p[k], &xp[k]); diff --git a/src/star_formation/EAGLE/star_formation.h b/src/star_formation/EAGLE/star_formation.h index 851f493801dc5cb0beee9cd07ea5415a5ad1ccf1..6a32add9b861981bb8b2a3ecfa307f44526a5ea5 100644 --- a/src/star_formation/EAGLE/star_formation.h +++ b/src/star_formation/EAGLE/star_formation.h @@ -632,4 +632,69 @@ INLINE static void starformation_print_backend( starform->max_gas_density_HpCM3); } +/** + * @brief Finishes the density calculation. + * + * Nothing to do here. We do not need to compute any quantity in the hydro + * density loop for the EAGLE star formation model. 
+ * + * @param p The particle to act upon + * @param cd The global star_formation information. + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void star_formation_end_density( + struct part* restrict p, const struct star_formation* cd, + const struct cosmology* cosmo) {} + +/** + * @brief Sets all particle fields to sensible values when the #part has 0 ngbs. + * + * Nothing to do here. We do not need to compute any quantity in the hydro + * density loop for the EAGLE star formation model. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + * @param cd #star_formation containing star_formation information. + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void +star_formation_part_has_no_neighbours(struct part* restrict p, + struct xpart* restrict xp, + const struct star_formation* cd, + const struct cosmology* cosmo) {} + +/** + * @brief Sets the star_formation properties of the (x-)particles to a valid + * state to start the density loop. + * + * Nothing to do here. We do not need to compute any quantity in the hydro + * density loop for the EAGLE star formation model. + * + * @param data The global star_formation information used for this run. + * @param p Pointer to the particle data. + */ +__attribute__((always_inline)) INLINE static void star_formation_init_part( + struct part* restrict p, const struct star_formation* data) {} + +/** + * @brief Sets the star_formation properties of the (x-)particles to a valid + * start state at the beginning of the simulation after the ICs have been read. + * + * Nothing to do here. + * + * @param phys_const The physical constants in internal units. + * @param us The unit system. + * @param cosmo The current cosmological model. + * @param data The global star_formation information used for this run. + * @param p Pointer to the particle data. 
+ * @param xp Pointer to the extended particle data. + */ +__attribute__((always_inline)) INLINE static void +star_formation_first_init_part(const struct phys_const* restrict phys_const, + const struct unit_system* restrict us, + const struct cosmology* restrict cosmo, + const struct star_formation* data, + const struct part* restrict p, + struct xpart* restrict xp) {} + #endif /* SWIFT_EAGLE_STAR_FORMATION_H */ diff --git a/src/star_formation/EAGLE/star_formation_iact.h b/src/star_formation/EAGLE/star_formation_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..ab917cbe7aa67cad93a92a4b24212c5f1dcf3aeb --- /dev/null +++ b/src/star_formation/EAGLE/star_formation_iact.h @@ -0,0 +1,71 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_EAGLE_STAR_FORMATION_IACT_H +#define SWIFT_EAGLE_STAR_FORMATION_IACT_H + +/** + * @file EAGLE/star_formation_iact.h + * @brief Density computation + */ + +/** + * @brief do star_formation computation after the runner_iact_density (symmetric + * version) + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param pi First particle. + * @param pj Second particle. + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void runner_iact_star_formation( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) { + + /* Nothing to do here. We do not need to compute any quantity in the hydro + density loop for the EAGLE star formation model. */ +} + +/** + * @brief do star_formation computation after the runner_iact_density (non + * symmetric version) + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param pi First particle. + * @param pj Second particle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void +runner_iact_nonsym_star_formation(float r2, const float *dx, float hi, float hj, + struct part *restrict pi, + const struct part *restrict pj, float a, + float H) { + + /* Nothing to do here. We do not need to compute any quantity in the hydro + density loop for the EAGLE star formation model. 
*/ +} + +#endif /* SWIFT_EAGLE_STAR_FORMATION_IACT_H */ diff --git a/src/star_formation/GEAR/star_formation.h b/src/star_formation/GEAR/star_formation.h index 5fc3380fe6869bd5bcb9435fb0c129ac6fc0aad2..da43778ee958019120665c54210be9ef9c2953e0 100644 --- a/src/star_formation/GEAR/star_formation.h +++ b/src/star_formation/GEAR/star_formation.h @@ -223,4 +223,66 @@ INLINE static void starformation_print_backend( message("Star formation law is 'GEAR'"); } +/** + * @brief Finishes the density calculation. + * + * Nothing to do here. + * + * @param p The particle to act upon + * @param cd The global star_formation information. + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void star_formation_end_density( + struct part* restrict p, const struct star_formation* cd, + const struct cosmology* cosmo) {} + +/** + * @brief Sets all particle fields to sensible values when the #part has 0 ngbs. + * + * Nothing to do here. + * + * @param p The particle to act upon + * @param xp The extended particle data to act upon + * @param cd #star_formation containing star_formation information. + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void +star_formation_part_has_no_neighbours(struct part* restrict p, + struct xpart* restrict xp, + const struct star_formation* cd, + const struct cosmology* cosmo) {} + +/** + * @brief Sets the star_formation properties of the (x-)particles to a valid + * state to start the density loop. + * + * Nothing to do here. + * + * @param data The global star_formation information used for this run. + * @param p Pointer to the particle data. + */ +__attribute__((always_inline)) INLINE static void star_formation_init_part( + struct part* restrict p, const struct star_formation* data) {} + +/** + * @brief Sets the star_formation properties of the (x-)particles to a valid + * start state at the beginning of the simulation after the ICs have been read. 
+ * + * Nothing to do here. + * + * @param phys_const The physical constant in internal units. + * @param us The unit system. + * @param cosmo The current cosmological model. + * @param data The global star_formation information used for this run. + * @param p Pointer to the particle data. + * @param xp Pointer to the extended particle data. + */ +__attribute__((always_inline)) INLINE static void +star_formation_first_init_part(const struct phys_const* restrict phys_const, + const struct unit_system* restrict us, + const struct cosmology* restrict cosmo, + const struct star_formation* data, + const struct part* restrict p, + struct xpart* restrict xp) {} + #endif /* SWIFT_GEAR_STAR_FORMATION_H */ diff --git a/src/star_formation/GEAR/star_formation_iact.h b/src/star_formation/GEAR/star_formation_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..749b608068650a27cbe4c9a0ca4126d2740337f3 --- /dev/null +++ b/src/star_formation/GEAR/star_formation_iact.h @@ -0,0 +1,63 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2019 Loic Hausammann (loic.hausammann@epfl.ch) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + ******************************************************************************/ +#ifndef SWIFT_GEAR_STAR_FORMATION_IACT_H +#define SWIFT_GEAR_STAR_FORMATION_IACT_H + +/** + * @file GEAR/star_formation_iact.h + * @brief Density computation + */ + +/** + * @brief do star_formation computation after the runner_iact_density (symmetric + * version) + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param pi First particle. + * @param pj Second particle. + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void runner_iact_star_formation( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) {} + +/** + * @brief do star_formation computation after the runner_iact_density (non + * symmetric version) + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param pi First particle. + * @param pj Second particle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter. 
+ */ +__attribute__((always_inline)) INLINE static void +runner_iact_nonsym_star_formation(float r2, const float *dx, float hi, float hj, + struct part *restrict pi, + const struct part *restrict pj, float a, + float H) {} + +#endif /* SWIFT_GEAR_STAR_FORMATION_IACT_H */ diff --git a/src/star_formation/none/star_formation.h b/src/star_formation/none/star_formation.h index 0f53e951cb5842e5be3bb9bbe64eb6686f822b1e..96b1315ffe1c6a78e2375999c0cc447c0474aad2 100644 --- a/src/star_formation/none/star_formation.h +++ b/src/star_formation/none/star_formation.h @@ -163,4 +163,66 @@ INLINE static void starformation_print_backend( message("Star formation law is 'No Star Formation'"); } +/** + * @brief Finishes the density calculation. + * + * Nothing to do here. + * + * @param p The particle to act upon. + * @param cd The global star_formation information. + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void star_formation_end_density( + struct part* restrict p, const struct star_formation* cd, + const struct cosmology* cosmo) {} + +/** + * @brief Sets particle fields to sensible values if a #part has no neighbours. + * + * Nothing to do here. + * + * @param p The particle to act upon. + * @param xp The extended particle data to act upon. + * @param cd The global star_formation information. + * @param cosmo The current cosmological model. + */ +__attribute__((always_inline)) INLINE static void +star_formation_part_has_no_neighbours(struct part* restrict p, + struct xpart* restrict xp, + const struct star_formation* cd, + const struct cosmology* cosmo) {} + +/** + * @brief Sets the star_formation properties of the (x-)particles to a valid + * state to start the density loop. + * + * Nothing to do here. + * + * @param p Pointer to the particle data. + * @param data The global star_formation information used for this run.
+ */ +__attribute__((always_inline)) INLINE static void star_formation_init_part( + struct part* restrict p, const struct star_formation* data) {} + +/** + * @brief Sets the star_formation properties of the (x-)particles to a valid + * start state at the beginning of the simulation after the ICs have been read. + * + * Nothing to do here. + * + * @param phys_const The physical constants in internal units. + * @param us The unit system. + * @param cosmo The current cosmological model. + * @param data The global star_formation information used for this run. + * @param p Pointer to the particle data (not modified). + * @param xp Pointer to the extended particle data. + */ +__attribute__((always_inline)) INLINE static void +star_formation_first_init_part(const struct phys_const* restrict phys_const, + const struct unit_system* restrict us, + const struct cosmology* restrict cosmo, + const struct star_formation* data, + const struct part* restrict p, + struct xpart* restrict xp) {} + #endif /* SWIFT_NONE_STAR_FORMATION_H */ diff --git a/src/star_formation/none/star_formation_iact.h b/src/star_formation/none/star_formation_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..91ee8a6726d68b3d697e6024904422d16b2bc136 --- /dev/null +++ b/src/star_formation/none/star_formation_iact.h @@ -0,0 +1,63 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Matthieu Schaller (matthieu.schaller@durham.ac.uk) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_NONE_STAR_FORMATION_IACT_H +#define SWIFT_NONE_STAR_FORMATION_IACT_H + +/** + * @file none/star_formation_iact.h + * @brief Star formation interactions in the density loop (no star formation) + */ + +/** + * @brief Do the star_formation computation after runner_iact_density + * (symmetric version). Nothing to do here: the body is empty. + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param pi First particle. + * @param pj Second particle. + * @param a Current scale factor. + * @param H Current Hubble parameter. + */ +__attribute__((always_inline)) INLINE static void runner_iact_star_formation( + float r2, const float *dx, float hi, float hj, struct part *restrict pi, + struct part *restrict pj, float a, float H) {} + +/** + * @brief Do the star_formation computation after runner_iact_density + * (non-symmetric version). Nothing to do here: the body is empty. + * + * @param r2 Comoving square distance between the two particles. + * @param dx Comoving vector separating both particles (pi - pj). + * @param hi Comoving smoothing-length of particle i. + * @param hj Comoving smoothing-length of particle j. + * @param pi First particle. + * @param pj Second particle (not updated). + * @param a Current scale factor. + * @param H Current Hubble parameter.
+ */ +__attribute__((always_inline)) INLINE static void +runner_iact_nonsym_star_formation(float r2, const float *dx, float hi, float hj, + struct part *restrict pi, + const struct part *restrict pj, float a, + float H) {} + +#endif /* SWIFT_NONE_STAR_FORMATION_IACT_H */ diff --git a/src/star_formation_iact.h b/src/star_formation_iact.h new file mode 100644 index 0000000000000000000000000000000000000000..a62413a07a2aa0cb5cf4e12c4c33c9e82b83e50e --- /dev/null +++ b/src/star_formation_iact.h @@ -0,0 +1,42 @@ +/******************************************************************************* + * This file is part of SWIFT. + * Copyright (c) 2018 Matthieu Schaller (schaller@strw.leidenuniv.nl) + * Folkert Nobels (nobels@strw.leidenuniv.nl) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + ******************************************************************************/ +#ifndef SWIFT_STAR_FORMATION_IACT_H +#define SWIFT_STAR_FORMATION_IACT_H + +/** + * @file src/star_formation_iact.h + * @brief Selects which star formation interaction (iact) header is included. + */ + +/* Config parameters.
*/ +#include "../config.h" + +/* Import the right star formation law definition */ +#if defined(STAR_FORMATION_NONE) +#include "./star_formation/none/star_formation_iact.h" +#elif defined(STAR_FORMATION_EAGLE) +#include "./star_formation/EAGLE/star_formation_iact.h" +#elif defined(STAR_FORMATION_GEAR) +#include "./star_formation/GEAR/star_formation_iact.h" +#else +#error "Invalid choice of star formation law" +#endif + +#endif /* SWIFT_STAR_FORMATION_IACT_H */ diff --git a/src/swift.h b/src/swift.h index fe9196a8fcf6d1845c9446c480c7961504a4756f..d8221080b1179b8bddc5441cdd4ae19a3fca5b74 100644 --- a/src/swift.h +++ b/src/swift.h @@ -56,6 +56,7 @@ #include "memuse.h" #include "mesh_gravity.h" #include "minmax.h" +#include "mpiuse.h" #include "multipole.h" #include "outputlist.h" #include "parallel_io.h" @@ -66,6 +67,7 @@ #include "physical_constants.h" #include "potential.h" #include "pressure_floor.h" +#include "pressure_floor_iact.h" #include "profiler.h" #include "queue.h" #include "random.h" @@ -76,6 +78,7 @@ #include "single_io.h" #include "space.h" #include "star_formation.h" +#include "star_formation_iact.h" #include "star_formation_logger.h" #include "stars.h" #include "stars_io.h" diff --git a/src/task.c b/src/task.c index 4d6cfa2482491b1a08f6b28f7188fb94448afb2e..94652aaa363053c58aa38054f9a1b7c9d0c19b3a 100644 --- a/src/task.c +++ b/src/task.c @@ -46,6 +46,7 @@ #include "error.h" #include "inline.h" #include "lock.h" +#include "mpiuse.h" /* Task type names. */ const char *taskID_names[task_type_count] = {"none", @@ -552,6 +553,12 @@ int task_lock(struct task *t) { "%s).", taskID_names[t->type], subtaskID_names[t->subtype], t->flags, buff); } + + /* And log deactivation, if logging enabled. 
*/ + if (res) { + mpiuse_log_allocation(t->type, t->subtype, &t->req, 0, 0, 0, 0); + } + return res; #else error("SWIFT was not compiled with MPI support."); diff --git a/src/tools.c b/src/tools.c index 0643fb7922c0e3d56b70c6a0d1a30e3ca13154c6..1287d0b634529ff7226c5af5580cc0f9442e3605 100644 --- a/src/tools.c +++ b/src/tools.c @@ -47,6 +47,7 @@ #include "periodic.h" #include "pressure_floor_iact.h" #include "runner.h" +#include "star_formation_iact.h" #include "stars.h" /** @@ -225,6 +226,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { runner_iact_nonsym_density(r2, dx, hi, pj->h, pi, pj, a, H); runner_iact_nonsym_chemistry(r2, dx, hi, pj->h, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hi, pj->h, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, hi, pj->h, pi, pj, a, H); } } } @@ -258,6 +260,7 @@ void pairs_all_density(struct runner *r, struct cell *ci, struct cell *cj) { runner_iact_nonsym_density(r2, dx, hj, pi->h, pj, pi, a, H); runner_iact_nonsym_chemistry(r2, dx, hj, pi->h, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dx, hj, pi->h, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dx, hj, pi->h, pj, pi, a, H); } } } @@ -536,6 +539,7 @@ void self_all_density(struct runner *r, struct cell *ci) { runner_iact_nonsym_density(r2, dxi, hi, hj, pi, pj, a, H); runner_iact_nonsym_chemistry(r2, dxi, hi, hj, pi, pj, a, H); runner_iact_nonsym_pressure_floor(r2, dxi, hi, hj, pi, pj, a, H); + runner_iact_nonsym_star_formation(r2, dxi, hi, hj, pi, pj, a, H); } /* Hit or miss? 
*/ @@ -549,6 +553,7 @@ void self_all_density(struct runner *r, struct cell *ci) { runner_iact_nonsym_density(r2, dxi, hj, hi, pj, pi, a, H); runner_iact_nonsym_chemistry(r2, dxi, hj, hi, pj, pi, a, H); runner_iact_nonsym_pressure_floor(r2, dxi, hj, hi, pj, pi, a, H); + runner_iact_nonsym_star_formation(r2, dxi, hj, hi, pj, pi, a, H); } } } diff --git a/tests/testHydroMPIrules.c b/tests/testHydroMPIrules.c index 0d56b584bbfc24042350e39921d01f53028bbf4e..9a303b2f0d604f1b3062624674f7e38c73e65ed9 100644 --- a/tests/testHydroMPIrules.c +++ b/tests/testHydroMPIrules.c @@ -83,6 +83,8 @@ void test(void) { /* --- Test the density loop --- */ runner_iact_nonsym_density(r2, dx, pi.h, pj.h, &pi, &pj, a, H); runner_iact_nonsym_chemistry(r2, dx, pi.h, pj.h, &pi, &pj, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, pi.h, pj.h, &pi, &pj, a, H); + runner_iact_nonsym_star_formation(r2, dx, pi.h, pj.h, &pi, &pj, a, H); /* Check whether pj has been modified */ j_not_ok = memcmp(&pj, &pj2, sizeof(struct part)); diff --git a/tests/testSymmetry.c b/tests/testSymmetry.c index eb20c84ec8d38cd52eff10a316383a8797a8c6c0..f109cd0bef0627387bd63b488615f0b26461725a 100644 --- a/tests/testSymmetry.c +++ b/tests/testSymmetry.c @@ -148,15 +148,21 @@ void test(void) { /* Call the symmetric version */ runner_iact_density(r2, dx, pi.h, pj.h, &pi, &pj, a, H); runner_iact_chemistry(r2, dx, pi.h, pj.h, &pi, &pj, a, H); + runner_iact_pressure_floor(r2, dx, pi.h, pj.h, &pi, &pj, a, H); + runner_iact_star_formation(r2, dx, pi.h, pj.h, &pi, &pj, a, H); /* Call the non-symmetric version */ runner_iact_nonsym_density(r2, dx, pi2.h, pj2.h, &pi2, &pj2, a, H); runner_iact_nonsym_chemistry(r2, dx, pi2.h, pj2.h, &pi2, &pj2, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, pi2.h, pj2.h, &pi2, &pj2, a, H); + runner_iact_nonsym_star_formation(r2, dx, pi2.h, pj2.h, &pi2, &pj2, a, H); dx[0] = -dx[0]; dx[1] = -dx[1]; dx[2] = -dx[2]; runner_iact_nonsym_density(r2, dx, pj2.h, pi2.h, &pj2, &pi2, a, H); 
runner_iact_nonsym_chemistry(r2, dx, pj2.h, pi2.h, &pj2, &pi2, a, H); + runner_iact_nonsym_pressure_floor(r2, dx, pj2.h, pi2.h, &pj2, &pi2, a, H); + runner_iact_nonsym_star_formation(r2, dx, pj2.h, pi2.h, &pj2, &pi2, a, H); /* Check that the particles are the same */ i_not_ok = memcmp(&pi, &pi2, sizeof(struct part)); diff --git a/tools/match_mpireports.py b/tools/match_mpireports.py new file mode 100755 index 0000000000000000000000000000000000000000..3541506c41cbc8ca7f7ce67b30f42bb013adf35c --- /dev/null +++ b/tools/match_mpireports.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python +""" +Usage: + match_mpireports.py [options] mpi-reports... + +Match the rows that sends start and recvs complete from a set of mpi-reports +of a single step, and output the matched rows to standard output. If captured +the output can be analysed to see how long the send to recvs took to complete. + +This file is part of SWIFT. + +Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) +All Rights Reserved. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + +import sys +import argparse + +# Handle the command line. 
+parser = argparse.ArgumentParser(description="Match MPI reports")
+
+parser.add_argument("input",
+                    nargs="+",
+                    metavar="mpi-reports",
+                    help="MPI reports")
+parser.add_argument(
+    "-v",
+    "--verbose",
+    dest="verbose",
+    help="Verbose output",
+    default=False,
+    action="store_true",
+)
+args = parser.parse_args()
+infiles = args.input
+
+# Indices for words in a line.
+# (sumcol, not sum, so that the sum() builtin is not shadowed.)
+sticcol = 0
+eticcol = 1
+dticcol = 2
+stepcol = 3
+rankcol = 4
+otherrankcol = 5
+typecol = 6
+itypecol = 7
+subtypecol = 8
+isubtypecol = 9
+activationcol = 10
+tagcol = 11
+sizecol = 12
+sumcol = 13
+
+# Keyed lines.
+sends = {}
+recvs = {}
+
+# Gather keys from input files. We create dicts with matchable keys
+# for when sends start and recvs end. Other pairings are possible...
+# Note size of completion recv is negative.
+for f in infiles:
+    if args.verbose:
+        print("Processing: " + f)
+    with open(f, "r") as fp:
+        for line in fp:
+            # Skip comments and blank lines.
+            if line.startswith("#") or not line.strip():
+                continue
+            words = line.split()
+            # Rows are stored without their trailing newline.
+            row = line.rstrip("\n")
+            # Sends are keyed "otherrank/rank/subtype/tag/size" and recvs
+            # "rank/otherrank/subtype/tag/size", so a send started on one rank
+            # and the recv completed on the other rank share the same key.
+            if words[activationcol] == "1" and words[typecol] == "send":
+                key = "/".join((words[otherrankcol], words[rankcol],
+                                words[subtypecol], words[tagcol],
+                                words[sizecol]))
+                sends.setdefault(key, []).append(row)
+
+            elif words[activationcol] == "0" and words[typecol] == "recv":
+                # Drop the leading "-" of the size so it matches the send.
+                key = "/".join((words[rankcol], words[otherrankcol],
+                                words[subtypecol], words[tagcol],
+                                words[sizecol][1:]))
+                recvs.setdefault(key, []).append(row)
+
+# Now output. Note we could have unmatched recv keys, we don't check for that.
+# Single-argument print() calls behave the same under Python 2 and 3.
+for key in sends:
+    if key in recvs:
+        if len(sends[key]) == 1 and len(recvs[key]) == 1:
+            print(sends[key][0] + " " + recvs[key][0])
+        else:
+            counts = "%d / %d" % (len(sends[key]), len(recvs[key]))
+            print("# ERROR: found  " + counts + "  matches for key:  " + key +
+                  "  should be 1/1")
+    else:
+        print("# ERROR: missing recv key:  " + key)
+
+
+sys.exit(0)