diff --git a/configure.ac b/configure.ac
index c715fb34baeb469e87977da779d82f48f564999e..9b8f7160f7b9bb14bfa0bfd20233de12dde99fda 100644
--- a/configure.ac
+++ b/configure.ac
@@ -375,6 +375,7 @@ fi
 # Check whether we have any of the ARM v8.1 tick timers
 AX_ASM_ARM_PMCCNTR
 AX_ASM_ARM_CNTVCT
+
 # See if we want memuse reporting.
 AC_ARG_ENABLE([memuse-reports],
    [AS_HELP_STRING([--enable-memuse-reports],
@@ -387,6 +388,18 @@ if test "$enable_memuse_reports" = "yes"; then
    AC_DEFINE([SWIFT_MEMUSE_REPORTS],1,[Enable memory usage reports])
 fi
 
+# See if we want mpi reporting.
+AC_ARG_ENABLE([mpiuse-reports],
+   [AS_HELP_STRING([--enable-mpiuse-reports],
+     [Output reports about MPI tasks requests@<:@yes/no@:>@]
+   )],
+   [enable_mpiuse_reports="$enableval"],
+   [enable_mpiuse_reports="no"]
+)
+if test "$enable_mpiuse_reports" = "yes"; then
+   AC_DEFINE([SWIFT_MPIUSE_REPORTS],1,[Enable MPI task reports])
+fi
+
 # Define HAVE_POSIX_MEMALIGN if it works.
 AX_FUNC_POSIX_MEMALIGN
 
diff --git a/examples/main.c b/examples/main.c
index e08999bdcf89e237fbf4f72ad756703bfa3291c8..b91300fb8436128915daff3418337f6c0132193a 100644
--- a/examples/main.c
+++ b/examples/main.c
@@ -1165,6 +1165,15 @@ int main(int argc, char *argv[]) {
   }
 #endif
 
+  /* Dump MPI requests if collected. */
+#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI)
+  {
+    char dumpfile[40];
+    snprintf(dumpfile, 40, "mpiuse_report-rank%d-step%d.dat", engine_rank, 0);
+    mpiuse_log_dump(dumpfile);
+  }
+#endif
+
   /* Main simulation loop */
   /* ==================== */
   int force_stop = 0, resubmit = 0;
@@ -1226,6 +1235,16 @@ int main(int argc, char *argv[]) {
     }
 #endif
 
+    /* Dump MPI requests if collected. */
+#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI)
+    {
+      char dumpfile[40];
+      snprintf(dumpfile, 40, "mpiuse_report-rank%d-step%d.dat", engine_rank,
+               j + 1);
+      mpiuse_log_dump(dumpfile);
+    }
+#endif  // WITH_MPI
+
 #ifdef SWIFT_DEBUG_THREADPOOL
     /* Dump the task data using the given frequency. */
     if (dump_threadpool && (dump_threadpool == 1 || j % dump_threadpool == 1)) {
diff --git a/src/Makefile.am b/src/Makefile.am
index 0d3be275a2fa224f9f3e591c702a07ce00897f9f..1a29f60e76b9e2eaf46e577b20094201c5aa4377 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -52,7 +52,7 @@ include_HEADERS = space.h runner.h queue.h task.h lock.h cell.h part.h const.h \
     logger_io.h tracers_io.h tracers.h tracers_struct.h star_formation_io.h fof.h \
     star_formation_struct.h star_formation.h star_formation_iact.h \
     star_formation_logger.h star_formation_logger_struct.h \
-    velociraptor_struct.h velociraptor_io.h random.h memuse.h memuse_rnodes.h black_holes.h black_holes_io.h \
+    velociraptor_struct.h velociraptor_io.h random.h memuse.h mpiuse.h memuse_rnodes.h black_holes.h black_holes_io.h \
     black_holes_properties.h black_holes_struct.h feedback.h feedback_struct.h feedback_properties.h
 
 # source files for EAGLE cooling
@@ -78,7 +78,7 @@ AM_SOURCES = space.c runner.c queue.c task.c cell.c engine.c engine_maketasks.c
     part_type.c xmf.c gravity_properties.c gravity.c \
     collectgroup.c hydro_space.c equation_of_state.c \
     chemistry.c cosmology.c restart.c mesh_gravity.c velociraptor_interface.c \
-    outputlist.c velociraptor_dummy.c logger_io.c memuse.c memuse_rnodes.c fof.c \
+    outputlist.c velociraptor_dummy.c logger_io.c memuse.c mpiuse.c memuse_rnodes.c fof.c \
     hashmap.c \
     $(EAGLE_COOLING_SOURCES) $(EAGLE_FEEDBACK_SOURCES)
 
diff --git a/src/memuse.c b/src/memuse.c
index f3470f6c4569067d4f352cfb970c57d8562745b4..82390bb5efdc50fd4bdeb4f843228c85353ee8d9 100644
--- a/src/memuse.c
+++ b/src/memuse.c
@@ -28,9 +28,9 @@
 #include "../config.h"
 
 /* Standard includes. */
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdint.h>
 #include <string.h>
 #include <sys/types.h>
 #include <unistd.h>
diff --git a/src/memuse_rnodes.c b/src/memuse_rnodes.c
index daf8164fe4d39a2532287e1bfc8b2d132bf05b19..4b483351bdb208c48f910656d6e2944d5bdd4011 100644
--- a/src/memuse_rnodes.c
+++ b/src/memuse_rnodes.c
@@ -26,9 +26,9 @@
 #include "../config.h"
 
 /* Standard includes. */
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdint.h>
 #include <string.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -150,8 +150,7 @@ static struct memuse_rnode *memuse_rnode_lookup(const struct memuse_rnode *node,
  * @param value pointer that will be stored as the value of the leaf node.
  */
 void memuse_rnode_insert_child(struct memuse_rnode *node, uint8_t depth,
-                               uint8_t *key, uint8_t keylen,
-                               void *value) {
+                               uint8_t *key, uint8_t keylen, void *value) {
 
   /* Check if keypart this already exists at this level and add new child if
    * not. */
diff --git a/src/memuse_rnodes.h b/src/memuse_rnodes.h
index e1f4b2bce4ad56d0571e8d74b1d8b705ed2be29d..41f24a98ad60396aec06d3170d478834428007ce 100644
--- a/src/memuse_rnodes.h
+++ b/src/memuse_rnodes.h
@@ -42,8 +42,7 @@ struct memuse_rnode {
 
 void memuse_rnode_dump(int depth, struct memuse_rnode *node, int full);
 void memuse_rnode_insert_child(struct memuse_rnode *node, uint8_t depth,
-                               uint8_t *key, uint8_t keylen,
-                               void *value);
+                               uint8_t *key, uint8_t keylen, void *value);
 struct memuse_rnode *memuse_rnode_find_child(struct memuse_rnode *node,
                                              uint8_t depth, uint8_t *key,
                                              uint8_t keylen);
diff --git a/src/mpiuse.c b/src/mpiuse.c
new file mode 100644
index 0000000000000000000000000000000000000000..7956d7282f3e812cc75ebac4cf121af5aacf2a99
--- /dev/null
+++ b/src/mpiuse.c
@@ -0,0 +1,339 @@
+/* This file is part of SWIFT.
+ * Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+
+/**
+ * @file mpiuse.c
+ * @brief file of routines to report about MPI tasks used in SWIFT.
+ */
+/* Config parameters. */
+#include "../config.h"
+
+#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI)
+
+/* Standard includes. */
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* Local defines. */
+#include "mpiuse.h"
+
+/* Local includes. */
+#include "atomic.h"
+#include "clocks.h"
+#include "engine.h"
+#include "error.h"
+#include "memuse_rnodes.h"
+
+/* The initial size and increment of the log entries buffer. */
+#define MPIUSE_INITLOG 1000000
+
+/* A megabyte for conversions. */
+#define MEGABYTE 1048576.0
+
+/* Also recorded in logger. */
+extern int engine_rank;
+extern int engine_current_step;
+
+/* Entry for logger of MPI send and recv requests in a step. */
+struct mpiuse_log_entry {
+
+  /* Type and subtype of MPI task. */
+  int type;
+  int subtype;
+
+  /* Step of action. */
+  int step;
+
+  /* Whether an activation, send or recv, or if handoff completed. Not the
+   * same as delivered, need to match across ranks to see that. */
+  int activation;
+
+  /* Memory of the request. */
+  size_t size;
+
+  /* Pointer to the request associated with the call. Needs to be
+   * unique and match to the successful MPI_Test. */
+  union {
+    void *ptr;
+    uint8_t vptr[sizeof(uintptr_t)]; /* For rnode keys. */
+  };
+
+  /* Relative time of this action. */
+  ticks dtic;
+
+  /* Whether request is still active, i.e. successful test not seen. */
+  int active;
+
+  /* Rank of otherside of communication. */
+  int otherrank;
+
+  /* The tag. */
+  int tag;
+};
+
+/* The log of activations and handoffs. All volatile as accessed from threads
+ * that use the value to synchronise. */
+static struct mpiuse_log_entry *volatile mpiuse_log = NULL;
+static volatile size_t mpiuse_log_size = 0;
+static volatile size_t mpiuse_log_count = 0;
+static volatile size_t mpiuse_old_count = 0;
+static volatile size_t mpiuse_log_done = 0;
+
+/**
+ * @brief reallocate the entries log if space is needed.
+ *
+ * @param ind index of the log entry being claimed; 0 selects the initial
+ *            allocation, any other value grows the log by MPIUSE_INITLOG.
+ */
+static void mpiuse_log_reallocate(size_t ind) {
+
+  if (ind == 0) {
+
+    /* Need to perform initialization. Be generous. */
+    if ((mpiuse_log = (struct mpiuse_log_entry *)malloc(
+             sizeof(struct mpiuse_log_entry) * MPIUSE_INITLOG)) == NULL)
+      error("Failed to allocate MPI use log.");
+
+    /* Last action. */
+    mpiuse_log_size = MPIUSE_INITLOG;
+
+  } else {
+    struct mpiuse_log_entry *new_log;
+    if ((new_log = (struct mpiuse_log_entry *)malloc(
+             sizeof(struct mpiuse_log_entry) *
+             (mpiuse_log_size + MPIUSE_INITLOG))) == NULL)
+      error("Failed to re-allocate MPI use log.");
+
+    /* Wait for all writes to the old buffer to complete. */
+    while (mpiuse_log_done < mpiuse_log_size)
+      ;
+
+    /* Copy to new buffer. */
+    memcpy(new_log, mpiuse_log,
+           sizeof(struct mpiuse_log_entry) * mpiuse_log_size);
+    free(mpiuse_log);
+    mpiuse_log = new_log;
+
+    /* Last action, releases waiting threads. */
+    atomic_add(&mpiuse_log_size, MPIUSE_INITLOG);
+  }
+}
+
+/**
+ * @brief Log an MPI request or handoff.
+ *
+ * @param type the task type (send or recv).
+ * @param subtype the task subtype.
+ * @param ptr pointer to the MPI request.
+ * @param activation non-zero for an MPI_Isend or MPI_Irecv, zero for the
+ *        successful MPI_Test that completes it.
+ * @param size the size in bytes of memory to be transfered or received.
+ *             0 for a deactivation.
+ * @param otherrank other rank associated with the transfer.
+ * @param tag the MPI tag.
+ */
+void mpiuse_log_allocation(int type, int subtype, void *ptr, int activation,
+                           size_t size, int otherrank, int tag) {
+
+  size_t ind = atomic_inc(&mpiuse_log_count);
+
+  /* If we are at the current size we need more space. */
+  if (ind == mpiuse_log_size) mpiuse_log_reallocate(ind);
+
+  /* Other threads wait for space. */
+  while (ind > mpiuse_log_size)
+    ;
+
+  /* Record the log. */
+  mpiuse_log[ind].step = engine_current_step;
+  mpiuse_log[ind].type = type;
+  mpiuse_log[ind].subtype = subtype;
+  mpiuse_log[ind].activation = activation;
+  mpiuse_log[ind].size = size;
+  mpiuse_log[ind].ptr = ptr;
+  mpiuse_log[ind].otherrank = otherrank;
+  mpiuse_log[ind].tag = tag;
+  mpiuse_log[ind].dtic = getticks() - clocks_start_ticks;
+  mpiuse_log[ind].active = 1;
+  atomic_inc(&mpiuse_log_done);
+}
+
+/**
+ * @brief dump the log to a file and reset, if anything to dump.
+ *
+ * Writes the records logged since the previous dump, pairing each completion
+ * with its initiating send or recv by request address.
+ *
+ * @param filename name of file for log dump.
+ */
+void mpiuse_log_dump(const char *filename) {
+
+  /* Skip if nothing logged this step. */
+  if (mpiuse_log_count == mpiuse_old_count) return;
+
+  // ticks tic = getticks();
+
+  /* Stop any new logs from being processed while we are dumping. */
+  size_t log_count = mpiuse_log_count;
+  size_t old_count = mpiuse_old_count;
+
+  /* Open the output file. Done before any allocation so that the early
+   * return on failure has nothing to release. */
+  FILE *fd;
+  if ((fd = fopen(filename, "w")) == NULL) {
+    message("Failed to create MPI use log file '%s', logs not dumped.",
+            filename);
+    return;
+  }
+
+  /* Create the radix tree root node. */
+  struct memuse_rnode *memuse_rnode_root =
+      (struct memuse_rnode *)calloc(1, sizeof(struct memuse_rnode));
+  if (memuse_rnode_root == NULL)
+    error("Failed to allocate MPI use log radix tree root.");
+
+  /* Write a header. */
+  fprintf(fd,
+          "# dtic step rank otherrank type subtype activation tag size sum\n");
+
+  size_t mpiuse_current = 0;
+  for (size_t k = old_count; k < log_count; k++) {
+
+    /* Check if this address has already been recorded. */
+    struct memuse_rnode *child = memuse_rnode_find_child(
+        memuse_rnode_root, 0, mpiuse_log[k].vptr, sizeof(uintptr_t));
+
+    if (child != NULL && child->ptr != NULL) {
+
+      /* Should be the handoff. Check that. */
+      if (mpiuse_log[k].activation) {
+
+        /* Used twice, this is an error, but just complain as not fatal. */
+#if SWIFT_DEBUG_CHECKS
+        message(
+            "Used the same MPI request address twice "
+            "(%s/%s: %d->%d: %zd/%d)",
+            taskID_names[mpiuse_log[k].type],
+            subtaskID_names[mpiuse_log[k].subtype], engine_rank,
+            mpiuse_log[k].otherrank, mpiuse_log[k].size,
+            mpiuse_log[k].tag);
+#endif
+        continue;
+      }
+
+      /* Free, update the missing fields, size of request is removed. */
+      struct mpiuse_log_entry *oldlog =
+          (struct mpiuse_log_entry *)child->ptr;
+      mpiuse_log[k].size = -oldlog->size;
+      mpiuse_log[k].otherrank = oldlog->otherrank;
+      mpiuse_log[k].tag = oldlog->tag;
+
+      /* And deactivate this key. */
+      child->ptr = NULL;
+
+      /* And mark this as handed off. */
+      mpiuse_log[k].active = 0;
+      oldlog->active = 0;
+
+    } else if (child == NULL && mpiuse_log[k].activation) {
+
+      /* Not found, so new send/recv which we store the log against the
+       * address. */
+      memuse_rnode_insert_child(memuse_rnode_root, 0, mpiuse_log[k].vptr,
+                                sizeof(uintptr_t), &mpiuse_log[k]);
+
+    } else if (child == NULL && !mpiuse_log[k].activation) {
+
+      /* Unmatched handoff, not OK, but not fatal. */
+#if SWIFT_DEBUG_CHECKS
+      if (mpiuse_log[k].ptr != NULL) {
+        message("Unmatched MPI_Test found: (%s/%s: %d->%d: %zd/%d)",
+                taskID_names[mpiuse_log[k].type],
+                subtaskID_names[mpiuse_log[k].subtype],
+                engine_rank, mpiuse_log[k].otherrank,
+                mpiuse_log[k].size, mpiuse_log[k].tag);
+      }
+#endif
+      continue;
+    } else if (mpiuse_log[k].activation) {
+
+      /* Must be previously released request with the same address, so we
+       * store. */
+      memuse_rnode_insert_child(memuse_rnode_root, 0, mpiuse_log[k].vptr,
+                                sizeof(uintptr_t), &mpiuse_log[k]);
+
+    } else {
+      /* Should not happen ... */
+      message("Weird MPI log record found: (%s/%s: %d->%d: %zd/%d)",
+              taskID_names[mpiuse_log[k].type],
+              subtaskID_names[mpiuse_log[k].subtype],
+              engine_rank, mpiuse_log[k].otherrank,
+              mpiuse_log[k].size, mpiuse_log[k].tag);
+      continue;
+    }
+
+    /* Sum of memory in flight. */
+    mpiuse_current += mpiuse_log[k].size;
+
+    /* And output. */
+    fprintf(fd, "%lld %d %d %d %s %s %d %d %zd %zd\n", mpiuse_log[k].dtic,
+            mpiuse_log[k].step, engine_rank,
+            mpiuse_log[k].otherrank, taskID_names[mpiuse_log[k].type],
+            subtaskID_names[mpiuse_log[k].subtype],
+            mpiuse_log[k].activation, mpiuse_log[k].tag,
+            mpiuse_log[k].size, mpiuse_current);
+  }
+
+#ifdef MEMUSE_RNODE_DUMP
+  /* Debug dump of tree. */
+  // memuse_rnode_dump(0, memuse_rnode_root, 0);
+#endif
+
+  /* Now check any still active logs, these are errors all should match. */
+  if (mpiuse_current != 0) {
+    message("Some MPI requests have not been completed");
+    for (size_t k = old_count; k < log_count; k++) {
+      if (mpiuse_log[k].active)
+        message("%s/%s: %d->%d: %zd/%d)", taskID_names[mpiuse_log[k].type],
+                subtaskID_names[mpiuse_log[k].subtype],
+                engine_rank, mpiuse_log[k].otherrank,
+                mpiuse_log[k].size, mpiuse_log[k].tag);
+    }
+  }
+
+  /* Finished with the rnodes. */
+  memuse_rnode_cleanup(memuse_rnode_root);
+
+  /* Reset, the records up to log_count are now dumped, the next dump starts
+   * after them. */
+  mpiuse_old_count = log_count;
+
+  /* Close the file. */
+  fflush(fd);
+  fclose(fd);
+
+  //  message("took %.3f %s.", clocks_from_ticks(getticks() - tic),
+  //          clocks_getunit());
+}
+
+#endif /* defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) */
+
diff --git a/src/mpiuse.h b/src/mpiuse.h
new file mode 100644
index 0000000000000000000000000000000000000000..ed3f8226a3f6d379410937d46ef4b2dd5abd1e72
--- /dev/null
+++ b/src/mpiuse.h
@@ -0,0 +1,42 @@
+/*******************************************************************************
+ * This file is part of SWIFT.
+ * Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk)
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ ******************************************************************************/
+#ifndef SWIFT_MPIUSE_H
+#define SWIFT_MPIUSE_H
+
+/* Config parameters. */
+#include "../config.h"
+
+/* Includes. */
+#include <stdlib.h>
+
+/* API. */
+#if defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI)
+void mpiuse_log_dump(const char *filename);
+void mpiuse_log_allocation(int type, int subtype, void *ptr, int activation,
+                           size_t size, int otherrank, int tag);
+#else
+
+/* No-op when not reporting. Expands to an expression so that the trailing
+ * semicolon at the call site forms exactly one statement. */
+#define mpiuse_log_allocation(type, subtype, ptr, activation, size, \
+                              otherrank, tag)                       \
+  ((void)0)
+#endif /* defined(SWIFT_MPIUSE_REPORTS) && defined(WITH_MPI) */
+
+#endif /* SWIFT_MPIUSE_H */
diff --git a/src/swift.h b/src/swift.h
index b9f8818d8b833231971abb1afb36ee4507648488..94fc6158279edf90d353f8e7d5619cd164fad6d8 100644
--- a/src/swift.h
+++ b/src/swift.h
@@ -55,6 +55,7 @@
 #include "map.h"
 #include "memuse.h"
 #include "mesh_gravity.h"
+#include "mpiuse.h"
 #include "multipole.h"
 #include "outputlist.h"
 #include "parallel_io.h"