/* This file is part of SWIFT. * Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk) * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * ******************************************************************************/ /** * @file mpiuse.c * @brief file of routines to report about MPI tasks used in SWIFT. */ /* Standard includes. */ #include <stdio.h> #include <stdlib.h> #include <string.h> /* Local defines. */ #include "mpiuse.h" /* Local includes. */ #include "atomic.h" #include "clocks.h" #include "cycle.h" #include "error.h" /* Our rank. */ extern int myrank; /* The initial size and increment of the log entries buffer. */ #define MPIUSE_INITLOG 1000000 /* The log of activations and handoffs. All volatile as accessed from threads * that use the value to synchronise. */ static struct mpiuse_log_entry *volatile mpiuse_log = NULL; static volatile size_t mpiuse_log_size = 0; static volatile size_t mpiuse_log_count = 0; static volatile size_t mpiuse_log_done = 0; static volatile int mpiuse_max_rank = 0; /** * @brief reallocate the entries log if space is needed. */ static void mpiuse_log_reallocate(size_t ind) { if (ind == 0) { /* Need to perform initialization. Be generous. */ if ((mpiuse_log = (struct mpiuse_log_entry *)malloc( sizeof(struct mpiuse_log_entry) * MPIUSE_INITLOG)) == NULL) error("Failed to allocate MPI use log."); /* Last action. */ mpiuse_log_size = MPIUSE_INITLOG; } else { struct mpiuse_log_entry *new_log; if ((new_log = (struct mpiuse_log_entry *)malloc( sizeof(struct mpiuse_log_entry) * (mpiuse_log_size + MPIUSE_INITLOG))) == NULL) error("Failed to re-allocate MPI use log."); /* Wait for all writes to the old buffer to complete. */ while (mpiuse_log_done < mpiuse_log_size) ; /* Copy to new buffer. */ memcpy(new_log, mpiuse_log, sizeof(struct mpiuse_log_entry) * mpiuse_log_size); free(mpiuse_log); mpiuse_log = new_log; /* Last action, releases waiting threads. */ atomic_add(&mpiuse_log_size, MPIUSE_INITLOG); } } /** * @brief Log an MPI request or handoff. * * @param rank the rank * @param step the step * @param tic the ticks at time of log, will be relative. * @param type the task type (send or recv). * @param subtype the task subtype. * @param activation if not is a successful MPI_Test, not MPI_Isend or * MPI_Irecv. * @param size the size in bytes of memory to be transfered or received. * 0 for a deactivation. * @param otherrank other rank associated with the transfer. * @param tag the MPI tag. */ void mpiuse_log_allocation(int rank, int step, size_t tic, int type, int subtype, int activation, size_t size, int otherrank, int tag) { size_t ind = atomic_inc(&mpiuse_log_count); /* If we are at the current size we need more space. */ if (ind == mpiuse_log_size) mpiuse_log_reallocate(ind); /* Other threads wait for space. */ while (ind > mpiuse_log_size) ; /* Record the log. */ mpiuse_log[ind].activation = activation; mpiuse_log[ind].data = NULL; mpiuse_log[ind].otherrank = otherrank; mpiuse_log[ind].rank = rank; mpiuse_log[ind].req = MPI_REQUEST_NULL; mpiuse_log[ind].size = size; mpiuse_log[ind].step = step; mpiuse_log[ind].subtype = subtype; mpiuse_log[ind].tag = tag; mpiuse_log[ind].tic = tic; mpiuse_log[ind].type = type; /* Keep number of ranks for convenience. */ if (rank > mpiuse_max_rank) mpiuse_max_rank = rank; atomic_inc(&mpiuse_log_done); } /** * @brief restore the log from a dump. * * @param filename name of file with the previous dump in. */ void mpiuse_log_restore(const char *filename) { /* Open the input file. */ FILE *fd; if ((fd = fopen(filename, "r")) == NULL) { message("Failed to open the MPI use log file '%s'.", filename); return; } /* Read until the end of the file is reached.*/ char line[132]; size_t stic, etic, dtic, size, sum; int step, rank, otherrank, itype, isubtype, activation, tag; char type[32], subtype[32]; while (!feof(fd)) { if (fgets(line, 132, fd) != NULL) { if (line[0] != '#') { sscanf(line, "%zd %zd %zd %d %d %d %s %d %s %d %d %d %zd %zd", &stic, &etic, &dtic, &step, &rank, &otherrank, type, &itype, subtype, &isubtype, &activation, &tag, &size, &sum); mpiuse_log_allocation(rank, step, stic, itype, isubtype, activation, size, otherrank, tag); } } } fclose(fd); } /** * @brief dump the logs for all ranks to a file. * * @param nranks the number of ranks. * @param dumpfile the file to write */ void mpiuse_dump_logs(int nranks, const char *dumpfile) { /* Make sure output file is empty, only on one rank. */ FILE *fd; if (myrank == 0) { fd = fopen(dumpfile, "w"); /* Header. */ fprintf(fd, "# logticin logtic injtic endtic dtic step rank otherrank itype " " isubtype tag size nr_tests tsum tmin tmax\n"); fclose(fd); } MPI_Barrier(MPI_COMM_WORLD); /* Loop over all ranks, one by one, getting each rank to append their * logs. */ for (int k = 0; k < nranks; k++) { /* Rank 0 decides the index of the writing node, this happens * one-by-one. */ int kk = k; MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD); if (kk == myrank) { /* Open file and position at end. */ fd = fopen(dumpfile, "a"); /* And append our logs. Note log->tic is not necessarily from this * machine, so the conversion to ms may be suspect. We also rebase a * version to match the expected injection times for this new run. */ size_t nlogs = mpiuse_log_count; ticks basetics = 0; for (size_t k = 0; k < nlogs; k++) { struct mpiuse_log_entry *log = &mpiuse_log[k]; if (log->rank == myrank && log->endtic > 0) { if (basetics == 0) basetics = log->tic; fprintf(fd, "%lld %.4f %.4f %.4f %.6f %d %d %d %d %d %d %zd %d %.4f %.6f " "%.6f\n", log->tic, clocks_from_ticks(log->tic - basetics), clocks_from_ticks(log->injtic - clocks_start_ticks), clocks_from_ticks(log->endtic - clocks_start_ticks), clocks_from_ticks(log->endtic - log->injtic), log->step, log->rank, log->otherrank, log->type, log->subtype, log->tag, log->size, log->nr_tests, clocks_from_ticks(log->tsum), clocks_from_ticks(log->tmin), clocks_from_ticks(log->tmax)); } } fclose(fd); } /* Need to stay in step. */ MPI_Barrier(MPI_COMM_WORLD); } } /** * @brief return the number of log entries. * * @result the number of log entries. */ int mpiuse_nr_logs(void) { return mpiuse_log_count; } /** * @brief return the number of ranks in log. * * @result the number of ranks we've seen. */ int mpiuse_nr_ranks(void) { return mpiuse_max_rank + 1; } /** * @brief get a log entry. * * @param ind the index of the entry required. * @result NULL if not available. */ struct mpiuse_log_entry *mpiuse_get_log(int ind) { if (ind < mpiuse_log_count && ind >= 0) return &mpiuse_log[ind]; return NULL; }