Skip to content
Snippets Groups Projects
Select Git revision
  • 16ff100ce1d0b2cfdc3a95a76c4fcd2779dd45f8
  • master default protected
  • multi-injectors-many
  • simplerdma-wrapper
  • multi-all
  • ragged-one-sided
  • asyncreallyonesided-fast
  • asyncreallyonesided
  • simplerdma-wrapper-roce
  • simplerdma
  • simplerdma-razeh
  • asyncreallyonesided-bunched
  • asyncreallyonesidedx2
  • reallyonesided
  • simpleonesided
  • onesided
  • mpi-thread-split
  • multi-injectors
  • swiftmpiproxies
  • fixed-injections
  • mpiwaitall
  • v1.1
  • v1.0
23 results

mpiuse.c

Blame
  • mpiuse.c 7.80 KiB
    /* This file is part of SWIFT.
     * Copyright (c) 2019 Peter W. Draper (p.w.draper@durham.ac.uk)
     *
     * This program is free software: you can redistribute it and/or modify
     * it under the terms of the GNU Lesser General Public License as published
     * by the Free Software Foundation, either version 3 of the License, or
     * (at your option) any later version.
     *
     * This program is distributed in the hope that it will be useful,
     * but WITHOUT ANY WARRANTY; without even the implied warranty of
     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     * GNU General Public License for more details.
     *
     * You should have received a copy of the GNU Lesser General Public License
     * along with this program.  If not, see <http://www.gnu.org/licenses/>.
     *
     ******************************************************************************/
    
    /**
     *  @file mpiuse.c
     *  @brief file of routines to report about MPI tasks used in SWIFT.
     */
    
    /* Standard includes. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /* Local defines. */
    #include "mpiuse.h"
    
    /* Local includes. */
    #include "atomic.h"
    #include "clocks.h"
    #include "cycle.h"
    #include "error.h"
    
    /* Our rank. Only declared here, the definition is external. */
    extern int myrank;
    
    /* The initial size and increment of the log entries buffer, counted in
     * log entries (not bytes). */
    #define MPIUSE_INITLOG 1000000
    
    /* The log of activations and handoffs. All volatile as accessed from threads
     * that use the value to synchronise.
     *
     *  mpiuse_log       the buffer of entries, NULL until first allocated.
     *  mpiuse_log_size  the number of entries the buffer has room for.
     *  mpiuse_log_count the number of entry indices handed out so far.
     *  mpiuse_log_done  the number of entries that have been completely written.
     *  mpiuse_max_rank  the largest rank passed to mpiuse_log_allocation().
     */
    static struct mpiuse_log_entry *volatile mpiuse_log = NULL;
    static volatile size_t mpiuse_log_size = 0;
    static volatile size_t mpiuse_log_count = 0;
    static volatile size_t mpiuse_log_done = 0;
    static volatile int mpiuse_max_rank = 0;
    
    /**
     * @brief allocate the entries log, or grow it if space is needed.
     *
     * When @c ind is zero the log is allocated with room for MPIUSE_INITLOG
     * entries. Otherwise a new buffer with room for MPIUSE_INITLOG more entries
     * is allocated, the existing entries are copied into it once they have all
     * been written, and the old buffer is freed. In both cases mpiuse_log_size
     * is updated as the very last action, as other threads spin on that value.
     *
     * @param ind the log index of the calling thread, only compared to zero
     *            to tell initialization apart from growth.
     */
    static void mpiuse_log_reallocate(size_t ind) {
    
      if (ind == 0) {
    
        /* Need to perform initialization. Be generous. */
        if ((mpiuse_log = (struct mpiuse_log_entry *)malloc(
                 sizeof(struct mpiuse_log_entry) * MPIUSE_INITLOG)) == NULL)
          error("Failed to allocate MPI use log.");
    
        /* Last action. Threads holding a non-zero index spin until this
         * becomes visible, so the buffer pointer must be set first. */
        mpiuse_log_size = MPIUSE_INITLOG;
    
      } else {
        /* Get the larger buffer before waiting, so the wait below is the only
         * thing between the last write to the old buffer and the copy. */
        struct mpiuse_log_entry *new_log;
        if ((new_log = (struct mpiuse_log_entry *)malloc(
                 sizeof(struct mpiuse_log_entry) *
                 (mpiuse_log_size + MPIUSE_INITLOG))) == NULL)
          error("Failed to re-allocate MPI use log.");
    
        /* Wait for all writes to the old buffer to complete. Busy loop until
         * every slot of the old buffer has been counted as done. */
        while (mpiuse_log_done < mpiuse_log_size)
          ;
    
        /* Copy to new buffer. */
        memcpy(new_log, mpiuse_log,
               sizeof(struct mpiuse_log_entry) * mpiuse_log_size);
        free(mpiuse_log);
        mpiuse_log = new_log;
    
        /* Last action, releases waiting threads.
         * NOTE(review): this relies on the pointer store above being visible
         * to other threads before the new size -- presumably atomic_add acts
         * as a full memory barrier; confirm in atomic.h. */
        atomic_add(&mpiuse_log_size, MPIUSE_INITLOG);
      }
    }
    
    /**
     * @brief Log an MPI request or handoff.
     *
     * Claims the next free index in the log using an atomic increment, grows
     * the log when that index is exactly at the current capacity, otherwise
     * spins while the index is beyond the capacity, and then fills in the
     * entry. The data pointer of the entry is set to NULL and the request to
     * MPI_REQUEST_NULL. Finally the entry is counted as done.
     *
     * @param rank the rank
     * @param step the step
     * @param tic the ticks at time of log, will be relative.
     * @param type the task type (send or recv).
     * @param subtype the task subtype.
     * @param activation flag stored with the entry; presumably non-zero for
     *        an MPI_Isend or MPI_Irecv and zero for a successful MPI_Test --
     *        TODO confirm against the callers.
     * @param size the size in bytes of memory to be transferred or received.
     *             0 for a deactivation.
     * @param otherrank other rank associated with the transfer.
     * @param tag the MPI tag.
     */
    void mpiuse_log_allocation(int rank, int step, size_t tic, int type,
                               int subtype, int activation, size_t size,
                               int otherrank, int tag) {
    
      /* Claim our slot, the returned value is used as the index of the entry. */
      size_t ind = atomic_inc(&mpiuse_log_count);
    
      /* If we are at the current size we need more space. */
      if (ind == mpiuse_log_size) mpiuse_log_reallocate(ind);
    
      /* Other threads wait for space. */
      while (ind > mpiuse_log_size)
        ;
    
      /* Record the log.
       * NOTE(review): the injtic, endtic, nr_tests, tsum, tmin and tmax fields
       * read by mpiuse_dump_logs() are not set here and the buffer comes from
       * malloc -- presumably they are filled in elsewhere before the dump;
       * confirm. */
      mpiuse_log[ind].activation = activation;
      mpiuse_log[ind].data = NULL;
      mpiuse_log[ind].otherrank = otherrank;
      mpiuse_log[ind].rank = rank;
      mpiuse_log[ind].req = MPI_REQUEST_NULL;
      mpiuse_log[ind].size = size;
      mpiuse_log[ind].step = step;
      mpiuse_log[ind].subtype = subtype;
      mpiuse_log[ind].tag = tag;
      mpiuse_log[ind].tic = tic;
      mpiuse_log[ind].type = type;
    
      /* Keep number of ranks for convenience.
       * NOTE(review): plain compare and store, not an atomic update -- confirm
       * that concurrent callers cannot lose a larger rank here. */
      if (rank > mpiuse_max_rank) mpiuse_max_rank = rank;
    
      /* Tell a thread growing the log that this slot has been written. */
      atomic_inc(&mpiuse_log_done);
    }
    
    /**
     * @brief restore the log from a dump.
     *
     * Reads the file line by line. Lines starting with '#' are comments and are
     * ignored. Every other line must hold 14 whitespace separated fields:
     *
     *   stic etic dtic step rank otherrank type itype subtype isubtype
     *   activation tag size sum
     *
     * of which stic, step, rank, otherrank, itype, isubtype, activation, tag
     * and size are passed on to mpiuse_log_allocation(). Lines that do not
     * parse completely are skipped, and the number skipped is reported once
     * the file has been read.
     *
     * If the file cannot be opened a message is issued and nothing is logged.
     *
     * @param filename name of file with the previous dump in.
     */
    void mpiuse_log_restore(const char *filename) {
    
      /* Open the input file. */
      FILE *fd = fopen(filename, "r");
      if (fd == NULL) {
        message("Failed to open the MPI use log file '%s'.", filename);
        return;
      }
    
      /* Targets for the fields of a line, not all of these are used. */
      char line[132];
      size_t stic, etic, dtic, size, sum;
      int step, rank, otherrank, itype, isubtype, activation, tag;
      char type[32], subtype[32];
      size_t nskipped = 0;
    
      /* Read until the end of the file is reached, or a read fails. Testing
       * the result of fgets, not feof, so a read error also ends the loop. */
      while (fgets(line, sizeof(line), fd) != NULL) {
    
        /* Comment line. */
        if (line[0] == '#') continue;
    
        /* The string fields are width limited to fit their buffers. */
        const int nfields =
            sscanf(line, "%zu %zu %zu %d %d %d %31s %d %31s %d %d %d %zu %zu",
                   &stic, &etic, &dtic, &step, &rank, &otherrank, type, &itype,
                   subtype, &isubtype, &activation, &tag, &size, &sum);
    
        /* Only log completely parsed lines, otherwise we would log values left
         * over from a previous line, or never set at all. */
        if (nfields != 14) {
          nskipped++;
          continue;
        }
    
        mpiuse_log_allocation(rank, step, stic, itype, isubtype, activation, size,
                              otherrank, tag);
      }
      fclose(fd);
    
      if (nskipped > 0)
        message("Skipped %zu unparsable lines in the MPI use log file '%s'.",
                nskipped, filename);
    }
    
    /**
     * @brief append the completed log entries of this rank to a file.
     *
     * Only entries whose rank is our rank and that have an endtic greater than
     * zero are written. If the file cannot be opened a message is issued and
     * nothing is written.
     *
     * @param dumpfile the file to append to.
     */
    static void mpiuse_append_rank_logs(const char *dumpfile) {
    
      /* Open file and position at end. */
      FILE *fd = fopen(dumpfile, "a");
      if (fd == NULL) {
        message("Failed to open the MPI use log file '%s' for appending.",
                dumpfile);
        return;
      }
    
      /* And append our logs. Note log->tic is not necessarily from this
       * machine, so the conversion to ms may be suspect. We also rebase a
       * version to match the expected injection times for this new run. */
      size_t nlogs = mpiuse_log_count;
      ticks basetics = 0;
      for (size_t i = 0; i < nlogs; i++) {
        struct mpiuse_log_entry *log = &mpiuse_log[i];
        if (log->rank == myrank && log->endtic > 0) {
    
          /* The first entry written defines the base of the relative times. */
          if (basetics == 0) basetics = log->tic;
          fprintf(fd,
                  "%lld %.4f %.4f %.4f %.6f %d %d %d %d %d %d %zu %d %.4f %.6f "
                  "%.6f\n",
                  log->tic, clocks_from_ticks(log->tic - basetics),
                  clocks_from_ticks(log->injtic - clocks_start_ticks),
                  clocks_from_ticks(log->endtic - clocks_start_ticks),
                  clocks_from_ticks(log->endtic - log->injtic), log->step,
                  log->rank, log->otherrank, log->type, log->subtype, log->tag,
                  log->size, log->nr_tests, clocks_from_ticks(log->tsum),
                  clocks_from_ticks(log->tmin), clocks_from_ticks(log->tmax));
        }
      }
      fclose(fd);
    }
    
    /**
     * @brief dump the logs for all ranks to a file.
     *
     * Rank 0 creates the file and writes the header, then the ranks take turns,
     * in rank order, to append their own completed entries. This uses
     * MPI_Barrier and MPI_Bcast on MPI_COMM_WORLD, so must be called by all
     * ranks. A rank that fails to open the file reports that and writes
     * nothing, but still takes part in the collective calls so that the ranks
     * stay in step.
     *
     * @param nranks the number of ranks.
     * @param dumpfile the file to write
     */
    void mpiuse_dump_logs(int nranks, const char *dumpfile) {
    
      /* Make sure output file is empty, only on one rank. */
      if (myrank == 0) {
        FILE *fd = fopen(dumpfile, "w");
        if (fd == NULL) {
          message("Failed to create the MPI use log file '%s'.", dumpfile);
        } else {
    
          /* Header. */
          fprintf(fd,
                  "# logticin logtic injtic endtic dtic step rank otherrank itype "
                  " isubtype tag size nr_tests tsum tmin tmax\n");
          fclose(fd);
        }
      }
      MPI_Barrier(MPI_COMM_WORLD);
    
      /* Loop over all ranks, one by one, getting each rank to append their
       * logs. */
      for (int k = 0; k < nranks; k++) {
    
        /* Rank 0 decides the index of the writing node, this happens
         * one-by-one. */
        int kk = k;
        MPI_Bcast(&kk, 1, MPI_INT, 0, MPI_COMM_WORLD);
    
        if (kk == myrank) mpiuse_append_rank_logs(dumpfile);
    
        /* Need to stay in step. */
        MPI_Barrier(MPI_COMM_WORLD);
      }
    }
    
    /**
     * @brief report how many entries have been logged.
     *
     * @result the current value of the log entry counter, converted to int.
     */
    int mpiuse_nr_logs(void) {
      const size_t nr_entries = mpiuse_log_count;
      return (int)nr_entries;
    }
    
    /**
     * @brief report how many ranks are in the log.
     *
     * @result one more than the largest rank that has been logged.
     */
    int mpiuse_nr_ranks(void) {
      const int highest_rank = mpiuse_max_rank;
      return highest_rank + 1;
    }
    
    /**
     * @brief look up a log entry by index.
     *
     * @param ind the index of the entry required.
     * @result pointer to the entry, or NULL when ind is negative or not less
     *         than the number of entries logged.
     */
    struct mpiuse_log_entry *mpiuse_get_log(int ind) {
    
      /* Sample the counter once, it can change under us. */
      const size_t nr_entries = mpiuse_log_count;
    
      /* Reject anything outside of the logged range. */
      if ((size_t)ind >= nr_entries || ind < 0) return NULL;
    
      return &mpiuse_log[ind];
    }