# doubledata.py

#!/usr/bin/env python
"""
Usage:
    doubledata.py [options] input-log doubled-log

Split pairs of send and recv log data entries to increase the number of
messages that we send to represent the data of a step. Preserves the data
volume, but not the tagging.

This file is part of SWIFT.

Copyright (C) 2019 Peter W. Draper (p.w.draper@durham.ac.uk)
All Rights Reserved.

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import argparse
import sys
from operator import itemgetter

#  Parse the command line: two positional file names and an optional
#  verbosity switch.
parser = argparse.ArgumentParser(description="Pair MPI logs")
parser.add_argument("input", help="Output log from simulator")
parser.add_argument("output", help="Doubled log")
#  store_true already defaults to False and the destination name is taken
#  from the long option, so neither needs to be spelled out.
parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
args = parser.parse_args()

#  File names used by the rest of the script.
infile, outfile = args.input, args.output

#  Zero-based indices of the words in a line, listed in the order the
#  words appear.
(
    sticcol,
    eticcol,
    dticcol,
    stepcol,
    rankcol,
    otherrankcol,
    typecol,
    itypecol,
    subtypecol,
    isubtypecol,
    activationcol,
    tagcol,
    sizecol,
    sumcol,
) = range(14)

# The plan is to pair the sends and receives across all ranks so that
# we can split each pair into two and give everything new tags that also
# match.

#  Keyed lines: each key maps to the list of indices (into sends or recvs)
#  of the lines that produced it.
keysends = {}
keyrecvs = {}

#  Indexed lines: the split words of every selected line, in file order.
sends = []
recvs = []

#  Generate keys that are unique between send/recv pairs and gather the
#  associated lines. The two ranks are joined in opposite order for the
#  two kinds of line (otherrank/rank for itype 22, rank/otherrank for
#  itype 23) so that both ends of one message produce the same key.
with open(infile, "r") as fp:
    for line in fp:
        words = line.split()

        #  Skip comments and blank lines. A blank line has no words, so
        #  indexing it below would raise an IndexError.
        if not words or words[0].startswith("#"):
            continue

        #  Lines whose activation field is "0" are not used.
        if words[activationcol] == "0":
            continue

        itype = words[itypecol]
        if itype == "22":
            key = "/".join((words[otherrankcol],
                            words[rankcol],
                            words[isubtypecol],
                            words[tagcol],
                            words[sizecol]))
            keysends.setdefault(key, []).append(len(sends))
            sends.append(words)

        elif itype == "23":
            key = "/".join((words[rankcol],
                            words[otherrankcol],
                            words[isubtypecol],
                            words[tagcol],
                            words[sizecol]))
            keyrecvs.setdefault(key, []).append(len(recvs))
            recvs.append(words)

#  Counts of the lines gathered above.
nsends = len(sends)
nrecvs = len(recvs)

#  A single parenthesised string prints the same text whether print is a
#  statement or a function.
print("# Read " + str(nsends) + " sends and " + str(nrecvs) + " recvs")

# Now get the indices of the matches. msends[i] is the index into recvs of
# the line paired with sends[i], or None when the key of that send does not
# have exactly one send and one recv.
msends = [None] * nsends
for key, isends in keysends.items():
    irecvs = keyrecvs.get(key)
    if irecvs is None:
        print("# ERROR: missing recv key:  %s" % key)
    elif len(isends) == 1 and len(irecvs) == 1:
        msends[isends[0]] = irecvs[0]
    else:
        print("# ERROR: found  %d / %d  matches for key:  %s  should be 1/1"
              % (len(isends), len(irecvs), key))

# Reorder recvs to same order as sends, keeping only the pairs that were
# matched. Indexing recvs with the None of an unmatched send would raise a
# TypeError, so those sends (already reported above) are dropped from sends
# as well, and nsends is updated so that sends[i] and recvs[i] remain a
# pair for every i below nsends.
matched = [(send, recvs[irecv])
           for send, irecv in zip(sends, msends)
           if irecv is not None]
ndropped = nsends - len(matched)
sends = [pair[0] for pair in matched]
recvs = [pair[1] for pair in matched]
nsends = len(sends)
if ndropped > 0:
    print("# WARNING: dropped " + str(ndropped) + " unmatched sends")

# Now we can split and retag. XXX if you run out of tags need to
# split by rank and then maybe itype and use tag count with these.
#
# Every send/recv pair is emitted twice: once reusing the original word
# lists and once as copies. Each emitted pair shares a tag taken from a
# running counter, and the two emitted sizes add up to the original size.
splits = []
ctag = 0
for swords, rwords in zip(sends, recvs):

    #  Halve the size. The second message takes the remainder, so an odd
    #  size is not rounded down twice and the total volume is preserved.
    total = int(float(swords[sizecol]))
    firstsize = total // 2
    secondsize = total - firstsize

    #  First half: update the original lines in place.
    ctag = ctag + 1
    stag = str(ctag)
    swords[sizecol] = rwords[sizecol] = str(firstsize)
    swords[tagcol] = rwords[tagcol] = stag
    splits.append(swords)
    splits.append(rwords)

    #  Second half: copies of the lines carrying the next tag, so all the
    #  other words are the same as in the first half.
    ctag = ctag + 1
    stag = str(ctag)
    cswords = list(swords)
    crwords = list(rwords)
    cswords[sizecol] = crwords[sizecol] = str(secondsize)
    cswords[tagcol] = crwords[tagcol] = stag
    splits.append(cswords)
    splits.append(crwords)

# Sort by tic. The sort is stable, so lines with the same starting tic keep
# the order in which they were generated.
splits.sort(key=lambda words: int(words[sticcol]))

#  And output: a header naming the columns, then one space-separated line
#  per entry.
with open(outfile, "w") as fp:
    fp.write("# stic etic dtic step rank otherrank type itype subtype isubtype activation tag size sum\n")
    fp.writelines(" ".join(words) + "\n" for words in splits)

#  A single parenthesised string prints the same text whether print is a
#  statement or a function.
print("# Finished")

sys.exit(0)