#!/usr/bin/env python
#
# Usage:
#  python plot_scaling_results.py input-file1-ext input-file2-ext ...
#
# Description:
# Plots speed up, parallel efficiency and time to solution given a "timesteps" output file generated by SWIFT.
# 
# Example:
# python plot_scaling_results.py _hreads_cosma_stdout.txt _threads_knl_stdout.txt
# 
# The working directory should contain files 1_threads_cosma_stdout.txt - 64_threads_cosma_stdout.txt and 1_threads_knl_stdout.txt - 64_threads_knl_stdout.txt, i.e wall clock time for each run using a given number of threads

import sys
import glob
import re
import numpy as np
import matplotlib.pyplot as plt

params = {'axes.labelsize': 14,
'axes.titlesize': 18,
'font.size': 12,
'legend.fontsize': 12,
'xtick.labelsize': 14,
'ytick.labelsize': 14,
'text.usetex': True,
'figure.subplot.left'    : 0.055,
'figure.subplot.right'   : 0.98  ,
'figure.subplot.bottom'  : 0.05  ,
'figure.subplot.top'     : 0.95  ,
'figure.subplot.wspace'  : 0.14  ,
'figure.subplot.hspace'  : 0.12  ,
'lines.markersize' : 6,
'lines.linewidth' : 3.,
'text.latex.unicode': True
}
plt.rcParams.update(params)
plt.rc('font',**{'family':'sans-serif','sans-serif':['Times']})

version = []
branch = []
revision = []
hydro_scheme = []
hydro_kernel = []
hydro_neighbours = []
hydro_eta = []
threadList = []
hexcols = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77',
           '#CC6677', '#882255', '#AA4499', '#661100', '#6699CC', '#AA4466',
           '#4477AA']
linestyle = (hexcols[0],hexcols[1],hexcols[3],hexcols[5],hexcols[6],hexcols[8])
#cmdLine = './swift_fixdt -s -t 16 cosmoVolume.yml'
#platform = 'KNL'

# Work out how many data series there are
if len(sys.argv) == 2:
  inputFileNames = (sys.argv[1],"")
  numOfSeries = 1
elif len(sys.argv) == 3:
  inputFileNames = (sys.argv[1],sys.argv[2])
  numOfSeries = 2
elif len(sys.argv) == 4:
  inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3])
  numOfSeries = 3
elif len(sys.argv) == 5:
  inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])
  numOfSeries = 4
elif len(sys.argv) == 6:
  inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5])
  numOfSeries = 5
elif len(sys.argv) == 7:
  inputFileNames = (sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],sys.argv[6])
  numOfSeries = 6

# Get the names of the branch, Git revision, hydro scheme and hydro kernel
def parse_header(inputFile):
  with open(inputFile, 'r') as f:
    found_end = False
    for line in f:
      if 'Branch:' in line:
        s = line.split()
        line = s[2:]
        branch.append(" ".join(line))
      elif 'Revision:' in line:
        s = line.split() 
        revision.append(s[2])
      elif 'Hydrodynamic scheme:' in line:
        line = line[2:-1]
        s = line.split()
        line = s[2:]
        hydro_scheme.append(" ".join(line))
      elif 'Hydrodynamic kernel:' in line:
        line = line[2:-1]
        s = line.split()
        line = s[2:5]
        hydro_kernel.append(" ".join(line))
      elif 'neighbours:' in line:
        s = line.split() 
        hydro_neighbours.append(s[4])
      elif 'Eta:' in line:
        s = line.split() 
        hydro_eta.append(s[2])
  return

# Parse file and return total time taken, speed up and parallel efficiency
def parse_files():
  
  times = []
  totalTime = []
  serialTime = []
  speedUp = []
  parallelEff = []

  for i in range(0,numOfSeries): # Loop over each data series
 
    # Get each file that starts with the cmd line arg
    file_list = glob.glob(inputFileNames[i] + "*")
    
    threadList.append([])

    # Create a list of threads using the list of files
    for fileName in file_list:
      s = re.split(r'[_.]+',fileName)
      threadList[i].append(int(s[1]))

    # Sort the thread list in ascending order and save the indices
    sorted_indices = np.argsort(threadList[i])
    threadList[i].sort()

    # Sort the file list in ascending order acording to the thread number
    file_list = [ file_list[j] for j in sorted_indices]

    parse_header(file_list[0])

    branch[i] = branch[i].replace("_", "\\_") 
    
    version.append("$\\textrm{%s}$"%str(branch[i]) + " " + revision[i] + "\n" + hydro_scheme[i] + 
                   "\n" + hydro_kernel[i] + r", $N_{ngb}=%d$"%float(hydro_neighbours[i]) + 
                   r", $\eta=%.3f$"%float(hydro_eta[i]))
    times.append([])
    totalTime.append([])
    speedUp.append([])
    parallelEff.append([])

    # Loop over all files for a given series and load the times
    for j in range(0,len(file_list)):
      times[i].append([])
      times[i][j].append(np.loadtxt(file_list[j],usecols=(5,), skiprows=11))
      totalTime[i].append(np.sum(times[i][j]))

    serialTime.append(totalTime[i][0])
    
    # Loop over all files for a given series and calculate speed up and parallel efficiency
    for j in range(0,len(file_list)):
      speedUp[i].append(serialTime[i] / totalTime[i][j])
      parallelEff[i].append(speedUp[i][j] / threadList[i][j])

  return (times,totalTime,speedUp,parallelEff)

def print_results(times,totalTime,parallelEff,version):
 
  for i in range(0,numOfSeries):
    print " "
    print "------------------------------------"
    print version[i]
    print "------------------------------------"
    print "Wall clock time for: {} time steps".format(len(times[0][0][0]))
    print "------------------------------------"
    
    for j in range(0,len(threadList[i])):
      print str(threadList[i][j]) + " threads: {}".format(totalTime[i][j])
    
    print " "
    print "------------------------------------"
    print "Parallel Efficiency for: {} time steps".format(len(times[0][0][0]))
    print "------------------------------------"
    
    for j in range(0,len(threadList[i])):
      print str(threadList[i][j]) + " threads: {}".format(parallelEff[i][j])

  return

def plot_results(times,totalTime,speedUp,parallelEff):
  
  fig, axarr = plt.subplots(2, 2, figsize=(10,10), frameon=True)
  speedUpPlot = axarr[0, 0]
  parallelEffPlot = axarr[0, 1]
  totalTimePlot = axarr[1, 0]
  emptyPlot = axarr[1, 1]
  
  # Plot speed up
  speedUpPlot.plot(threadList[0],threadList[0], linestyle='--', lw=1.5, color='0.2')
  for i in range(0,numOfSeries):
    speedUpPlot.plot(threadList[i],speedUp[i],linestyle[i],label=version[i])
  
  speedUpPlot.set_ylabel("${\\rm Speed\\textendash up}$", labelpad=0.)
  speedUpPlot.set_xlabel("${\\rm Threads}$", labelpad=0.)
  speedUpPlot.set_xlim([0.7,threadList[i][-1]+1])
  speedUpPlot.set_ylim([0.7,threadList[i][-1]+1])

  # Plot parallel efficiency
  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [1,1], 'k--', lw=1.5, color='0.2')
  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.9,0.9], 'k--', lw=1.5, color='0.2')
  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.75,0.75], 'k--', lw=1.5, color='0.2')
  parallelEffPlot.plot([threadList[0][0], 10**np.floor(np.log10(threadList[0][-1])+1)], [0.5,0.5], 'k--', lw=1.5, color='0.2')
  for i in range(0,numOfSeries):
    parallelEffPlot.plot(threadList[i],parallelEff[i],linestyle[i])

  parallelEffPlot.set_xscale('log')
  parallelEffPlot.set_ylabel("${\\rm Parallel~efficiency}$", labelpad=0.)
  parallelEffPlot.set_xlabel("${\\rm Threads}$", labelpad=0.)
  parallelEffPlot.set_ylim([0,1.1])
  parallelEffPlot.set_xlim([0.9,10**(np.floor(np.log10(threadList[i][-1]))+0.5)])

  # Plot time to solution     
  for i in range(0,numOfSeries):
    pts = [1, 10**np.floor(np.log10(threadList[i][-1])+1)]
    totalTimePlot.loglog(pts,totalTime[i][0]/pts, 'k--', lw=1., color='0.2')
    totalTimePlot.loglog(threadList[i],totalTime[i],linestyle[i],label=version[i])

  y_min = 10**np.floor(np.log10(np.min(totalTime[:][-1])*0.6))
  y_max = 1.2*10**np.floor(np.log10(np.max(totalTime[:][0]) * 1.5)+1)
  totalTimePlot.set_xscale('log')
  totalTimePlot.set_xlabel("${\\rm Threads}$", labelpad=0.)
  totalTimePlot.set_ylabel("${\\rm Time~to~solution}~[{\\rm ms}]$", labelpad=0.)
  totalTimePlot.set_xlim([0.9, 10**(np.floor(np.log10(threadList[i][-1]))+0.5)])
  totalTimePlot.set_ylim(y_min, y_max)

  ax2 = totalTimePlot.twinx()
  ax2.set_yscale('log')
  ax2.set_ylabel("${\\rm Time~to~solution}~[{\\rm hr}]$", labelpad=0.)
  ax2.set_ylim(y_min / 3.6e6, y_max / 3.6e6)
  
  totalTimePlot.legend(bbox_to_anchor=(1.21, 0.97), loc=2, borderaxespad=0.,prop={'size':12}, frameon=False)
  emptyPlot.axis('off')
  
  for i, txt in enumerate(threadList[0]):
    if 2**np.floor(np.log2(threadList[0][i])) == threadList[0][i]: # only powers of 2
      speedUpPlot.annotate("$%s$"%txt, (threadList[0][i],speedUp[0][i]), (threadList[0][i],speedUp[0][i] + 0.3), color=hexcols[0])
      parallelEffPlot.annotate("$%s$"%txt, (threadList[0][i],parallelEff[0][i]), (threadList[0][i], parallelEff[0][i]+0.02), color=hexcols[0])
      totalTimePlot.annotate("$%s$"%txt, (threadList[0][i],totalTime[0][i]), (threadList[0][i], totalTime[0][i]*1.1), color=hexcols[0])

  #fig.suptitle("Thread Speed Up, Parallel Efficiency and Time To Solution for {} Time Steps of Cosmo Volume\n Cmd Line: {}, Platform: {}".format(len(times[0][0][0]),cmdLine,platform))
  fig.suptitle("${\\rm Speed\\textendash up,~parallel~efficiency~and~time~to~solution~for}~%d~{\\rm time\\textendash steps}$"%len(times[0][0][0]), fontsize=16)

  return

# Calculate results
(times,totalTime,speedUp,parallelEff) = parse_files()

plot_results(times,totalTime,speedUp,parallelEff)

print_results(times,totalTime,parallelEff,version)

# And plot
plt.show()