#!/home/conda/feedstock_root/build_artifacts/bld/rattler-build_gstlal-ugly_1767612080/host_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_pl/bin/python
#
# Copyright (C) 2018 Duncan Meacher
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

from functools import partial
import sys
import os
import re
from optparse import OptionParser
from datetime import datetime
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.dates as md
import matplotlib.pyplot as plt

from ligo.scald import report

from gstlal.plots import util as plotutil

matplotlib.rcParams.update({
    "font.size": 12.0,
    "axes.titlesize": 14.0,
    "axes.labelsize": 14.0,
    "xtick.labelsize": 12.0,
    "ytick.labelsize": 12.0,
    "legend.fontsize": 12.0,
    "figure.dpi": 300,
    "savefig.dpi": 300,
    "text.usetex": False,
    "path.simplify": True
})

color_scheme = ['#332288', '#88CCEE', '#44AA99', '#117733', '#999933', '#DDCC77', '#CC6677', '#882255', '#AA4499']


def generic_hist_plot(durations,jobname):
	"""Plot a histogram of the run times of one job type and save it as a PNG.

	The time unit, bin width and tick spacing are chosen from the longest
	duration: hours when it exceeds 4 hours, minutes otherwise.  The figure is
	written to '<options.output_dir>/<jobname>_runtime_hist.png', where
	`options` is the module-level result of command-line parsing.

	Parameters:
		durations: non-empty sequence of job durations in seconds.
		jobname: job type name, used in the plot title and the file name.

	Raises:
		ValueError: if `durations` is empty (raised by max()).
	"""
	max_t = max(durations)

	# Pick the unit (denom is seconds per unit), the bin width and the tick
	# spacing.  The thresholds are chained so that durations of exactly 2 or
	# 4 hours get the coarse layout instead of falling through to 1 minute bins.
	if max_t > (3600. * 4.): # if max duration is > 4 hours, use bins of 1 hour
		denom = 3600.
		xlabel = "time (hours)"
		bin_step = 1
		# above 60 hours only label every 12th hour to keep the axis readable
		tick_step = 12 if max_t > (3600. * 60.) else 1
	else:
		denom = 60.
		xlabel = "time (minutes)"
		if max_t > (3600. * 2.): # between 2 and 4 hours
			bin_step, tick_step = 10, 10
		elif max_t > 3600.: # between 1 and 2 hours
			bin_step, tick_step = 2, 10
		else: # up to 1 hour, use bins and tick marks of 1 minute
			bin_step, tick_step = 1, 1

	# Upper end of the data in plot units; at least 1 so that all-zero
	# durations still give a valid (two edge) set of bins.
	upper = max(np.ceil(max_t/denom), 1.)
	# Extend the edges by one bin width so that the last edge is never below
	# the longest duration; values beyond the last edge would be dropped by
	# the histogram.
	x_bins = np.arange(0, upper + bin_step, bin_step)
	x_tick = np.arange(0, upper + 1, tick_step)

	mean_t = np.mean(durations)/denom

	fig = plt.figure(figsize=(20,10))
	plt.hist(np.array(durations)/denom, bins = x_bins)
	ax = fig.gca()
	# the marker lines span the y-range chosen for the histogram bars
	_, ymax = ax.get_ylim()
	plt.plot([mean_t, mean_t],[0, ymax], color='black',linestyle='--', label='mean duration')
	plt.plot([max_t/denom, max_t/denom],[0, ymax], color='black',linestyle='-', label='max duration')
	plt.title("%s durations" % jobname, fontsize=20)
	plt.xlabel("%s"%xlabel, fontsize=16)
	plt.ylabel("Frequency", fontsize=16)
	plt.xticks(x_tick)
	# show every bin in full, including the one holding the longest job
	plt.xlim([0, x_bins[-1]])
	plt.legend(loc='upper right', fontsize=16)
	ax.yaxis.grid(True)
	plotName = '%s/%s_runtime_hist.png' % (options.output_dir,jobname)
	plt.savefig(plotName)
	plt.close(fig)


def inspiral_hist_plot(gstlal_inspiral_dur, gstlal_inspiral_inj_dur):
	"""Overlay the run time histograms of the two inspiral job types.

	Both sets of durations are binned in hours on a shared axis and the mean
	and maximum of each set are marked with vertical lines.  The figure is
	written to '<options.output_dir>/gstlal_inspiral_runtime_comparison_hist.png',
	where `options` is the module-level result of command-line parsing.

	Parameters:
		gstlal_inspiral_dur: non-empty sequence of durations in seconds,
			drawn in blue and labelled 'inspiral'.
		gstlal_inspiral_inj_dur: non-empty sequence of durations in seconds,
			drawn in red and labelled 'inspiral_inj'.
	"""
	denom = 3600.
	longest = max(gstlal_inspiral_dur)
	longest_inj = max(gstlal_inspiral_inj_dur)
	max_t = max(longest, longest_inj)
	last_hour = np.ceil(max_t/denom)

	# one bin per hour; above 60 hours only every 12th hour gets a tick
	x_bins = np.arange(0, last_hour+1, 1)
	tick_step = 12 if max_t > (3600. * 60.) else 1
	x_tick = np.arange(0, last_hour+1, tick_step)

	fig = plt.figure(figsize=(20,10))
	ax = fig.gca()
	histograms = (
		(gstlal_inspiral_dur, 'inspiral', 'blue'),
		(gstlal_inspiral_inj_dur, 'inspiral_inj', 'red'),
	)
	for values, tag, shade in histograms:
		plt.hist(np.array(values)/denom, bins = x_bins, alpha=0.5, label=tag, color=shade)

	# vertical marker lines reach the top of the y-range chosen for the bars
	_, ymax = ax.get_ylim()
	markers = (
		(np.mean(gstlal_inspiral_dur)/denom, {'linestyle': '-.', 'label': 'inspiral mean duration'}),
		(np.mean(gstlal_inspiral_inj_dur)/denom, {'linestyle': '--', 'label': 'inspiral_inj mean duration'}),
		(longest/denom, {'linestyle': '-', 'label': 'max duration'}),
		# second maximum line shares the legend entry of the first, so no label
		(longest_inj/denom, {'linestyle': '-'}),
	)
	for position, line_style in markers:
		plt.plot([position, position], [0, ymax], color='black', **line_style)

	plt.title("gstlal_inspiral durations comparison", fontsize=20)
	plt.xlabel("time (hours)", fontsize=16)
	plt.ylabel("Frequency", fontsize=16)
	plt.xticks(x_tick)
	plt.xlim([0, last_hour])
	plt.legend(loc='upper right', fontsize=16)
	ax.yaxis.grid(True)
	plotName = '%s/gstlal_inspiral_runtime_comparison_hist.png' % (options.output_dir,)
	plt.savefig(plotName)
	plt.close(fig)


def generate_html_file(plots, output_dir):
	"""Build a ligo.scald report page listing the given plots and save it.

	Parameters:
		plots: iterable of plot file names, each joined onto `output_dir` to
			form the image path.  The images are added in sorted order; the
			argument itself is not modified.
		output_dir: directory prefix for the images, also passed to the
			report's save().
	"""
	page = report.Report('DAG job durations')

	# header
	summary = report.Tab('Summary')
	summary += report.Header('Duration Results')
	summary += report.Description('Histograms of durations various job types of the pipeline')

	# plots: use the argument rather than whatever module-level list happens
	# to exist, and sort a copy so that the caller's list is left untouched
	for plot in sorted(plots):
		summary += report.Image(os.path.join(output_dir, plot))

	page += summary

	# generate page
	page.save(output_dir)


def parse_command_line():
	"""Parse the command line and make sure the output directory exists.

	Returns:
		(options, filenames): the optparse values object and the list of
		positional arguments.  options.output_dir falls back to "plots_hist"
		when it is not given (or empty), and that directory is created if it
		is missing.

	Raises:
		OSError: if the output directory cannot be created, e.g. because the
			path already exists and is not a directory.
	"""
	parser = OptionParser()

	parser.add_option("--output-dir", metavar = "directory", help = "Set the directory to output figures to.")
	parser.add_option("-i", "--input-file", metavar = "filename", default = "trigger_pipe.dag.dagman.out", help = "input file to read dag information from.")
	parser.add_option("--make-webpage", action = "store_true", help = "Produce a html webpage to display plots.")
	parser.add_option("--verbose", action = "store_true", help = "Be verbose")

	options, filenames = parser.parse_args()

	if not options.output_dir:
		options.output_dir = "plots_hist"

	# makedirs also creates missing parent directories, and exist_ok avoids
	# failing when the directory is already there (or appears concurrently)
	os.makedirs(options.output_dir, exist_ok=True)

	return options, filenames


# main

options, filenames = parse_command_line()

# Every line of interest opens with an "MM/DD/YY HH:MM:SS" stamp (see the
# sample lines quoted below).  The layout is spelled out instead of using
# "%x %X", whose meaning follows the locale of the process.
timestamp_format = "%m/%d/%y %H:%M:%S"

# Collect dictionaries of job start and stop times and of the times at which
# jobs were held and released, all keyed by node name
jobstartdict = {}
jobstopdict = {}
jobholdstartdict = {}
jobholdstopdict = {}
# the with-statement closes the log file as soon as it has been read
with open(options.input_file) as dagman_log:
	for line in dagman_log:
		if "[recovery mode]" in line:
			continue
		# 09/20/16 22:11:04 Event: ULOG_EXECUTE for HTCondor Node gstlal_inspiral_0001 (472899.0.0) {09/20/16 22:09:49}
		if "ULOG_EXECUTE" in line:
			date, clock, _, _, _, _, _, jobname, _, _, _ = line.split()
			# a later execute event for the same node replaces the earlier one
			jobstartdict[jobname] = datetime.strptime("%s %s" % (date, clock), timestamp_format)
		# 09/20/16 22:12:45 Node gstlal_inspiral_0001 job proc (472945.0.0) completed successfully.
		if "completed successfully" in line:
			date, clock, _, jobname, _, _, _, _, _ = line.split()
			# only the first completion reported for a node is kept
			if jobname in jobstopdict:
				continue
			jobstopdict[jobname] = datetime.strptime("%s %s" % (date, clock), timestamp_format)
		# 01/08/19 12:06:30 Event: ULOG_JOB_HELD for HTCondor Node gstlal_inspiral_1663 (12200873.0.0) {01/08/19 12:06:27}
		if "ULOG_JOB_HELD" in line:
			date, clock, _, _, _, _, _, jobname, _, _, _ = line.split()
			jobholdstartdict.setdefault(jobname, []).append(datetime.strptime("%s %s" % (date, clock), timestamp_format))
		# 01/08/19 12:59:30 Event: ULOG_JOB_RELEASED for HTCondor Node gstlal_inspiral_1663 (12200873.0.0) {01/08/19 12:59:21}
		if "ULOG_JOB_RELEASED" in line:
			date, clock, _, _, _, _, _, jobname, _, _, _ = line.split()
			jobholdstopdict.setdefault(jobname, []).append(datetime.strptime("%s %s" % (date, clock), timestamp_format))

# Collect jobs that have both start and stop times, and the subset of those
# that were also held and released at least once
correctKeys = set(jobstartdict) & set(jobstopdict)
holdKeys = correctKeys & set(jobholdstartdict) & set(jobholdstopdict)

if options.verbose:
	print("Number of jobs held:", len(jobholdstartdict))
	print("Number of jobs released:", len(jobholdstopdict))
	print("Number of these completed:", len(holdKeys))

# Calculate each job duration in seconds
duration = {}
for k in correctKeys:
	duration[k] = (jobstopdict[k] - jobstartdict[k]).total_seconds()

# Calculate job hold times; holds and releases are paired in the order in
# which they were logged, and an unpaired trailing event is ignored by zip
holdduration = {}
for k in holdKeys:
	holdduration[k] = sum((holdstop - holdstart).total_seconds() for holdstart, holdstop in zip(jobholdstartdict[k], jobholdstopdict[k]))

# Subtract job hold time from full duration
for k, v in holdduration.items():
	duration[k] -= v

# print any jobs that take longer than 24 hours
if options.verbose:
	print("Jobs with runtimes > 86400s")
	for k, v in sorted(duration.items()):
		if v > 86400:
			print(k, v)
	# FIXME Add regular expression search to match id numbers, so that this
	# list can be split up by job type

# Create dictionaries, keyed by job type.  The job type is the node name
# without its last five characters.
# NOTE(review): this assumes every node name ends in "_" plus a four
# character id, as in the sample log lines above -- confirm for large DAGs.
jobtypes = {}
jobdurs = {}
for k, v in duration.items():
	jobtype = k[:-5]
	jobtypes.setdefault(jobtype, []).append((v, k))
	jobdurs.setdefault(jobtype, []).append(v)

plot_paths = []

# Produce histograms for each job type with at least 5 completed jobs
for k, v in jobdurs.items():
	if len(v) < 5:
		continue
	generic_hist_plot(v, k)
	plot_paths.append('%s_runtime_hist.png' % (k))

# print average and top N job durations for each job type
for k, v in sorted(jobtypes.items()):
	print("\n", "Job:", k)
	print("Number completed:", len(v))
	print("Average time to complete:", round(np.mean([x[0] for x in v]),2), "s")
	print("5 longest running jobs:")
	for s in sorted(v, reverse=True)[:5]:
		print("\t", s)

# Compare the two inspiral job types when both are present
if 'gstlal_inspiral' in jobtypes and 'gstlal_inspiral_inj' in jobtypes:
	gstlal_inspiral_dur = [entry[0] for entry in jobtypes['gstlal_inspiral']]
	gstlal_inspiral_inj_dur = [entry[0] for entry in jobtypes['gstlal_inspiral_inj']]
	inspiral_hist_plot(gstlal_inspiral_dur, gstlal_inspiral_inj_dur)
	plot_paths.append('gstlal_inspiral_runtime_comparison_hist.png')

if options.make_webpage:
	generate_html_file(plot_paths, options.output_dir)
