#!/usr/bin/env tclsh

############################################################################################################
# AnnotSV 3.5.6                                                                                            #
#                                                                                                          #
# AnnotSV: An integrated tool for Structural Variations annotation and ranking                             #
#                                                                                                          #
# Copyright (C) 2017-present Veronique Geoffroy (veronique.geoffroy@inserm.fr)                             #
#                                                                                                          #
# This is part of AnnotSV source code.                                                                     #
#                                                                                                          #
# This program is free software; you can redistribute it and/or                                            #
# modify it under the terms of the GNU General Public License                                              #
# as published by the Free Software Foundation; either version 3                                           #
# of the License, or (at your option) any later version.                                                   #
#                                                                                                          #
# This program is distributed in the hope that it will be useful,                                          #
# but WITHOUT ANY WARRANTY; without even the implied warranty of                                           #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the                                             #
# GNU General Public License for more details.                                                             #
#                                                                                                          #
# You should have received a copy of the GNU General Public License                                        #
# along with this program; If not, see <http://www.gnu.org/licenses/>.                                     #
############################################################################################################

# non-zero exit codes --> usually interpreted as error cases
# zero exit code --> Terminate the process without error
# exit <=> exit 0 (default)

global g_AnnotSV

# getScriptDirectory --
#   Returns the directory that contains the script currently being evaluated.
#   The script path reported by [info script] is first turned into a normalized
#   path with [file normalize], then its directory part is extracted.
# Arguments: none
# Result:    the directory name, as a string
proc getScriptDirectory {} {
    return [file dirname [file normalize [info script]]]
}

#if {[info exists env(ANNOTSV)]} {
#    regsub "/$" $env(ANNOTSV) "" env(ANNOTSV)
#}
#set ANNOTSVinstallDir [file normalize "$env(ANNOTSV)"]


# Setting of g_AnnotSV(installDir), g_AnnotSV(etcDir), g_AnnotSV(docDir), g_AnnotSV(tclDir) and g_AnnotSV(bashDir).
# The installation directory is the directory of this script with a trailing "/bin" removed;
# every other directory is a fixed sub-path of it.
set tclVersion [info tclversion]
set binDirFromScript [getScriptDirectory]
regsub {/bin$} $binDirFromScript {} binDirFromScript

array set g_AnnotSV [list \
    installDir $binDirFromScript \
    etcDir     $binDirFromScript/etc/AnnotSV \
    docDir     $binDirFromScript/share/doc/AnnotSV \
    tclDir     $binDirFromScript/share/tcl${tclVersion}/AnnotSV \
    bashDir    $binDirFromScript/share/bash/AnnotSV]

# Fall back on the version-independent Tcl directory when the versioned one is absent
if {![file exists $g_AnnotSV(tclDir)]} {
    set g_AnnotSV(tclDir) $binDirFromScript/share/tcl/AnnotSV
}
# g_AnnotSV(annotationsDir) is set to "$g_AnnotSV(installDir)/share/AnnotSV" in AnnotSV-config.tcl

# The bundled tcllib directory is put first in "auto_path" so that the Tcl packages
# added in AnnotSV are searched before any other location
set auto_path [linsert $::auto_path 0 "$g_AnnotSV(tclDir)/tcllib"]

# Load every AnnotSV Tcl module ("AnnotSV-<name>.tcl") from g_AnnotSV(tclDir).
# The modules are sourced in the order listed below.
foreach annotsvModule {
    benignsv
    candidategenes
    clingen
    closestgenes
    config
    cosmic
    cytoband
    encodeblacklist
    exac
    exomiser
    extann
    filteredVCF
    gap
    gccontent
    gencc
    general
    genes
    haploinsufficiency
    help
    loeuf-pLI
    ncbi
    omim
    pathogenicsv
    pathogenicsnvindel
    phenogenius
    regulatoryelements
    ranking
    repeat
    segdup
    tad
    userBED
    variantconvert
    vcf
    write
} {
    source $g_AnnotSV(tclDir)/AnnotSV-${annotsvModule}.tcl
}

# Setting of g_AnnotSV(Version):
# Unless it is already defined, the version is taken from the first line of
# AnnotSV-config.tcl that looks like "# AnnotSV X.Y" or "# AnnotSV X.Y.Z".
# "X.X" is used when no such line is found.
# NOTE(review): LinesFromFile is defined outside this file; it is assumed to return
# the lines of the given file as a list - confirm.
if {![info exists g_AnnotSV(Version)]} {
    set configFile "$g_AnnotSV(tclDir)/AnnotSV-config.tcl"
    foreach L [LinesFromFile $configFile] {
        # Braced pattern with escaped dots: only a literal "." is accepted between the
        # numeric fields (an unescaped "." would match any character).
        if {[regexp {^# AnnotSV ([0-9]+\.[0-9]+(\.[0-9]+)?)} $L match version]} {
            set g_AnnotSV(Version) "$version"
            break
        }
    }
}
if {![info exists g_AnnotSV(Version)]} {
    set g_AnnotSV(Version) "X.X"
}

# Banner. The version line is always printed first.
puts "AnnotSV $g_AnnotSV(Version)"

# Version request: stop right after the version line (exit code 0).
# Each command line argument is tested on its own, so that only an argument which IS
# "-version" / "-Version" (one or more leading dashes) triggers the exit; an argument
# that merely contains "-version" (e.g. a file name) no longer does.
# Using lsearch also avoids handing to "regexp" a pattern that begins with "-".
if {[lsearch -regexp $argv {^-+[vV]ersion$}] >= 0} {
    exit
}
puts ""
puts "Copyright (C) 2017-current GEOFFROY Veronique"
puts ""
puts "Please feel free to create a Github issue for any suggestions or bug reports (https://github.com/lgmgeo/AnnotSV/issues)"
puts ""
puts "Tcl/Tk version: $tclVersion"
puts ""
puts "Application name used:"
puts "$g_AnnotSV(installDir)\n\n"

# Warn (without stopping) when the interpreter is older than Tcl 8.5.
# tclVersion is turned into a {major minor} list, compared numerically.
set tclVersion [split $tclVersion "."]
if {[lindex $tclVersion 0] < 8 || ([lindex $tclVersion 0] == 8 && [lindex $tclVersion 1] < 5)} {
    puts "AnnotSV requires a release of the Tcl distribution starting with version 8.5."
    puts "(AnnotSV has not been tested with lower version)"
}

## No argument given: display the help and stop (exit code 0)
if {[llength $argv] == 0} {
    puts "Arguments are missing see help below\n"
    showHelp
    exit 0
}

## Needing help?
# The help is displayed only when one of the arguments is itself "help", "-help",
# "--help", ... (case-insensitive). Arguments are tested one by one, so that a path or
# a file name which merely contains "help" does not trigger the help anymore.
if {[lsearch -regexp $argv {(?i)^-*help$}] >= 0} {
    showHelp
    exit 0
}

## Downloading configuration:
# The whole argument list is handed to configureAnnotSV (defined outside this file).
configureAnnotSV $argv


## Depending on the format (VCF or BED) of the SV input file:
if {![regexp -nocase "\\.vcf(.gz)?$" $g_AnnotSV(SVinputFile)]} {
    ## SVinputfile is a BED: it is used as it is
    set g_AnnotSV(bedFile) $g_AnnotSV(SVinputFile)

    ## Name of the header file: the BED file name with ".bed" replaced by ".header.tsv"
    regsub -nocase ".bed$" $g_AnnotSV(bedFile) ".header.tsv" BEDinputHeaderFile

    ## A previous header file is removed. When the substitution did not change
    ## the name, both names are equal and nothing is deleted (this would be the input file).
    if {"$g_AnnotSV(bedFile)" ne "$BEDinputHeaderFile"} {
        file delete -force $BEDinputHeaderFile
    }

    createBEDinputHeaderFile
    addNAinSamplesIDbedCol
} else {
    ## SVinputfile is a VCF (".vcf" or ".vcf.gz")
    ## -> needs to be formatted in BED
    set g_AnnotSV(bedFile) [VCFsToBED "$g_AnnotSV(SVinputFile)"]
}

## Check of the annotation data sources.
## Every "check*" / "memorize*" command below is a procedure defined outside this file
## (presumably in the AnnotSV-*.tcl modules sourced above - TODO confirm).
## They are called one after the other, in the order written here.

## Check VariantConvert configuration:
######################################
checkVariantconvertConfigfile

## Genes annotations:
#####################

# Progress message with the current date and time
puts "...checking the annotation data sources ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"

# Annotation of genes depending on the transcripts (RefSeq or ENSEMBL)?
# g_AnnotSV(tx) equal to "RefSeq" selects the RefSeq check, any other value the ENSEMBL one.
if {$g_AnnotSV(tx) eq "RefSeq"} {
    checkGenesRefSeqFile
} else {
    checkGenesENSEMBLfile
}
# Annotation with GenCC?
checkGenCCgeneFile
# Annotation with OMIM?
checkNCBIandHGNC
memorizeGeneNameAlias
checkOMIMfile
checkMorbidfile
# Annotation with HI (Haploinsufficiency)?
checkHIfile
# Annotation with ClinGen?
checkClinGenFile
# Annotation with Exomiser?
# checkNCBI (done with OMIM)
checkExomiserInstallation
# Annotation with PhenoGenius?
checkPhenoGeniusCli
# Annotation with LOEUF?
checkLOEUFfile

## SVincludedInFt:
##################
# Annotations with benign genes or genomic regions?
checkBenignFiles
checkOverlappedGenesBenignFiles

# Annotation with GeneIntolerance (ExAC)?
checkGeneIntoleranceFile
checkCNVintoleranceFile

## Breakpoint annotations:
##########################
# Annotation with GC content?
checkFASTAfiles
# Annotation with Repeat?
checkRepeatFile
# Annotation with Segmental Duplication?
checkSegDupFile
# Annotation with Gap?
checkGapFile
# Annotation with ENCODE blacklist?
checkENCODEblacklistFile
# Annotation with Cytoband?
checkCytoband

## FtIncludedInSV annotations:
##############################
# Annotations with pathogenic genes or genomic regions?
checkPathogenicFiles
# Annotations with pathogenic SNV/indel?
checkPathoSNVindelFile
# Annotation with Regulatory Elements?
checkPromoterFile
checkEAfiles
checkGHfiles
checkMiRTargetLinkFiles
checkABCfiles
checkMPRAfiles
# Annotation with TAD?
checkTADfiles
# Annotation with COSMIC?
checkCOSMICfile

# Users BED regions annotation files:
# (from $ANNOTSV/share/AnnotSV/Annotations_$g_AnnotSV(organism)/Users/GRCh*/*ncludedIn*/*.bed)
##############################################################################################
checkUsersBED

# Users Gene-based annotation files:
# (from $ANNOTSV/Annotations_$g_AnnotSV(organism)/*/ and from user command lines "-externalGeneFiles")
######################################################################################################
# g_AnnotSV(extann) has been initialized in AnnotSV-config.tcl
# g_AnnotSV(extann) will be completed in AnnotSV-exomiser.tcl
#
# The files are appended to g_AnnotSV(extann) in this order:
#   1. Gene-based/*/*.tsv      (the "..._DGV_samplesInStudies.tsv" files are left out)
#   2. Gene-based/*/*.tsv.gz
#   3. Users/*.tsv
#   4. Users/*.tsv.gz
#   5. the files given with "-externalGeneFiles"
set geneBasedDir "$g_AnnotSV(annotationsDir)/Annotations_$g_AnnotSV(organism)/Gene-based"
set userDir "$g_AnnotSV(annotationsDir)/Annotations_$g_AnnotSV(organism)/Users/"

# 1. Only this pattern is filtered
foreach annotFile [glob -nocomplain $geneBasedDir/*/*.tsv] {
    if {![regexp "_DGV_samplesInStudies.tsv$" $annotFile]} {
        lappend g_AnnotSV(extann) $annotFile
    }
}

# 2. to 4. Every matching file is kept
foreach annotPattern [list $geneBasedDir/*/*.tsv.gz $userDir/*.tsv $userDir/*.tsv.gz] {
    foreach annotFile [glob -nocomplain $annotPattern] {
        lappend g_AnnotSV(extann) $annotFile
    }
}

# 5. Files from the command line
foreach annotFile $g_AnnotSV(externalGeneFiles) {
    lappend g_AnnotSV(extann) $annotFile
}

# Depending on the organism, gene-based annotations can be absent:
# g_AnnotSV(geneBasedAnn) is 1 when at least one file has been listed, else 0
set g_AnnotSV(geneBasedAnn) [expr {$g_AnnotSV(extann) ne ""}]



# DISPLAY
# Prints the content of g_AnnotSV as "-key value" lines, sorted by key.
# A key is not displayed when it matches the pattern below or when its value is empty.
puts ""
puts "...listing arguments"
puts "\t******************************************"
puts "\tAnnotSV has been run with these arguments:"
puts "\t******************************************"
set hiddenKeysRE "ABCann|GHann|MPRAann|EAann|Ann|bashDir|bedFile|docDir|etcDir|extann|installDir|outputColHeader|ranking|genes|samplesidTSVcol|svtTSVcol|tclDir|userDir|Version$"
set lKey [lsort [array names g_AnnotSV]]
foreach key $lKey {
    if {![regexp $hiddenKeysRE $key] && $g_AnnotSV($key) ne ""} {
        puts "\t-$key $g_AnnotSV($key)"
    }
}
puts "\t******************************************\n"


# Annotation with the gene track
# (genesAnnotation and OrganizeAnnotation are defined outside this file)
genesAnnotation
OrganizeAnnotation

# Cleaning: removal of the header file associated with a BED input.
# The test is case-insensitive, like the substitution that built BEDinputHeaderFile from
# the BED file name: an input named "*.BED" gets its header file removed as well.
# NOTE(review): the header file is presumably written by createBEDinputHeaderFile - confirm.
if {[info exists BEDinputHeaderFile] && [regexp -nocase {\.bed$} $g_AnnotSV(SVinputFile)]} {
    file delete -force $BEDinputHeaderFile
}
puts "\n...AnnotSV is done with the analysis ([clock format [clock seconds] -format "%B %d %Y - %H:%M"])"


