#!/usr/bin/env perl

use strict;
use Config;
use Fcntl qw(SEEK_SET);
use File::Basename;
use File::Copy;
use File::Spec;
use FindBin qw($Bin);
use Fcntl qw(:flock SEEK_SET SEEK_END);
use Getopt::Long qw(:config no_ignore_case);

use lib $Bin . "/lib";

use Core::Utils;
use Core::Mathematics qw(:all);
use Core::Process::Queue;
use Core::Statistics;
use Data::IO::Sequence;
use Data::Sequence::Utils;
use Graphics::Chart::Barplot;
use Graphics::Image;
use RF::Data::RC;
use RF::Data::IO::RC;
use RF::Utils;
use Term::Table;
use Term::Progress;
use Term::Progress::Multiple;
use Term::Constants qw(:screen);

$|++;

die "\n  [!] Error: This program requires ithreads." .
    "\n             Please recompile Perl with ithreads support and try again\n\n" unless(defined $Config{useithreads});

my ($tmpdir, $output, $wt, $samtoolsParams, $spacer,
    $samtools, $multifasta, $sam, $progressBar, 
    $multiBar, $help, $overwrite, $error, $bam_trim5,
    $offset, $threads, $processmanager, $estimatedSize,
    $table, $mutcount, $rcio, $covonly, $nodel, $baseRC, 
    $pp, $po, $nodiscarddup, $minqual, $maxdel, $maxmut, 
    $hashead, $mapqual, $noambiguous, $medianqual, $noins,
    $collapse, $maxmutdist, $evalsurround, $leftalign,
    $discardshorter, $leftdel, $rightdel, $rmconsecutive,
    $primaryonly, $onlyMut, $blockSize, $libraryStrandAll,
    $noCovLowQual, $image, $R, @bam_trim5, @qcounter, @stats,
    %transcripts, %spacer, %files, %realid, %onlyMut, 
    %blockData, %allStats, %allFreqs, %inHeaders);

do {

    local $SIG{__WARN__} = sub { };

    GetOptions( "h|help"                     => \$help,
                "br|baseRC=s"                => \$baseRC,
                "t|tmp-dir=s"                => \$tmpdir,
                "o|output-dir=s"             => \$output,
                "ow|overwrite"               => \$overwrite,
                "t5|trim-5prime=s"           => \$bam_trim5,
                "wt|working-threads=i"       => \$wt,
                "ls|library-strandedness=s"  => \$libraryStrandAll,
                "s|samtools=s"               => \$samtools,
                "p|processors=i"             => \$threads,
                "f|fasta=s"                  => \$multifasta,
                "m|count-mutations"          => \$mutcount,
                "co|coverage-only"           => \$covonly,
                "nd|no-deletions"            => \$nodel,
                "ni|no-insertions"           => \$noins,
                "md|max-deletion-len=i"      => \$maxdel,
                "pp|properly-paired"         => \$pp,
                "po|paired-only"             => \$po,
                "q|min-quality=i"            => \$minqual,
                "mq|map-quality=i"           => \$mapqual,
                "me|max-edit-distance=s"     => \$maxmut,
                "na|no-ambiguous"            => \$noambiguous,
                "la|left-align"              => \$leftalign,
                "eq|median-quality=i"        => \$medianqual,
                "cc|collapse-consecutive"    => \$collapse,
                "mc|max-collapse-distance=i" => \$maxmutdist,
                "es|eval-surrounding"        => \$evalsurround,
                "ds|discard-shorter=i"       => \$discardshorter,
                "ld|left-deletion"           => \$leftdel,
                "rd|right-deletion"          => \$rightdel,
                "dc|discard-consecutive=i"   => \$rmconsecutive,
                "pn|primary-only"            => \$primaryonly,
                "om|only-mut=s"              => \$onlyMut,
                "ndd|no-discard-duplicates"  => \$nodiscarddup,
                "bs|block-size=s"            => \$blockSize,
                "ncl|no-cov-low-qual"        => \$noCovLowQual,
                "P|per-file-progress"        => \$multiBar,
                "g|img"                      => \$image,
                "R|R-path=s"                 => \$R ) or help(1);

};

help() if ($help);

# Default values
$output ||= "rf_count_genome/";
$wt ||= 1;
$offset = 0;
$threads ||= 1;
$bam_trim5 //= 0;
$minqual //= 20;
$medianqual //= 20;
$mapqual //= 0;
$maxdel //= 10;
$maxmutdist //= 2;
$maxmut ||= 0.15;
$discardshorter //= 1;
$rmconsecutive //= 0;
$blockSize ||= 100000;
$samtools ||= which("samtools");
$R = checkRinstall($R) if ($image && !$covonly);

$output =~ s/\/?$/\//;
$tmpdir = $output . "tmp/";
$multiBar = 0 if (!-t STDOUT);

##
# Input validation
##

die "\n  [!] Error: No sample SAM/BAM file provided\n\n" if (!@ARGV);
die "\n  [!] Error: No FASTA nor base RC file provided\n\n" if (!defined $multifasta && !defined $baseRC);
die "\n  [!] Error: Provided FASTA file doesn't exist\n\n" if (defined $multifasta && !-e $multifasta);
die "\n  [!] Error: Provided base RC file does not exist\n\n" if (defined $baseRC && !-e $baseRC);
die "\n  [!] Error: Parameters -co and -m are mutually exclusive\n\n" if ($mutcount && $covonly);
die "\n  [!] Error: Working threads value must be an integer greater than 0\n\n" if (!isint($wt) || $wt < 1);
die "\n  [!] Error: Invalid format for -t5 parameter's argument\n\n" if (defined $bam_trim5 && $bam_trim5 !~ m/^(\d+[;,]?)+$/);
die "\n  [!] Error: Number of processors must be an integer greater than 0\n\n" if ($threads < 1);
die "\n  [!] Error: Parameter -nd requires parameter -m\n\n" if ($nodel && !$mutcount);
die "\n  [!] Error: Minimum quality score value must be and integer >= 0 and <= 41\n\n" if (!inrange($minqual, [0, 41]));
die "\n  [!] Error: Median read's quality score value must be and integer >= 0 and <= 41\n\n" if (!inrange($medianqual, [0, 41]));
die "\n  [!] Error: Maximum edit distance value must be > 0 and <= 1\n\n" if (!inrange($maxmut, [0, 1]) || !$maxmut);
die "\n  [!] Error: Parameters -na and -la are mutually exclusive\n\n" if ($noambiguous && $leftalign);
die "\n  [!] Error: Parameters -cc and -dc are mutually exclusive\n\n" if ($collapse && $rmconsecutive);
die "\n  [!] Error: Discard shorter must be a positive INT >= 0\n\n" if (!isint($discardshorter) || !ispositive($discardshorter));
die "\n  [!] Error: Invalid library strandedness\n\n" if (defined $libraryStrandAll && $libraryStrandAll !~ /^(?:1|f(?:irst)?(?:-strand)?)|(?:2|s(?:econd)?(?:-strand)?)|(?:0|u(?:nstranded)?)$/);
die "\n  [!] Error: Block size must be a positive INT >= 1000\n\n" if (!isint($blockSize) || $blockSize < 1000);

warn "\n  [!] Warning: Some input files are duplicates. Considering only unique files...\n" if (@ARGV != uniq(@ARGV));

if ($image && $covonly) {

    print "\n  [i] Note: No plot will be generated in -co (--coverage-only) mode...\n";
    undef($image);

}

if ($mutcount && $onlyMut) {

    # Automatically disables counting indels
    $nodel = 1;
    $noins = 1;

    %onlyMut = map { my $i = $_; map { $i . $_ => { count => 0,
                                                    take  => 0 } } grep { $i ne $_ } qw(A C G T) } qw(A C G T);

    foreach my $mutation (split(/[,;]/, $onlyMut)) {

        my @bases = split(/[:2>]/, $mutation);

        die "\n  [!] Error: Invalid format for -mo parameter's argument \"" . $mutation . "\"\n\n" if (@bases != 2);
        die "\n  [!] Error: Mutation in -mo parameter's argument \"" . $mutation . "\" contains a non-IUPAC character\n\n" if (!isiupac(join("", @bases)));

        foreach my $base1 (split //, (iupac2nt($bases[0]))[0]) {

            foreach my $base2 (grep { $base1 ne $_ } split //, (iupac2nt($bases[1]))[0]) { $onlyMut{$base1 . $base2}->{take} = 1; }

        }

    }

}

if (!defined $samtools) { die "\n  [!] Error: samtools is not in PATH\n\n"; }
elsif (!-e $samtools) { die "\n  [!] Error: samtools doesn't exist\n\n"; }
elsif (!-x $samtools) { die "\n  [!] Error: samtools is not executable\n\n"; }
else {

    my $ret = `$samtools 2>&1`;

    if ($ret =~ m/Version: ([\d\.]+)/) {

        my $version = $1;

        die "\n  [!] Error: RF Count requires SAMTools v1 or greater (Detected: v" . $version . ")\n\n" if (substr($version, 0, 1) < 1);

    }
    else { warn "\n  [!] Warning: Unable to detect SAMTools version\n"; }

    my (@noFlags, @yesFlags);
    @noFlags = qw(UNMAP QCFAIL);
    push(@yesFlags, "PAIRED") if ($pp || $po);
    push(@yesFlags, "PROPER_PAIR") if ($pp);
    push(@noFlags, "REVERSE") if (!$mutcount && !$covonly);
    push(@noFlags, "SECONDARY") if ($primaryonly);
    push(@noFlags, "DUP") if (!$nodiscarddup);

    $samtoolsParams = "-f " . join(",", uniq(@yesFlags)) . " " if (@yesFlags);
    $samtoolsParams .= "-F " . join(",", uniq(@noFlags)) . " -q $mapqual";

}

print "\n[+] Making output directory...";

if (-e $output) {

    if ($overwrite) {

        my $error = rmtree($output);

        die "\n\n  [!] Error: " . $error . "\n\n" if ($error);

    }
    else { die "\n\n  [!] Error: Output directory already exists." .
               "\n      Please use -ow (or --overwrite) to overwrite output directory\n\n"; }

}

if (my $error = mktree($tmpdir, "755")) { die "\n\n  [!] Error: Unable to create output directory ($error)\n\n"; }
mktree("${output}plots/", "755") if ($image);
mktree("${output}frequencies/", "755") if ($onlyMut && $mutcount);

##
# Prepare files
##

$table = Term::Table->new(indent => 2);
$table->head("Sample", "File format", "Sorted", "Indexed", "Library type", "5'-end trimming");

@bam_trim5 = split(/,/, $bam_trim5);

undef($bam_trim5);
$bam_trim5 = shift(@bam_trim5) if (@bam_trim5 == 1);        # If only one value for 5' trimming in SAM/BAM files has been specified,
                                                            # this is applied to all the passed SAM/BAM files

print "\n[+] Checking files:\n\n";

foreach my $sample (uniq(@ARGV)) {

    my ($file, $path, $extension, $format,
        $sorted, $libraryType, $missingMD);
    ($sample, $libraryType) = split(":", $sample);

    die "  [!] Error: Specified sample file \"$sample\" doesn't exist\n\n" if (!-e $sample);

    ($file, $path, $extension) = fileparse($sample, qr/\.[^.]*/);
    ($format, $sorted, $missingMD) = guessTypeAndSorting($sample);

    if (!$libraryType) {

        if (!$libraryStrandAll) {

            if ($mutcount || $covonly) { $libraryType = "unstranded"; }
            else { $libraryType = "second-strand"; }

        }
        else { $libraryType = $libraryStrandAll; }

    }

    if ($libraryType =~ m/^(?:1|f(?:irst)?(?:-strand)?)$/i) { $libraryType = "first-strand"; }
    elsif ($libraryType =~ m/^(?:2|s(?:econd)?(?:-strand)?)$/i) { $libraryType = "second-strand"; }
    elsif ($libraryType  =~ m/^(?:0|u(?:nstranded)?)$/i) { $libraryType = "unstranded"; }
    else { die "  [!] Error: Invalid library type for sample \"" . $file . "\"\n\n"; }

    die "  [!] Error: Library type for RT-stop experiments can only be second-strand\n\n" if ($libraryType ne "second-strand" && !$mutcount && !$covonly);

    push(@qcounter, { path      => $sample,
                      file      => exists $files{$file} ? $file . "_" . $files{$file} : $file,
                      type      => $format,
                      sorted    => $sorted,
                      indexed   => -e "$sample.bai" || -e "$sample.csi" ? 1 : 0,
                      library   => $libraryType,
                      trim5     => 0,
                      missingMD => $missingMD });

    $files{$file}++;
    $spacer{$qcounter[-1]->{file}} = length($qcounter[-1]->{file});
    $spacer = "\n" if (!$qcounter[-1]->{sorted} || $qcounter[-1]->{type} eq "SAM" || $qcounter[-1]->{missingMD} || !$qcounter[-1]->{indexed});

    # If only one value for 5' trimming in SAM/BAM files has been specified, this is applied to all the passed SAM/BAM files
    $qcounter[-1]->{trim5} = isint($bam_trim5) && ispositive($bam_trim5) ? $bam_trim5 : shift(@bam_trim5);

    die "  [!] Error: Fewer 5'-end trimming values in -t5 list than provided SAM/BAM files\n\n" if (!defined $qcounter[-1]->{trim5});
    die "  [!] Error: 5'-end trimming value must be a positive integer\n\n" if (!ispositive($qcounter[-1]->{trim5}) || !isint($qcounter[-1]->{trim5}));

    $table->row($qcounter[-1]->{file}, $qcounter[-1]->{type}, $sorted ? "Yes" : "No", 
                $qcounter[-1]->{indexed} ? "Yes" : "No", $libraryType, ($mutcount || $covonly) ? "Ignored" : $qcounter[-1]->{trim5} . " nt");

}

%spacer = map { $_ => 1 + max(values %spacer) - $spacer{$_} } (keys %spacer);

die "  [!] Error: More 5'-end trimming values in -t5 list than provided SAM/BAM files\n\n" if (@bam_trim5);

$table->print();

print "\n";

# Starts the process manager
$processmanager = Core::Process::Queue->new( processors => $threads,
                                             stderr     => $output . "samtools.log",
                                             tmpDir     => $tmpdir,
                                             verbosity  => 1 );

##
# FASTA Parsing
##

print "\n[+] Importing chromosomes from reference, and building count table base structure...";

if (!defined $baseRC) {

    my $seqio = Data::IO::Sequence->new( file              => $multifasta,
                                         maskIUPAC         => 1,
                                         checkDuplicateIds => 1 );
    $rcio = RF::Data::IO::RC->new( file       => $tmpdir . "base.rc",
                                   index      => $output . "index.rci",
                                   buildindex => 1,
                                   mode       => "w" );

    while (my $entry = $seqio->read()) {

        my ($rentry, $id);
        $id = $entry->id();
        #$id =~ s/[^\w\.-]/_/g;    # Fixes sequence ids that can cause errors at later stages

        $entry->unmask(); # Makes sequence uppercase

        $rentry = RF::Data::RC->new( id       => $id,
                                     sequence => $entry->sequence() );
        $rcio->write($rentry);

        # Store length for later steps
        $transcripts{$id} = $entry->length();
        $realid{$id} = $entry->id();
        $realid{$entry->id()} = $id;

    }

    $baseRC = $tmpdir . "base.rc";

}
else {

    $rcio = RF::Data::IO::RC->new(file => $baseRC);

    for ($rcio->ids()) {

        $transcripts{$_} = $rcio->length($_);
        $realid{$_} = $_;

    }

}

$rcio->close();

$estimatedSize = sum(map { 12 + ($transcripts{$_} + ($transcripts{$_} % 2)) / 2 + 4 * $transcripts{$_} * 2  } keys %transcripts);
$estimatedSize *= sum(map { $_->{library} eq "unstranded" ? 1 : 2 } @qcounter);

if (my $spaceLeft = spaceLeft($output)) { die "\n\n  [!] Error: Insufficient space on disk (needed: " . bytesToHuman($estimatedSize) . ", available: " . bytesToHuman($spaceLeft) . ")\n\n" if ($estimatedSize > $spaceLeft); }

##
# SAM/BAM Header validation
##

print "\n[+] Inspecting SAM/BAM file headers...";

foreach my $sample (@qcounter) {

    my $inheader = 0;

    open(my $fh, "-|", $samtools . " view -H " . $sample->{path} . " 2>&1") or die "\n\n  [!] Error: Unable to read SAM/BAM header from sample \"" . $sample->{file} . "\" (" . $! . ")\n\n";

    while (my $row = <$fh>) {

        chomp($row);

        if ($row =~ m/^\@SQ\tSN:(.+?)\tLN:(\d+)$/) {

            my ($id, $length) = ($1, $2);
            #$id =~ s/\//_/g;

            if (exists $transcripts{$id}) {

                die "\n\n  [!] Error: Chromosome \"" . $id . "\" length from sample \"" . $sample->{file} . "\" header (" . $length . " nt) differs from reference (" . $transcripts{$id} . " nt)." .
                    "\n             Please re-map your dataset using the same reference, and try again.\n\n" if ($transcripts{$id} != $length);

                $inheader++;

            }

            $inHeaders{$id} = 1;

        }

    }
    close($fh);

    die "\n\n  [!] Error: All chromosomes in sample \"" . $sample->{file} . "\" header are absent in reference." .
        "\n             Please re-map your dataset using the same reference, or provide a different reference by the -f (or --fasta) parameter.\n\n" if (!$inheader);

    warn "\n\n  [!] Warning: Only " . $inheader . "/" . keys(%transcripts) . " reference chromosomes are present in sample \"" . $sample->{file} . "\" header." .
         "\n               All chromosomes absent in reference will be skipped." if ($inheader != keys(%transcripts));

}

for (keys %transcripts) { delete($transcripts{$_}) if (!exists $inHeaders{$_}); }

print "\n$spacer";

##
# Sorting BAM files (if needed)
##

if (my @unsorted = grep { !$_->{sorted} } @qcounter) { 

    foreach my $sample (@unsorted) {

        my $path = $tmpdir . $sample->{file} . "_sorted.bam";

        $processmanager->enqueue( command => "$samtools sort --no-PG -@ $wt -O BAM -T \"" . $tmpdir . $sample->{file} . "\" -o \"$path\" \"" . $sample->{path} . "\"",
                                  id      => $sample->{file} );

        $sample->{path} = $path;
        $sample->{type} = "BAM";

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Sorting " . scalar(@unsorted) . " unsorted SAM/BAM file(s)");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to index sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

##
# Converting SAM>BAM files (if needed)
##

if (my @sam = grep { $_->{type} eq "SAM" } @qcounter) { 

    foreach my $sample (@sam) {

        my $path = $tmpdir . $sample->{file} . ".bam";

        $processmanager->enqueue( command => "$samtools view --no-PG -@ $wt -O BAM -o \"$path\" \"" . $sample->{path} . "\"",
                                  id      => $sample->{file} );

        $sample->{path} = $path;
        $sample->{type} = "BAM";

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Converting " . scalar(@sam) . " sorted SAM file(s) to BAM format");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to index sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

##
# Computing MD tag (if needed)
##

if (my @missingMD = grep { $_->{missingMD} } @qcounter) { 

    foreach my $sample (@missingMD) {

        my $path = $tmpdir . $sample->{file} . ".withMD.bam";

        $processmanager->enqueue( command => "$samtools calmd --no-PG -@ $wt -b \"" . $sample->{path} . "\" \"$multifasta\" > \"$path\"",
                                  id      => $sample->{file} );

        $sample->{path} = $path;

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Computing missing MD tag for " . scalar(@missingMD) . " files");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to compute MD tag for sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

##
# Indexing BAM files (if needed)
##

if (my @toIndex = grep { !$_->{indexed} } @qcounter) { 

    foreach my $sample (@toIndex) {

        $processmanager->enqueue( command => "$samtools index -@ $wt \"" . $sample->{path} . "\"",
                                  id      => $sample->{file} );

        $sample->{indexed} = 1;

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Indexing " . scalar(@toIndex) . " BAM file(s)");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to index sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
         
    });
    $processmanager->start();
    $processmanager->waitall();

}

print "$spacer\n[+] Copying RC base structure...";

$processmanager->stderr($output . "error.out");
$processmanager->parentOnExit(sub {});

foreach my $sample (@qcounter) {

    $processmanager->enqueue( command => sub { copy($baseRC, $output . $sample->{file} . ".plus.rc") or die $!; return(); },
                              id      => $sample->{file} . "_plus");
    $processmanager->enqueue( command => sub { copy($baseRC, $output . $sample->{file} . ".minus.rc") or die $!; return(); },
                              id      => $sample->{file} . "_minus" ) if ($sample->{library} ne "unstranded");

}

$processmanager->start();
$processmanager->waitall();

while (my $sample = $processmanager->dequeue()) { die "\n\n  [!] Error: Unable to copy counts table structure for sample \"" . $sample->exitcode()->[0] . "\"\n\n" if ($sample->exitcode()->[0]); }

print "\n[+] Queuing jobs...";

for(my $i = 0; $i < @qcounter; $i++) {

    my ($sample, @sampleIds);
    $sample = $qcounter[$i];

    open(my $fh, "-|", "$samtools idxstats -@ " . ($threads * $wt) . " " . $sample->{path}) or die "\n\n  [!] Error: Unable to get IDs of covered transcripts ($!)\n\n";
    while(<$fh>) {

        chomp;
        my @entry = split /\t/;
        #$entry[0] =~ s/[^\w\.\-:]/_/g;
        push(@sampleIds, $entry[0]) if ($entry[2]); # Only take IDs with alignments

    }
    close($fh);

    if (!@sampleIds) {

        splice(@qcounter, $i, 1);
        $i--;

    }

    foreach my $chromosome (@sampleIds) {

        $processmanager->enqueue( command   => \&count,
                                  arguments => [ $sample, $chromosome ],
                                  id        => $sample->{file} );

    }

}

die "\n\n  [!] Error: All sample files are empty (no alignments). Nothing to be processed.\n\n" if (!$processmanager->queueSize());

$processmanager->shuffleQueue();

print "\n[+] Calculating per-base " . ($covonly ? "coverage" : ($mutcount ? "mutation counts" : "RT-stops") . " and coverage") . ". This may take a while...\n\n";

if ($multiBar) {

    $progressBar = Term::Progress::Multiple->new( sets    => { map { $_->{file} => scalar($processmanager->listQueue($_->{file})) } @qcounter },
                                                  colored => 1 ); 
    $progressBar->initAll();

}
else {

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init();

}

$processmanager->processors(min(ncores(), $threads * $wt));
$processmanager->parentOnExit(sub {

    # Here we retrieve the partials from each process and combine them
    my ($sample, $pid, $uid, $stats);
    ($sample, $pid, $uid) = @_[0..2];
    $stats = ($processmanager->dequeue($uid)->exitcode())[0];
    
    if (ref($stats) eq "HASH") {

        my $errMsg = $stats->{error} || $stats->{warning};

        if ($multiBar) { $progressBar->update($sample); }
        else { $progressBar->update($errMsg ? (1, "$sample: $errMsg") : 1); }

        if (defined $stats->{error} || (defined $stats->{warning} && !$stats->{total})) { 
                
            $progressBar->appendText($sample, $errMsg) if ($multiBar);
            
            if (defined $stats->{error}) {

                $allStats{$sample}->{error} = $errMsg;
                $processmanager->deleteQueue($sample);
                $processmanager->killById($sample);

            }

        }
        else {

            $progressBar->appendText($sample, $errMsg) if ($multiBar && $errMsg);

            $allStats{$sample}->{$_} += $stats->{$_} for (qw(A C G T mutated total totalPrimary));
            $allStats{$sample}->{covered}++ if ($stats->{total});

            if ($mutcount && $onlyMut) { $allFreqs{$sample}->{$_}->{count} += $stats->{onlyMut}->{$_}->{count} for (keys %{$stats->{onlyMut}}); }

        }

    }

});

$processmanager->start();
$processmanager->waitall();

print "\n";

$table = Term::Table->new(indent => 2);

if ($covonly) { $table->head("Sample", "Covered"); }
elsif ($mutcount) { $table->head("Sample", "Covered", "Mutated alignments", "\%A muts", "\%C muts", "\%G muts", "\%U muts"); }
else { $table->head("Sample", "Covered", "\%A stops", "\%C stops", "\%G stops", "\%U stops"); }

foreach my $sample (grep { !defined $allStats{$_}->{error} } sort keys %allStats) {

    my (@row, @rc);
    @row = ($sample, $allStats{$sample}->{covered} || 0);
    @rc = ($output . $sample . ".plus.rc");
    push(@rc, $output . $sample . ".minus.rc") if (-e $output . $sample . ".minus.rc");

    if (!$covonly && $allStats{$sample}->{covered}) {

        my ($total, $mutReads, @bases);
        $total = sum(map { $allStats{$sample}->{$_} } qw(A C G T));
        $mutReads = 0;

        if ($mutcount) {

            if ($allStats{$sample}->{total}) {

                $mutReads = sprintf("%.2f", $allStats{$sample}->{mutated} / $allStats{$sample}->{total} * 100);
                push(@row, $allStats{$sample}->{mutated} . "/" . $allStats{$sample}->{total} . " ($mutReads\%)");

            }
            else { push(@row, "-"); }

        }

        if ($total) {

            @bases = map { sprintf("%.2f", $allStats{$sample}->{$_} / $total * 100 ) } qw(A C G T);
            push(@row, @bases);

        }
        else { push(@row, ("-") x 4); }

        push(@stats, [ $sample, @bases, $mutReads ]);


    }

    foreach my $rc (@rc) { # Updates the read count in the RC file
        
        $rcio = RF::Data::IO::RC->new( file           => $rc,
                                       mode           => "w+",
                                       noPreloadIndex => 1 );
        $rcio->mappedreads($allStats{$sample}->{totalPrimary});
        $rcio->close();
    
    }

    if ($mutcount && $onlyMut) {

        my ($totMuts, $freqIO);
        $totMuts = sum(map { $allFreqs{$sample}->{$_}->{count} } keys %{$allFreqs{$sample}});
        $freqIO = Data::IO->new( file  => $output . "frequencies/" . $sample . ".txt",
                                 mode  => "w+",
                                 flush => 1 );
        $freqIO->write(join("\n", map { join("\t", $_, $totMuts ? sprintf("%.6f", $allFreqs{$sample}->{$_}->{count} / $totMuts) : "NaN") } sort keys %{$allFreqs{$sample}}) . "\n");

    }

    $table->row(@row);

}

if ($table->nRows()) {

    print "\n[+] Statistics:\n\n";
    $table->print();
    print "\n";

}

if (my @failed = grep { defined $allStats{$_}->{error} } sort keys %allStats) {

    print "\n  [!] Processing failed for sample \"$_\" (" . $allStats{$_}->{error} . ")" for (@failed);
    print "\n";

}

if ($image && @stats) {

    print "\n[+] Generating plots...";

    my ($img, $plot);
    $img = Graphics::Image->new( file   => $output . "plots/base_stats.pdf",
                                 width  => min(scalar(@stats), 40),
                                 height => 8,
                                 R      => $R,
                                 tmpdir => $tmpdir );

    $plot = Graphics::Chart::Barplot->new( x             => "sample",
                                           data          => [ map { @$_[1 .. 4] } @stats ],
                                           dataLabels    => { base   => [ (qw(A C G T)) x @stats ],
                                                              sample => [ map { ($_->[0]) x 4 } @stats ] },
                                           fill          => "base",
                                           groupMethod   => "stack",
                                           dataLabelSort => { base => [ qw(T G C A) ]},
                                           legendSort    => [ qw(A C G T) ],
                                           legendColors  => { A => "#24A349",  
                                                              C => "#4687C7", 
                                                              G => "#EDBA1D",
                                                              T => "#A62324" },
                                           yTitle        => "Per-base " . ($mutcount ? "mutations" : "RT-stops") . " (\%)",
                                           xLabelAngle   => 90,
                                           labelTextSize => 8 );
    $img->plot([$plot]);

    if ($mutcount && sum(map { $_->[-1] } @stats)) {

        $img->file($output . "plots/read_mutation_stats.pdf");
        $plot = Graphics::Chart::Barplot->new( x             => "sample",
                                               data          => [ map { $_->[-1] } @stats ],
                                               dataLabels    => { sample => [ map { $_->[0] } @stats ] },
                                               fill          => "sample",
                                               legend        => 0,
                                               legendColors  => { map { $_->[0] => "black" } @stats },
                                               yTitle        => "Mutated reads (\%)",
                                               xLabelAngle   => 90,
                                               labelTextSize => 8 );

        $img->plot([$plot]);

    }

}

print "\n[+] Cleaning up temporary files...";

cleanup();

print "\n[+] All done.\n\n";

sub count {

    my ($sample, $id) = @_;

    undef($processmanager); # Frees unnecessary memory
    #$id =~ s/[^\w\.-]/_/g;

    my ($stats, %counts, %coverage, %stats, 
        %rcIO, %covered);
    %stats = ( id           => $id,
               A            => 0,
               C            => 0,
               G            => 0,
               T            => 0,
               mutated      => 0,
               total        => 0,
               totalPrimary => 0,
               warning      => undef,
               error        => undef );
    %counts = ( "+" => [],
                "-" => [] );
    %coverage = ( "+" => [],
                  "-" => [] );
    
    $rcIO{"+"} = RF::Data::IO::RC->new( file  => $output . $sample->{file} . ".plus.rc",
                                        index => $output . "index.rci",
                                        mode  => "w+" );
    $rcIO{"-"} = RF::Data::IO::RC->new( file  => $output . $sample->{file} . ".minus.rc",
                                        index => $output . "index.rci",
                                        mode  => "w+" ) if ($sample->{library} ne "unstranded");

    # We need to fix some parameters in case the analysis is MaP and unstranded
    if ($sample->{library} eq "unstranded" && $mutcount) {

        $noambiguous = 1;
        undef($_) for ($leftalign, $rightdel, $leftdel, $collapse, $onlyMut);

    }

    if (open(my $fh, "-|", $samtools . " view $samtoolsParams " . $sample->{path} . " $realid{$id} 2>&1")) {

        while (!eof($fh)) {

            my ($row, $cov, $ins,
                $editdist, $truelen, $gaps,
                $realStrand, $start, $covWithGaps, @row);
            chomp($row = <$fh>);

            if ($row =~ /Could not retrieve index file/) { return({ error => "Error: Missing BAM index" }); }
            elsif ($row =~ /invalid region or unknown reference/) { return({ warning => "Warning: Chromosome \"$realid{$id}\" absent in BAM file" }); }
            elsif ($row =~ /\[.+?\] (.+)$/) { 
                
                $stats{"warning"} = "Warning: $1";
                
                next;
                
            }

            @row = split(/\t/, $row);

            next if ($row[5] eq "*"); # to avoid malformed lines missing a CIGAR string
            
            if ($sample->{library} eq "unstranded") { $realStrand = "+"; }
            else {

                if ($sample->{library} eq "second-strand") { $realStrand = $row[1] & 16 ? ($row[1] & 1 ? ($row[1] & 64 ? "-" : "+") : "-") : ($row[1] & 1 ? ($row[1] & 64 ? "+" : "-") : "+"); }
                else { $realStrand = $row[1] & 16 ? ($row[1] & 1 ? ($row[1] & 64 ? "+" : "-") : "+") : ($row[1] & 1 ? ($row[1] & 64 ? "-" : "+") : "-"); }

            }

            $start = ($mutcount || $covonly || $realStrand eq "-" ? $row[3] : $row[3] - $sample->{trim5}) - 1;
            ($cov, $truelen, $covWithGaps, $ins, $gaps) = parsecigar($row[5]);
            @{$ins} = map { $_ + $start } @{$ins}; # Adjust insertion relative position to true position
            $editdist = editdist($row) + @{$ins}; # Editing distance is calculated this way so that consecutively deleted/inserted bases are counted only once
            
            next if ($start < 0 || (!$mutcount && !$covonly && $realStrand eq "+" && $start - 1 < 0));
            next if ($mutcount && $cov < $discardshorter);
            next if ($mutcount && $editdist / $cov > $maxmut);
            next if ($mutcount && median(map { unpack("C*", $_) - 33 } split(//, $row[10])) < $medianqual); # Check median read's quality
            #next if ($po && $row[1] & 1 && $row[1] & 8); # Read is one of a pair, but one mate is unmapped
            #next if ($pp && $row[1] & 1 && !($row[1] & 2)); # Read is one of a pair, but pair is not properly mapped
            next if ((($row[5] =~ m/^\d+S/ && $realStrand eq "+") || ($row[5] =~ m/\d+S$/ && $realStrand eq "-")) && !$mutcount && !$covonly); # Discard read in RT-count mode, if it has soft/hard clipping at 5'-end
            #next if ($row[4] < $mapqual); # Discard reads with too low mapping quality
            #next if ($primaryonly && $row[1] & 256); # Discard secondary alignments
            #next if ($row[1] & 512); # Discard reads that fail platform quality checks
            #next if ($row[1] & 1024 && !$nodiscarddup); # Discard reads that are optical/PCR duplicates

            if (!defined $blockData{"start"} || $start > $blockData{start} + $blockSize - 1) {

                if (defined $blockData{"start"}) {

                    my $maxSize = max(scalar(@{$counts{"+"}}), scalar(@{$counts{"-"}}), scalar(@{$coverage{"+"}}), scalar(@{$coverage{"-"}}));
                    push(@{$counts{"+"}}, (0) x ($maxSize - scalar(@{$counts{"+"}})));
                    push(@{$coverage{"+"}}, (0) x ($maxSize - scalar(@{$coverage{"+"}})));

                    if ($sample->{library} ne "unstranded") {

                        push(@{$counts{"-"}}, (0) x ($maxSize - scalar(@{$counts{"-"}})));
                        push(@{$coverage{"-"}}, (0) x ($maxSize - scalar(@{$coverage{"-"}})));
                    }

                    if ($blockData{"start"} + $#{$counts{"+"}} <= $start) {

                        $rcIO{"+"}->writeBytewise($id, $blockData{"start"}, $counts{"+"}, $coverage{"+"});
                        $counts{"+"} = [];
                        $coverage{"+"} = [];

                        if ($sample->{library} ne "unstranded") {

                            $rcIO{"-"}->writeBytewise($id, $blockData{"start"}, $counts{"-"}, $coverage{"-"});
                            $counts{"-"} = [];
                            $coverage{"-"} = [];

                        }

                    }
                    else {

                        my (@counts, @coverage);
                        @counts = splice(@{$counts{"+"}}, 0, $start - $blockData{"start"});
                        @coverage = splice(@{$coverage{"+"}}, 0, $start - $blockData{"start"});
                        $rcIO{"+"}->writeBytewise($id, $blockData{"start"}, \@counts, \@coverage);

                        if ($sample->{library} ne "unstranded") {

                            @counts = splice(@{$counts{"-"}}, 0, $start - $blockData{"start"});
                            @coverage = splice(@{$coverage{"-"}}, 0, $start - $blockData{"start"});
                            $rcIO{"-"}->writeBytewise($id, $blockData{"start"}, \@counts, \@coverage);

                        }

                    }

                }
                
                $blockData{"start"} = $start;
                undef($blockData{"sequence"});

            }

            $start -= $blockData{"start"};
            
            if (!defined $blockData{"sequence"} || $start + $covWithGaps >= $blockData{"length"}) {

                $blockData{"sequence"} = $rcIO{"+"}->sequence($id, $blockData{"start"}, min($transcripts{$id} - 1, $blockData{"start"} + $start + $covWithGaps + $blockSize));#max($blockSize, $start + $covWithGaps + $blockSize)));
		        $blockData{"length"} = length($blockData{"sequence"});

            }

            next if (($mutcount || $covonly || $realStrand eq "-") && $start < 0);

            # This happens when we are at the beginning of the block and the rt-stop is before the
            # beginning of the block, then we update directly on the file
            if (!$mutcount && !$covonly && $realStrand eq "+" && $start - 1 < 0) { $rcIO{"+"}->updateBytewise($id, $blockData{"start"} - 1, [1], [1]); }
            else {

                if ($mutcount) {  # Mutations count

                    # Parse the MD flag only when edit distance != 0
                    if ($row !~ m/NM:i:0/) {

                        my ($muts, $missingMD, $lowQual, @uniqmutations);
                        ($muts, $lowQual, $missingMD) = parsemd(\@row, clonearrayref($gaps), $realStrand);

                        if ($missingMD) {

                            $stats{warning} = "Warning: Missing MD tag, run 'samtools calmd' to fix";

                            last;

                        }

                        @$muts = uniq(@$muts, @{$ins}) if (!$noins);
                        @$muts = rmconsecutive(@$muts) if ($rmconsecutive);

                        # Collapsing is done towards the 3' end of the RNA, but if the library is unstranded, we do not know where the 3' end is
                        @$muts = collapsemutations($muts, $realStrand) if ($collapse);
                        
                        @$muts = map { $_ - $blockData{"start"} } @$muts;

                        if (@$muts) {

                            $stats{mutated}++;

                            for (@$muts) {

                                $counts{$realStrand}->[$_]++;
                                my $base = $realStrand eq "+" ? substr($blockData{"sequence"}, $_, 1) : dnarevcomp(substr($blockData{"sequence"}, $_, 1));
                                $stats{$base}++;

                            }

                        }

                        if ($noCovLowQual) { map { $coverage{$realStrand}->[$_ - $blockData{"start"}]-- } @$lowQual; }

                    }

                }
                elsif (!$covonly) {  # RT-stops count mode

                    if ($realStrand eq "+") {

                        $counts{$realStrand}->[$start - 1]++;
                        $coverage{$realStrand}->[$_]++ for ($start - 1 .. $start + $sample->{trim5} - 1);
                        $stats{substr($blockData{"sequence"}, $start - 1, 1)}++;

                    }
                    else {

                        my $rtStopPos = $start + $covWithGaps + $sample->{trim5};

                        # On the minus strand, if the read aligns to the end of the chromosome, the RT-stop would exceed the length
                        next if ($blockData{"start"} + $rtStopPos >= $transcripts{$id});

                        $counts{$realStrand}->[$rtStopPos]++;
                        $coverage{$realStrand}->[$_]++ for ($rtStopPos - $sample->{trim5} .. $rtStopPos);
                        $stats{dnarevcomp(substr($blockData{"sequence"}, $rtStopPos, 1))}++;

                    }

                }

            }

            $stats{totalPrimary}++ unless ($row[1] & 256);
            $stats{total}++;

            if (@$gaps) {

                my ($lastStart, $lastEnd);
                $lastStart = $start;
                $lastStart += $sample->{trim5} if (!$mutcount && !$covonly && $realStrand eq "+");

                foreach my $gap (@$gaps) {

                    $lastEnd = $lastStart + $gap->[0];
                    map { $coverage{$realStrand}->[$_]++ } $lastStart .. $lastEnd;
                    $lastStart = $lastEnd + $gap->[1] + 1;

                }

                $lastEnd = $start + $covWithGaps - 1;
                $lastEnd += $sample->{trim5} if (!$mutcount && !$covonly && $realStrand eq "+");

                map { $coverage{$realStrand}->[$_]++ } $lastStart .. $lastEnd;

            }
            else { map { $coverage{$realStrand}->[$_]++ } $start .. $start + $cov - 1; }

        }

        close($fh);

        # Only if there were reads processed from file
        if (defined $blockData{"start"}) {

            my $maxSize = max(scalar(@{$counts{"+"}}), scalar(@{$counts{"-"}}), scalar(@{$coverage{"+"}}), scalar(@{$coverage{"-"}}));
            push(@{$counts{"+"}}, (0) x ($maxSize - scalar(@{$counts{"+"}})));
            push(@{$coverage{"+"}}, (0) x ($maxSize - scalar(@{$coverage{"+"}})));

            if ($sample->{library} ne "unstranded") {

                push(@{$counts{"-"}}, (0) x ($maxSize - scalar(@{$counts{"-"}})));
                push(@{$coverage{"-"}}, (0) x ($maxSize - scalar(@{$coverage{"-"}})));
            }

            $rcIO{"+"}->writeBytewise($id, $blockData{"start"}, $counts{"+"}, $coverage{"+"}, $stats{totalPrimary});
            $counts{"+"} = [];
            $coverage{"+"} = [];

            if ($sample->{library} ne "unstranded") {

                $rcIO{"-"}->writeBytewise($id, $blockData{"start"}, $counts{"-"}, $coverage{"-"}, $stats{totalPrimary});
                $counts{"-"} = [];
                $coverage{"-"} = [];

            }

        }

        $stats{onlyMut} = \%onlyMut;

        return(\%stats);

    }
    else { return({ error => "Error: Unable to read file \"" . $sample->{file} . "\"" }); }

}

sub cleanup {

    unlink(glob($tmpdir . "*"));
    
    if (!-s $output . "error.out") { unlink($output . "error.out"); }
    else { print "\n\n  [!] Warning: Execution completed with error(s)/warning(s). Please check the \"${output}/error.out\" file\n"; }

    rmtree($tmpdir);

}

sub guessTypeAndSorting {

    my $file = shift;

    my ($type, $header, $eof, $sorted, 
        $i, $h, $missingMD, @data, %header);
    $i = $h = $missingMD = 0;
    $sorted = 1;
    $header = "\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00";
    $eof = "\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00";

    open(my $fh, "-|", "$samtools view -h $samtoolsParams $file 2>&1") or die "\n  [!] Error: Unable to open sample \"$file\" ($!)\n\n";
    while (<$fh>) {

        next if (/PG line/);

        if (substr($_, 0, 1) eq "@") {

            if (substr($_, 1, 2) eq "SQ") {

                my @row = split "\t";
                $header{(split(":", $row[1]))[1]} = $h;
                $h++;

            }

        }
        else {

            if ($mutcount) { $missingMD = 1 unless (/\tMD:Z:/); }

            $i++;
            my @row = split "\t";

            die "\n  [!] Error: Sample \"$file\" is not a valid SAM/BAM file\n\n" if (/(?:Failed to open file|fail to read the header)/ ||
                                                                                      @row < 12 || !isint($row[1]) || !isdna($row[9]));

            $sorted = 0 if (@data && ($header{$row[2]} < $header{$data[0]} ||
                                      ($header{$row[2]} == $header{$data[0]} && $row[3] < $data[1])));

            @data = ($row[2], $row[3]);

        }

        last if ($i == 100);

    }
    close($fh);

    undef(@data);

    open($fh , "<:raw", $file);
    read($fh, $data[0], 16);
    seek($fh, -28, SEEK_END);
    read($fh, $data[1], 28);
    close($fh);

    if ($data[0] eq $header && $data[1] eq $eof) { $type = "BAM"; }
    else { $type = "SAM"; }

    return($type, $sorted, $missingMD);

}

sub parsemd { # SAM MD flag parser

    my ($row, $gaps, $strand) = @_;

    my ($md, $llen, $realStart, $lastGapEnd,
        @pos, @quals, @lowQual, %posMap);
    $realStart = $row->[3] - 1;
    $llen = $realStart;
    $lastGapEnd = $realStart;

    return([], [], 1) if (@{$row} < 11);

    for (10 .. $#{$row}) { if ($row->[$_] =~ m/^MD:Z:(.+)$/) { $md = $1; last; } }

    return([], [], 1) if (!defined $md);

    %posMap = ref2read($row);
    @quals = split(//, $row->[10]);
    $md = uc($md);

    while($md =~ m/^(\d+)/) {

        my ($len, $next);
        $len = $1;
        $md =~ s/^$len//;

        while (@$gaps && $llen + $len > $lastGapEnd + $gaps->[0]->[0]) {

            my $gap = shift(@$gaps); 
            $llen += $gap->[1];
            $lastGapEnd += $gap->[0] + $gap->[1] + 1; 

        }

        while($md =~ m/^(\D)/) {

            $next = $1;

            if ($next eq "^") { # Deletion

                my ($del, $start, $end, $win,
                    $real, $disttodel, %collapsed);

                $md =~ s/^\^//;
                $md =~ m/^([ACGNT]+)/;
                $del = $1;

                if (!$nodel && length($del) <= $maxdel) { # Count unambiguously mapped deletions as mutations

                    my ($before, $after, @truedel);

                    # Extract a window of 20 nt (+/- 10nt) centered on the deletion
                    $start = $len + $llen - 1 - $blockData{"start"} >= 9 ? $len + $llen - 1 - 9 : $blockData{"start"};
                    $end = $len + $llen - 1 + length($del) + 10 < $transcripts{$realid{$row->[2]}} ? $len + $llen - 1 + length($del) + 10 : $transcripts{$realid{$row->[2]}} - 1;
                    $win = substr($blockData{"sequence"}, $start - $blockData{"start"}, $end - $start + 1);
                    $disttodel = $len + $llen >= 10 ? 10 : $len + $llen;
                    $real = substr($blockData{"sequence"}, $start - $blockData{"start"}, $disttodel) . substr($blockData{"sequence"}, $start + $disttodel + length($del) - $blockData{"start"}, $end - $start - $disttodel - length($del) + 1);

                    # Slide the deletion along sequence, and build hash table
                    push(@{$collapsed{substr($win, 0, $_) . substr($win, $_ + length($del), length($win) - length($del) - $_)}}, $start + $_ + length($del) - 1) for (0 .. length($win) - length($del));

                    if (@{$collapsed{$real}} > 1) { # Deletion is not unambiguously mapped
                    
                        if (!$noambiguous) {

                            my $shift = @{$collapsed{$real}} - 1;
                            my $truedel = $leftalign ? ($strand eq "+" ? min(@{$collapsed{$real}}) : max(@{$collapsed{$real}})) :
                                                       ($strand eq "+" ? max(@{$collapsed{$real}}) : min(@{$collapsed{$real}}));
                            $before = $truedel - length($del);
                            $after = $truedel + 1;
                            @truedel = $strand eq "+" ? ($rightdel ? ($truedel) : ($leftdel ? ($before + 1) : ($before + 1 .. $truedel))) :
                                                        ($rightdel ? ($before + 1) : $leftdel ? ($truedel) : ($before + 1 .. $truedel));
                            push(@pos, @truedel); #if (($evalsurround &&
                                                  #    ((exists $posMap{$before} && unpack("C*", $quals[$posMap{$before}]) - 33 >= $minqual) || !exists $posMap{$before}) &&
                                                  #    ((exists $posMap{$after} && unpack("C*", $quals[$posMap{$after}]) - 33 >= $minqual) || !exists $posMap{$after})) ||
                                                  #   !$evalsurround);

                        }

                    }
                    else {

                        $before = $collapsed{$real}->[0] - length($del);
                        $after = $collapsed{$real}->[0] + 1;
                        @truedel = $strand eq "+" ? ($rightdel ? ($collapsed{$real}->[0]) : ($leftdel ? ($before + 1) : ($before + 1 .. $collapsed{$real}->[0]))) :
                                                    ($rightdel ? ($before + 1) : $leftdel ? ($collapsed{$real}->[0]) : ($before + 1 .. $collapsed{$real}->[0]));
                        push(@pos, @truedel); # if (($evalsurround &&
                                              #    ((exists $posMap{$before} && unpack("C*", $quals[$posMap{$before}]) - 33 >= $minqual) || !exists $posMap{$before}) &&
                                              #    ((exists $posMap{$after} && unpack("C*", $quals[$posMap{$after}]) - 33 >= $minqual) || !exists $posMap{$after})) ||
                                              #   !$evalsurround);

                    }

                }

                $len += length($del);
                $md =~ s/^$del//;

            }
            elsif ($next =~ m/^([ACGNT])$/) {

                my ($readBase, $realNext);
                $readBase = substr($row->[9], $posMap{$llen + $len}, 1);
                $realNext = $next;

                if ($next ne "N" && $readBase ne "N") {

                    if ($strand eq "-") {

                        $readBase = dnarevcomp($readBase);
                        $next = dnarevcomp($next);

                    }

                    # Include the mutation if both the mutated base and the bases immediately surroundinding
                    # have a quality score >= minqual, and the mutated base is not a N in the reference or in the read
                    if (unpack("C*", $quals[$posMap{$llen + $len}]) - 33 >= $minqual &&
                        (($evalsurround &&
                        ((exists $posMap{$llen + $len - 1} && unpack("C*", $quals[$posMap{$llen + $len - 1}]) - 33 >= $minqual) || !exists $posMap{$llen + $len - 1}) &&
                        ((exists $posMap{$llen + $len + 1} && unpack("C*", $quals[$posMap{$llen + $len + 1}]) - 33 >= $minqual) || !exists $posMap{$llen + $len + 1})) ||
                        !$evalsurround) &&
                        (($onlyMut && $onlyMut{$next . $readBase}->{take}) || !$onlyMut)) { push(@pos, $len + $llen); }
                    else { push(@lowQual, $len + $llen); }

                    $onlyMut{$next . $readBase}->{count}++ if ($onlyMut);

                }

                $md =~ s/^$realNext//;
                $len++;

            }

        }

        $llen += $len;

    }

    return(\@pos, \@lowQual, 0);

}

sub parsecigar {

    my $cigar = shift;

    my ($cov, $truelen, $last, $lastMatch,
        @ins, @gaps);
    ($cov, $truelen, $last, $lastMatch) = (0, 0, 0, 0);

    while ($cigar =~ m/^(\d+[SH])/) { $cigar =~ s/^$1//; }

    while($cigar =~ m/^(\d+)([MIDNSHP=X])/) {

        my ($n, $op) = ($1, $2);

        if ($op eq "N") {

            push(@gaps, [$lastMatch - 1, $n]);
            $lastMatch = 0;

        }

        $last += $n if ($op =~ m/^[DNM=X]$/);

        if ($op =~ m/^[DM=X]$/) {

            $cov += $n;
            $lastMatch += $n;

        }

        $truelen += $n if ($op =~ m/^[MI=X]$/);
        push(@ins, $last - 1) if ($op eq "I"); # Relative position of the inserted nucleotide

        $cigar =~ s/^$n$op//;

    }

    return($cov, $truelen, $last, \@ins, \@gaps);

}

sub editdist {

    my $row = shift;

    my $dist = 0;

    if ($row =~ m/MD:Z:(\S+)/) {

        my ($md, $del, $mut);
        $md = $1;
        ($del) = $md =~ s/\^(?:N*[ACGT]+|[ACGT]+N*)[ACGTN]*//g; # This is to avoid counting N-only deletions
        ($mut) = $md =~ tr/ACGT/ACGT/;
        $dist = $del + $mut;

    }

    return($dist);

}

# Collapses consecutive mutations toward the 3'-most one
# e.g. collapsemutations(10, 11, 12) = 12
# Distance tollerance between consecutive mutations is set by $maxmutdist (default: 2)
sub collapsemutations {

    my ($values, $strand) = @_;

    my @values = sort {$a <=> $b} uniq(@$values);
    @values = reverse(@values) if ($strand eq "-");

    return unless(@values);

    for (my $i=0; $i < $#values; $i++) {

        if (abs(diff(@values[$i..$i+1])) <= $maxmutdist) {

            splice(@values, $i, 1);
            $i--;

        }

    }

    return($strand eq "+" ? @values : reverse(@values));

}

sub rmconsecutive {

    my @muts = @_;

    return(@muts) if (@muts < 2);

    my ($last, @selected);

    for(my $i = 0; $i < @muts; $i++) {

        next if ($i > 0 && inrange($muts[$i], [$muts[$i - 1], $muts[$i - 1] + $rmconsecutive]));
	    next if ($i < $#muts && inrange($muts[$i], [$muts[$i + 1] - $rmconsecutive, $muts[$i + 1]]));

	    push(@selected, $muts[$i]);

    }

    return(@selected);

}

sub ref2read { # Reference to read relative position

    my $row = shift;

    my ($rpos, $qpos, $cigar, $lastrpos,
        $lastqpos, @ops, %pos, %ops);
    $cigar = $row->[5];
    $rpos = $lastrpos = $row->[3] - 1;
    $qpos = $lastqpos = 0;
    %ops = ( "M" => [1, 1],
             "I" => [1, 0],
             "D" => [0, 1],
             "N" => [0, 1],
             "S" => [1, 0],
             "H" => [0, 0],
             "P" => [0, 0],
             "=" => [1, 1],
             "X" => [1, 1] );

    while($cigar =~ m/^(\d+)([MIDNSHP=X])/) {

        my ($n, $op) = ($1, $2);
        push(@ops, [$n, $op]);
        $cigar =~ s/^$n$op//;

    }

    for (@ops) {

        my ($n, $op) = @{$_};
        next if (!$ops{$op}->[0] &&
                 !$ops{$op}->[1]);

        $qpos += $n if ($ops{$op}->[0]);
        $rpos += $n if ($ops{$op}->[1]);

        if ($ops{$op}->[0] &&
            $ops{$op}->[1]) {

            if (my $diff = $qpos - $lastqpos) { $pos{$lastrpos + $_} = $lastqpos + $_ for (0 .. $diff - 1); }

        }

        $lastqpos = $qpos;
        $lastrpos = $rpos;

    }

    return(%pos);

}

sub help {

    print "\n  [!] Error: Invalid option. Please check the help\n" if ($_[0]);

    die <<HELP;

 RF Count Genome (v$Core::Utils::VERSION)
 RNA Framework [http://www.rnaframework.com]

 Author:  Danny Incarnato (dincarnato[at]rnaframework.com)
 Summary: Calculates per-base RT-stops/mutations and coverage (genome-level)

 Usage:   rf-count-genome [Options] Sample1.sam:[ufs] Sample2.bam:[ufs] ... Samplen.sam:[ufs]

 Options                                          Description
 -p   or --processors              <int>          Number of processors to use (Default: 1)
 -wt  or --working-threads         <int>          Number of working threads to use for each instance of SAMTools (Default: 1).
                                                  Note: RF Count executes 1 instance of SAMTools for each processor specified by -p.
                                                        At least -p <processors> * -wt <threads> processors are required.
 -P   or --per-file-progress                      The progress of each individual file is shown as a separate progress bar
                                                  Note: this only works in interactive mode. If output is redirected to file, a single 
                                                        progress bar is shown reporting the overall status. Similarly, if the number of 
                                                        samples exceedes the number of lines in the terminal, a single progress bar is shown.
 -bs  or --block-size              <int>          Maximum size of the chromosome block to keep in memory (>=1000, Default: 100000)
 -o   or --output-dir              <string>       Output directory (Default: rf_count/)
 -ow  or --overwrite                              Overwrites output directory (if the specified path already exists)
 -s   or --samtools                <string>       Path to samtools executable (Default: assumes samtools is in PATH)
 -g   or --img                                    Enables the generation of statistic plots of per-base % mutations/RT-stops and, for MaP
                                                  experiments only, of % mutated reads (requires R)
 -R   or --R-path                  <string>       Path to R executable (Default: assumes R is in PATH)
 -t5  or --trim-5prime             <int>[,<int>]  Comma separated list (no spaces) of values indicating the number of bases trimmed from the
                                                  5'-end of reads in the respective sample SAM/BAM files (Default: 0)
                                                  Note: Values must be provided in the same order as the input files.
                                                        If a single value is specified along with multiple SAM/BAM files, it will
                                                        be used for all files.
 -f   or --fasta                   <string>       Path to a FASTA file containing the reference transcripts
                                                  Note: Transcripts in this file must match transcripts in SAM/BAM file headers
 -ndd or --no-discard-duplicates                  Reads marked as PCR/optical duplicates, discarded by default, will be also considered
 -pn  or --primary-only                           Considers only primary alignments (SAM flag != 256)
 -po  or --paired-only                            When processing SAM/BAM files from paired-end experiments, only those reads for which
                                                  both mates are mapped will be considered
 -pp  or --properly-paired                        When processing SAM/BAM files from paired-end experiments, only those reads mapped in a
                                                  proper pair will be considered
 -mq  or --map-quality                            Minimum mapping quality to consider a read (Default: 0)
 -ls  or --library-strandedness    <string>       Defines which genomic strand alignment-derived counts must be assigned to (0-2, 
                                                  Default: unstranded [0] (with -m or -co), second-strand [2] otherwise)
                                                  Note: strandedness specified via -ls can be overridden for individual samples by appending 
                                                        a colon followed by the library type to the sample name.
 -co  or --coverage-only                          Only calculates per-base coverage (disables RT-stops/mutations count)
 -m   or --count-mutations                        Enables mutations count instead of RT-stops count (for SHAPE-MaP/DMS-MaPseq)

 |
 +- Mutation count mode options
    -om or --only-mut                <string>     Only the specified mutations will be counted
                                                  Note: mutations must be provided in the form [original]2[mutated]. For example, "A2T" (or "A>T",
                                                        or "A:T") will only count mutation events in which a reference A base has been sequenced as
                                                        a T. IUPAC codes are also accepted. Multiple mutations must be provided as a comma (or semi-colon)
                                                        separated list (e.g. A2T;C:N,G>A). When specified, this parameter automatically disables insertion
                                                        and deletion count
    -ds or --discard-shorter         <int>        Discards reads spanning less than this number of bases, excluding clipped bases (unless -ic is specified)
                                                  (Default: 1)
    -q  or --min-quality                          Minimum quality score value to consider a mutation (Phred+33, Default: 20)
    -ncl or --no-cov-low-qual                     If a mutated base (or one of the surrounding bases, if -es is specified) does not exceed the -mq minimum
                                                  quality threshold, that base will be considered as non covered
    -es or --eval-surrounding                     When considering a mutation, also evaluate the quality of surrounding bases (+/- 1nt)
                                                  Note: the quality score threshold set by -q (or --min-quality) also applies to these bases
    -nd or --no-deletions                         Ignores deletions
    -ni or --no-insertions                        Ignores insertions
    -na or --no-ambiguous                         Ignores ambiguously mapped deletions
                                                  Note: The default behavior is to re-align them to their right-most valid position (or to their
                                                        left-most valid position if -la has been specified)
    -la or --left-align                           Re-aligns ambiguously mapped deletions to their left-most valid position
                                                  Note: by default, ambiguously mapped deletion are re-aligned to their right-most valid position
    -rd or --right-deletion                       Only the right-most base in a deletion is marked as mutated
    -ld or --left-deletion                        Only the left-most base in a deletion is marked as mutated
    -md or --max-deletion-len        <int>        Ignores deletions longer than this number of nucleotides (Default: 10)
    -me or --max-edit-distance       <float>      Discards reads with editing distance frequency higher than this threshold (0<m<=1, Default: 0.15 [15%])
    -eq or --median-quality          <int>        Median quality score threshold for discarding low-quality reads (Phred+33, Default: 20)
    -dc or --discard-consecutive     <int>        Discards consecutive mutations within this distance from eachothers
    -cc or --collapse-consecutive                 Collapses consecutive mutations/indels toward the 3'-most one
    -mc or --max-collapse-distance   <int>        Maximum distance between consecutive mutations/indels to allow collapsing (requires -cc, >=0, Default: 2)

HELP

}

