#!/usr/bin/env perl

use strict;
use File::Basename;
use File::Copy;
use File::Spec;
use FindBin qw($Bin);
use Fcntl qw(:flock SEEK_SET SEEK_END);
use Getopt::Long qw(:config no_ignore_case);

use lib $Bin . "/lib";

use Core::Utils;
use Core::Mathematics qw(:all);
use Core::Process::Queue;
use Core::Statistics;
use Data::IO::Sequence;
use Data::Sequence::Utils;
use Graphics::Chart::Barplot;
use Graphics::Image;
use RF::Data::RC;
use RF::Data::IO::MM;
use RF::Data::IO::RC;
use RF::Utils;
use Term::Progress;
use Term::Progress::Multiple;
use Term::Table;
use Term::Utils;

$|++;

my ($tmpdir, $output, $wt, $samtoolsParams,
    $samtools, $multifasta, $sam, $help, $sortByReadName,
    $overwrite, $error, $bam_trim5, $offset, 
    $threads, $processmanager, $mbfile, $table, 
    $mutcount, $seqio, $rcio, $includeclip, $maxClipped,
    $covonly, $mutmap, $nodel, $pp, $po, $tsPerJob,
    $nodiscarddup, $minqual, $maxdel, $maxmut, $mmSplitPairedEnd,
    $hashead, $progressBar, $mapqual, $noambiguous, 
    $medianqual, $noins, $collapse, $maxmutdist, $samtoolsMem,
    $evalsurround, $leftalign, $discardshorter, $leftdel, 
    $rightdel, $rmconsecutive, $maxcov, $primaryonly, 
    $whitelist, $onlyMut, $noCovLowQual, $outRawCounts, 
    $fast, $multiBar, $image, $R, $forceSingleRead, $peAllMuts, 
    @mutClasses, @bam_trim5, @ids, @qcounter, @stats, %transcripts, 
    %files, %masks, %realid, %onlyMut, %allStats, %allFreqs, 
    %inHeaders);

do {

    local $SIG{__WARN__} = sub { };

    GetOptions( "h|help"                       => \$help,
                "a|fast"                       => \$fast,
                "o|output-dir=s"               => \$output,
                "ow|overwrite"                 => \$overwrite,
                "t5|trim-5prime=s"             => \$bam_trim5,
                "wt|working-threads=i"         => \$wt,
                "s|samtools=s"                 => \$samtools,
                "p|processors=i"               => \$threads,
                "f|fasta=s"                    => \$multifasta,
                "m|count-mutations"            => \$mutcount,
                "orc|out-raw-counts"           => \$outRawCounts,
                "mf|mask-file=s"               => \$mbfile,
                "ic|include-clipped"           => \$includeclip,
                "co|coverage-only"             => \$covonly,
                "nd|no-deletions"              => \$nodel,
                "ni|no-insertions"             => \$noins,
                "md|max-deletion-len=i"        => \$maxdel,
                "pp|properly-paired"           => \$pp,
                "po|paired-only"               => \$po,
                "q|min-quality=i"              => \$minqual,
                "mq|map-quality=i"             => \$mapqual,
                "me|max-edit-distance=s"       => \$maxmut,
                "na|no-ambiguous"              => \$noambiguous,
                "la|left-align"                => \$leftalign,
                "eq|median-quality=i"          => \$medianqual,
                "cc|collapse-consecutive"      => \$collapse,
                "mc|max-collapse-distance=i"   => \$maxmutdist,
                "es|eval-surrounding"          => \$evalsurround,
                "ds|discard-shorter=i"         => \$discardshorter,
                "mm|mutation-map"              => \$mutmap,
                "ld|left-deletion"             => \$leftdel,
                "rd|right-deletion"            => \$rightdel,
                "dc|discard-consecutive=i"     => \$rmconsecutive,
                "mv|max-coverage=i"            => \$maxcov,
                "pn|primary-only"              => \$primaryonly,
                "wl|whitelist=i"               => \$whitelist,
                "om|only-mut=s"                => \$onlyMut,
                "ndd|no-discard-duplicates"    => \$nodiscarddup,
                "ncl|no-cov-low-qual"          => \$noCovLowQual,
                "P|per-file-progress"          => \$multiBar,
                "g|img"                        => \$image,
                "R|R-path=s"                   => \$R,
                "sm|samtools-memory=s"         => \$samtoolsMem,
                "fsr|force-single-read"        => \$forceSingleRead,
                "pam|paired-end-all-mutations" => \$peAllMuts,
                "sbn|sort-by-read-name"        => \$sortByReadName,
                "msp|mm-split-paired-end=s"    => \$mmSplitPairedEnd,
                "mp|max-clipped=s"             => \$maxClipped ) or help(1);

};

help() if ($help);

# Default values
$output ||= "rf_count/";
$wt ||= 1;
$offset = 0;
$threads ||= 1;
$bam_trim5 //= 0;
$minqual //= 20;
$medianqual //= 20;
$mapqual //= 0;
$maxdel //= 10;
$maxmutdist //= 2;
$maxmut ||= 0.15;
$discardshorter //= 1;
$rmconsecutive //= 0;
$maxClipped ||= 0;
$mmSplitPairedEnd ||= 0;
$samtools ||= which("samtools");
$samtoolsMem ||= "500M";
$R = checkRinstall($R) if ($image && !$covonly);
@mutClasses = qw(AC AG AT CA CG CT GA GC GT TA TC TG ins del);

$discardshorter = uc($discardshorter);
$output =~ s/\/?$/\//;
$tmpdir = $output . "tmp/";
$multiBar = 0 if (!-t STDOUT);

##
# Input validation
##

die "\n  [!] Error: No sample SAM/BAM file provided\n\n" if (!@ARGV);
die "\n  [!] Error: No FASTA file provided\n\n" if (!defined $multifasta);
die "\n  [!] Error: Provided FASTA file doesn't exist\n\n" if (!-e $multifasta);
die "\n  [!] Error: Provided mask file doesn't exist\n\n" if (defined $mbfile && !-e $mbfile);
die "\n  [!] Error: Parameters -co and -m are mutually exclusive\n\n" if ($mutcount && $covonly);
die "\n  [!] Error: Working threads value must be an integer greater than 0\n\n" if (!isint($wt) || $wt < 1);
die "\n  [!] Error: Invalid format for -t5 parameter's argument\n\n" if (defined $bam_trim5 && $bam_trim5 !~ m/^(\d+[;,]?)+$/);
die "\n  [!] Error: Number of processors must be an integer greater than 0\n\n" if ($threads < 1);
die "\n  [!] Error: Minimum quality score value must be and integer >= 0 and <= 41\n\n" if (!inrange($minqual, [0, 41]));
die "\n  [!] Error: Median read's quality score value must be and integer >= 0 and <= 41\n\n" if (!inrange($medianqual, [0, 41]));
die "\n  [!] Error: Maximum edit distance value must be > 0 and <= 1\n\n" if (!inrange($maxmut, [0, 1]) || !$maxmut);
die "\n  [!] Error: Parameters -na and -la are mutually exclusive\n\n" if ($noambiguous && $leftalign);
die "\n  [!] Error: Parameters -cc and -dc are mutually exclusive\n\n" if ($collapse && $rmconsecutive);
die "\n  [!] Error: Max coverage must be >= 1000\n\n" if (defined $maxcov && $maxcov < 1000);
die "\n  [!] Error: Max number of clipped bases must be a positive INT\n\n" if (!isint($maxClipped) || !ispositive($maxClipped));
die "\n  [!] Error: MM paired-end splitting value must be a positive INT\n\n" if (!isint($mmSplitPairedEnd) || !ispositive($mmSplitPairedEnd));
die "\n  [!] Error: Parameter -mv requires parameter -m\n\n" if ($maxcov && !$mutcount);
die "\n  [!] Error: Parameter -mm requires parameter -m\n\n" if ($mutmap && !$mutcount);
die "\n  [!] Error: Parameter -nd requires parameter -m\n\n" if ($nodel && !$mutcount);
die "\n  [!] Error: Parameter -orc requires parameter -m\n\n" if ($outRawCounts && !$mutcount);
die "\n  [!] Error: Parameter -om requires parameter -m\n\n" if ($onlyMut && !$mutcount);
die "\n  [!] Error: Whitelist must be an integer > 0\n\n" if (defined $whitelist && $whitelist < 1 && $mutcount);
die "\n  [!] Error: Discard shorter must be either \"MEDIAN\" or > 0\n\n" if (defined $discardshorter && ((isint($discardshorter) && $discardshorter < 1) && $discardshorter ne "MEDIAN"));
die "\n  [!] Error: Invalid value/format for samtools memory\n\n" if ($samtoolsMem !~ /^\d+[KMG]$/);
warn "\n  [!] Warning: Some input files are duplicated. Considering only unique files...\n" if (@ARGV != uniq(@ARGV));

if ($image && $covonly) {

    print "\n  [i] Note: No plot will be generated in -co (--coverage-only) mode...\n";
    undef($image);

}

if ($mutcount && $onlyMut) {

    # Automatically disables counting indels
    $nodel = 1;
    $noins = 1;

    %onlyMut = map { my $i = $_; map { $i . $_ => { count => 0,
                                                    take  => 0 } } grep { $i ne $_ } qw(A C G T) } qw(A C G T);

    foreach my $mutation (split(/[,;]/, $onlyMut)) {

        my @bases = split(/[:2>]/, $mutation);

        die "\n  [!] Error: Invalid format for -mo parameter's argument \"" . $mutation . "\"\n\n" if (@bases != 2);
        die "\n  [!] Error: Mutation in -mo parameter's argument \"" . $mutation . "\" contains a non-IUPAC character\n\n" if (!isiupac(join("", @bases)));

        foreach my $base1 (split //, (iupac2nt($bases[0]))[0]) {

            foreach my $base2 (grep { $base1 ne $_ } split //, (iupac2nt($bases[1]))[0]) { $onlyMut{$base1 . $base2}->{take} = 1; }

        }

    }

}

if (!defined $samtools) { die "\n  [!] Error: samtools is not in PATH\n\n"; }
elsif (!-e $samtools) { die "\n  [!] Error: samtools does not exist\n\n"; }
elsif (!-x $samtools) { die "\n  [!] Error: samtools is not executable\n\n"; }
else {

    my $ret = `$samtools 2>&1`;

    if ($ret =~ m/Version: ([\d\.]+)/) {

        my $version = $1;

        die "\n  [!] Error: RF Count requires SAMTools v1 or greater (Detected: v" . $version . ")\n\n" if (substr($version, 0, 1) < 1);

    }
    else { warn "\n  [!] Warning: Unable to detect SAMTools version\n"; }

    my (@noFlags, @yesFlags);
    @noFlags = qw(UNMAP QCFAIL);
    push(@yesFlags, "PAIRED") if ($pp || $po);
    push(@yesFlags, "PROPER_PAIR") if ($pp);
    push(@noFlags, "REVERSE") if (!$mutcount && !$covonly);
    push(@noFlags, "SECONDARY") if ($primaryonly);
    push(@noFlags, "DUP") if (!$nodiscarddup);

    $samtoolsParams = "-f " . join(",", uniq(@yesFlags)) . " " if (@yesFlags);
    $samtoolsParams .= "-F " . join(",", uniq(@noFlags)) . " -q $mapqual";

}

print "\n[+] Making output directory...";

if (-e $output) {

    if ($overwrite) {

        my $error = rmtree($output);

        die "\n\n  [!] Error: " . $error . "\n\n" if ($error);

    }
    else { die "\n\n  [!] Error: Output directory already exists." .
               "\n      Please use -ow (or --overwrite) to overwrite output directory\n\n"; }

}

if (my $error = mktree($tmpdir, "755")) { die "\n\n  [!] Error: Unable to create output directory ($error)\n\n"; }
mktree("${output}whitelists/", "755") if ($whitelist);
mktree("${output}plots/", "755") if ($image);

if ($mutcount) {

    mktree("${output}frequencies/", "755") if ($onlyMut);
    mktree("${output}raw_counts/", "755") if ($outRawCounts);

}

##
# Prepare files
##

$table = Term::Table->new(indent => 2);
$table->head("Sample", "File format", "Sorted", "Indexed", "5'-end trimming");

@bam_trim5 = split(/,/, $bam_trim5);

undef($bam_trim5);
$bam_trim5 = shift(@bam_trim5) if (@bam_trim5 == 1);        # If only one value for 5' trimming in SAM/BAM files has been specified,
                                                            # this is applied to all the passed SAM/BAM files

print "\n[+] Checking files:\n\n";

foreach my $sample (uniq(@ARGV)) {

    my ($format, $sorted, $missingMD, @sample);
    @sample = split /:/, $sample;
    unshift(@sample, (fileparse($sample, qr/\.[^.]*/))[0]) if (@sample == 1 || $sample[0] eq "");

    die "  [!] Error: Specified sample file \"" . $sample[1] . "\" doesn't exist\n\n" if (!-e $sample[1]);

    ($format, $sorted, $missingMD) = guessTypeAndSorting($sample[1]);

    push(@qcounter, { path      => $sample[1],
                      file      => exists $files{$sample[0]} ? $sample[0] . "_" . $files{$sample[0]} : $sample[0],
                      type      => $format,
                      sorted    => $sorted,
                      indexed   => -e $sample[1] . ".bai" || -e $sample[1] . ".csi" ? 1 : 0,
                      trim5     => 0,
                      missingMD => $missingMD });

    $files{$sample[0]}++;

    # If only one value for 5' trimming in SAM/BAM files has been specified, this is applied to all the passed SAM/BAM files
    $qcounter[-1]->{trim5} = isint($bam_trim5) && ispositive($bam_trim5) ? $bam_trim5 : shift(@bam_trim5);

    die "  [!] Error: Fewer 5'-end trimming values in -t5 list than provided SAM/BAM files\n\n" if (!defined $qcounter[-1]->{trim5});
    die "  [!] Error: 5'-end trimming value must be a positive integer\n\n" if (!ispositive($qcounter[-1]->{trim5}) || !isint($qcounter[-1]->{trim5}));

    $table->row($qcounter[-1]->{file}, $qcounter[-1]->{type}, $sorted ? "Yes" : "No", 
                $qcounter[-1]->{indexed} ? "Yes" : "No", ($mutcount || $covonly) ? "Ignored" : $qcounter[-1]->{trim5} . " nt");

}

$multiBar = 0 if ($multiBar && @qcounter > (termsize())[0]);

die "  [!] Error: More 5'-end trimming values in -t5 list than provided BAM files\n\n" if (@bam_trim5);

$table->print();

print "\n";

# Starts the process manager
$processmanager = Core::Process::Queue->new( processors => $threads,
                                             stderr     => $output . "samtools.log",
                                             tmpDir     => $tmpdir,
                                             verbosity  => 1 );

##
# FASTA Parsing
##

print "\n[+] Getting transcripts from reference, and building count table base structure...";

$seqio = Data::IO::Sequence->new( file              => $multifasta,
                                  maskIUPAC         => 1,
                                  checkDuplicateIds => 1 );
$rcio = RF::Data::IO::RC->new( file       => $tmpdir . "base.rc",
                               index      => $output . "index.rci",
                               buildindex => 1,
                               mode       => "w" );

while (my $entry = $seqio->read()) {

    $entry->unmask(); # Makes sequence uppercase

    my ($id, $rentry, $offset);
    $id = $entry->id();
    $id =~ s/[^\w\.\-:]/_/g;    # Fixes sequence ids containing slashes that can cause errors at later stages

    $rentry = RF::Data::RC->new( id       => $id,
                                 sequence => $entry->sequence(),
                                 counts   => [(0) x $entry->length()],
                                 coverage => [(0) x $entry->length()] );
    ($offset) = $rcio->write($rentry);

    # Store length for later steps
    $transcripts{$id} = $fast ? $entry->sequence() : $entry->length();
    $realid{$id} = $entry->id();

}

$rcio->close();

##
# SAM/BAM Header validation
##

print "\n[+] Inspecting SAM/BAM file headers...";

foreach my $sample (@qcounter) {

    my $inheader = 0;

    open(my $fh, "-|", "$samtools view -H " . $sample->{path} . " 2>&1") or die "\n\n  [!] Error: Unable to read SAM/BAM header from sample \"" . $sample->{file} . "\" (" . $! . ")\n\n";

    while (my $row = <$fh>) {

        chomp($row);

        if ($row =~ m/^\@SQ\tSN:(.+?)\tLN:(\d+)/) {

            my ($id, $length) = ($1, $2);
            $id =~ s/[^\w\.\-:]/_/g;

            if (exists $transcripts{$id}) {

                my $trueLen = $fast ? length($transcripts{$id}) : $transcripts{$id};

                die "\n\n  [!] Error: Transcript \"$id\" length from sample \"" . $sample->{file} . "\" header ($length nt) differs from reference ($trueLen nt)." .
                    "\n             Please re-map your dataset using the same reference, and try again.\n\n" if ($trueLen != $length);

                $inheader++;

            }

            $inHeaders{$id} = 1;

        }

    }
    close($fh);

    die "\n\n  [!] Error: All transcripts in sample \"" . $sample->{file} . "\" header are absent in reference." .
        "\n             Please re-map your dataset using the same reference, or provide a different reference by the -f (or --fasta) parameter.\n\n" if (!$inheader);

    warn "\n\n  [!] Warning: Only " . $inheader . "/" . keys(%transcripts) . " reference transcripts are present in sample \"" . $sample->{file} . "\" header." .
         "\n               All transcripts absent in reference will be skipped.\n" if ($inheader != keys(%transcripts));

}

for (keys %transcripts) { delete($transcripts{$_}) if (!exists $inHeaders{$_}); }

@ids = sort keys %transcripts;
$tsPerJob = max(1, round(@ids / 500));

##
# Estimate read length (if maxcov is specified)
##

if (($maxcov || $discardshorter eq "MEDIAN") && $mutcount) {

    print "\n[+] Estimating median read lengths:\n\n";

    foreach my $sample (@qcounter) {

        my @lengths;

        open(my $fh, "-|", "$samtools view --no-PG $samtoolsParams " . $sample->{path} . " 2>&1") or die "  [!] Error: Unable to read SAM/BAM file for sample \"" . $sample->{file} . "\" (" . $! . ")\n\n";
        while (my $row = <$fh>) {

            chomp($row);
            my @row = split(/\t/, $row);

            push(@lengths, (parsecigar($row[5]))[2]); # The true length (so the number of bases of target covered)

            last if (@lengths == 10000);

        }
        close($fh);

        $sample->{medianReadLen} = round(median(@lengths));

    }

    my $table = Term::Table->new(indent => 2);
    $table->head("Sample", "Median");
    $table->row($_->{file}, $_->{medianReadLen} . " nt") for (@qcounter);
    $table->print();

    print "\n\n";

}
else { 
    
    print "\n"; 
    print "\n" if ((grep { !$_->{sorted} || $_->{type} eq "SAM" || $_->{missingMD} || !$_->{indexed} } @qcounter) || $sortByReadName);

}

##
# Sorting BAM files (if needed)
##

if (my @unsorted = grep { !$_->{sorted} } @qcounter) { 

    foreach my $sample (@unsorted) {

        my $path = $tmpdir . $sample->{file} . "_sorted.bam";

        $processmanager->enqueue( command => "$samtools sort --no-PG -@ $wt -m $samtoolsMem -O BAM -T \"" . $tmpdir . $sample->{file} . "\" -o \"$path\" \"" . $sample->{path} . "\"",
                                  id      => $sample->{file} );

        $sample->{path} = $path;
        $sample->{type} = "BAM";

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Sorting " . scalar(@unsorted) . " unsorted SAM/BAM file(s)");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to index sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

##
# Converting SAM>BAM files (if needed)
##

if (my @sam = grep { $_->{type} eq "SAM" } @qcounter) { 

    foreach my $sample (@sam) {

        my $path = $tmpdir . $sample->{file} . ".bam";

        $processmanager->enqueue( command => "$samtools view --no-PG -@ $wt -O BAM -o \"$path\" \"" . $sample->{path} . "\"",
                                  id      => $sample->{file} );

        $sample->{path} = $path;
        $sample->{type} = "BAM";

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Converting " . scalar(@sam) . " sorted SAM file(s) to BAM format");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to index sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

##
# Computing MD tag (if needed)
##

if (my @missingMD = grep { $_->{missingMD} } @qcounter) { 

    foreach my $sample (@missingMD) {

        my $path = $tmpdir . $sample->{file} . ".withMD.bam";

        $processmanager->enqueue( command => "$samtools calmd --no-PG -@ $wt -b \"" . $sample->{path} . "\" \"$multifasta\" > \"$path\"",
                                  id      => $sample->{file} );

        $sample->{path} = $path;

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Computing missing MD tag for " . scalar(@missingMD) . " files");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to compute MD tag for sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

##
# Indexing BAM files (if needed)
##

if (my @toIndex = grep { !$_->{indexed} } @qcounter) { 

    foreach my $sample (@toIndex) {

        $processmanager->enqueue( command => "$samtools index -@ $wt \"" . $sample->{path} . "\"",
                                  id      => $sample->{file} );

        $sample->{indexed} = 1;

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Indexing " . scalar(@toIndex) . " BAM file(s)");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to index sample \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

if ($sortByReadName) {

    foreach my $sample (@qcounter) {

        foreach my $id (@ids) {

            my $path = $tmpdir . ".$id." . $sample->{file} . ".bam";

            $processmanager->enqueue( command => "$samtools view --no-PG -@ $wt -O BAM -o \"$path\" \"" . $sample->{path} . "\" \"$id\"",
                                      id      => "$id." . $sample->{file} );

        }

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Splitting BAM files by reference");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to extract \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    foreach my $sample (@qcounter) {

        foreach my $id (@ids) {

            my ($path1, $path2);
            $path1 = $tmpdir . ".$id." . $sample->{file} . ".bam";
            $path2 = $tmpdir . ".$id.readSorted." . $sample->{file} . ".bam";

            $processmanager->enqueue( command => "$samtools sort -n --no-PG -@ $wt -O BAM -o \"$path2\" \"$path1\" && rm \"$path1\"",
                                      id      => "$id." . $sample->{file} );

        }

    }

    print "\n";
    $progressBar->reset();
    $progressBar->init("Sorting splits by read name");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to sort \"$_[0]\"\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });
    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}

if (-e $mbfile) {

    my $masked = 0;

    print "\n[+] Applying mask to transcript bases...";

    open(my $mh, "<", $mbfile) or die "\n\n  [!] Error: Unable to read from mask file (" . $! . ")\n\n";
    while(my $line = <$mh>) {

        chomp($line);

        next if ($line =~ m/^\s?#/);

        my @line = split(/[,;]/, $line);
        $line[0] =~ s/\//_/g;

        next if (@line < 2);
        next if (!exists $transcripts{$line[0]});

        foreach my $i (1 .. $#line) {

            my $sequence = uc($seqio->read($line[0])->sequence());
            $line[$i] =~ s/\s//g;

            if ($line[$i] =~ /^(?:rc:)?([ACGTUN]+)$/i) {

                my ($maskSeq, $index);
                $maskSeq = uc($1);
                $maskSeq = dnarevcomp($maskSeq) if ($line[$i] =~ /^rc:/i);
                $line[$i] = rna2dna($maskSeq);
                $index = index($sequence, $line[$i]);

                next if ($index == -1);

                push(@{$masks{$line[0]}}, [$index, $index + length($line[$i]) - 1]);
                $masked++;

            }
            elsif ($line[$i] =~ m/^(\d+)-(\d+)$/) {

                my ($start, $end) = ($1, $2);
                my $trueLen = $fast ? length($transcripts{$line[0]}) : $transcripts{$line[0]};

                next if (!ispositive($start) ||
                         $start > $end ||
                         $start > $trueLen - 1 ||
                         $end > $trueLen - 1);

                push(@{$masks{$line[0]}}, [$start, $end]);
                $masked++;

            }

        }

    }
    close($mh);

    print " [$masked masks applied]";

}

print "\n[+] Queuing jobs...";

for(my $i = 0; $i < @qcounter; $i++) {

    my ($sample, @sampleIds);
    $sample = $qcounter[$i];

    open(my $fh, "-|", "$samtools idxstats -@ " . ($threads * $wt) . " " . $sample->{path}) or die "\n\n  [!] Error: Unable to get IDs of covered transcripts ($!)\n\n";
    while(<$fh>) {

        chomp;
        my @entry = split /\t/;
        $entry[0] =~ s/[^\w\.\-:]/_/g;
        push(@sampleIds, $entry[0]) if ($entry[2] && exists $transcripts{$entry[0]}); # Only take IDs with alignments

    }
    close($fh);

    if (!@sampleIds) {
    splice(@qcounter, $i, 1);
    $i--;
    }

    for (my $i = 0; $i < @sampleIds; $i += $tsPerJob) {

        my ($last, @ts);
        $last = min($#sampleIds, $i + $tsPerJob - 1);
        @ts = @sampleIds[$i .. $last]; 

        $processmanager->enqueue( command   => \&count,
                                  arguments => [ $sample, \@ts ],
                                  id        => $sample->{file} );

        last if ($last == $#sampleIds);

    }

}

die "\n\n  [!] Error: All sample files are empty (no alignments). Nothing to be processed.\n\n" if (!$processmanager->queueSize());

print "\n[+] Copying RC base structure...";

foreach my $sample (@qcounter) {

    die "\n\n  [!] Error: Unable to copy counts table structure for sample \"" . $sample->{file} . "\" (" . $! . ")" unless (copy($tmpdir . "base.rc", $output . $sample->{file} . ".rc"));

}

print "\n[+] Calculating per-base " . ($covonly ? "coverage" : ($mutcount ? "mutation counts" : "RT-stops") . " and coverage") . ". This may take a while...\n\n";

if ($multiBar) {

    $progressBar = Term::Progress::Multiple->new( sets    => { map { $_->{file} => $processmanager->queueSize() / @qcounter } @qcounter },
                                                  colored => 1 ); 
    $progressBar->initAll();

}
else {

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init();

}

$processmanager->processors(min(ncores(), $threads * $wt));
$processmanager->stderr($output . "error.out");
$processmanager->parentOnExit(sub { 

    # Here we retrieve the partials from each process and combine them
    my ($sample, $pid, $uid, $stats);
    ($sample, $pid, $uid) = @_[0..2];
    $stats = ($processmanager->dequeue($uid)->exitcode())[0];
    
    if (ref($stats) eq "HASH") {

        my $errMsg = $stats->{error} || $stats->{warning};

        if ($multiBar) { $progressBar->update($sample); }
        else { $progressBar->update($errMsg ? (1, "$sample: $errMsg") : 1); }

        if (defined $stats->{error} || (defined $stats->{warning} && !$stats->{total})) { 
            
            $progressBar->appendText($sample, $errMsg) if ($multiBar);
            
            if (defined $stats->{error}) {

                $allStats{$sample}->{error} = $errMsg;
                $processmanager->deleteQueue($sample);
                $processmanager->killById($sample);

            }

        }
        else {

            $progressBar->appendText($sample, $errMsg) if ($multiBar && $errMsg);

            $allStats{$sample}->{$_} += $stats->{$_} for (qw(A C G T mutated total totalPrimary));
            $allStats{$sample}->{covered} += scalar(@{$stats->{ids}});
            
            if (defined $whitelist && $stats->{medianCov} >= $whitelist) {

                my $wlIO = Data::IO->new( file  => $output . "whitelists/" . $sample . ".txt",
                                          mode  => "w+",
                                          flush => 1 );
                $wlIO->write("$_\n") for (@{$stats->{ids}});

            }

            if ($mutcount && $outRawCounts) {

                my $rawIO = Data::IO->new( file  => $output . "raw_counts/" . $sample . ".txt",
                                           mode  => "w+",
                                           flush => 1 );

                for my $i (0 .. $#{$stats->{ids}}) {

                    $rawIO->write($stats->{ids}->[$i] . "\n" .
                                  join("\n", map { join("\t", $_, join(",", @{$stats->{rawCounts}->[$i]->{$_}})) } @mutClasses) . "\n\n");

                }

            }

            if ($mutcount && $onlyMut) { $allFreqs{$sample}->{$_}->{count} += $stats->{onlyMut}->{$_}->{count} for (keys %{$stats->{onlyMut}}); }

        }

    }

});

$processmanager->start();
$processmanager->waitall();

print "\n";

if ($mutmap && (my @samples = grep { !defined $allStats{$_}->{error} } keys %allStats)) {

    print "\n[+] Re-assembling partial MM files...";

    $processmanager->parentOnExit(sub {});

    foreach my $sample (@samples) {

        $processmanager->enqueue( command      => \&reassembleMM,
                                  arguments    => [ $sample ],
                                  id           => $sample );

    }

    $processmanager->start();
    $processmanager->waitall();

    print "\n[+] Indexing MM files...";

    foreach my $sample (@samples) {

        $processmanager->enqueue( command      => \&indexMM,
                                  arguments    => [ $sample ],
                                  id           => $sample );

    }

    $processmanager->start();
    $processmanager->waitall();

}

$table = Term::Table->new(indent => 2);

if ($covonly) { $table->head("Sample", "Covered"); }
elsif ($mutcount) { $table->head("Sample", "Covered", "Mutated alignments", "\%A muts", "\%C muts", "\%G muts", "\%U muts"); }
else { $table->head("Sample", "Covered", "\%A stops", "\%C stops", "\%G stops", "\%U stops"); }

foreach my $sample (grep { !defined $allStats{$_}->{error} } sort keys %allStats) {

    my @row = ($sample, $allStats{$sample}->{covered} || 0);

    if (!$covonly && $allStats{$sample}->{covered}) {

        my ($total, $mutReads, @bases);
        $total = sum(map { $allStats{$sample}->{$_} } qw(A C G T));
        $mutReads = 0;

        if ($mutcount) {

            if ($allStats{$sample}->{total}) {

                $mutReads = sprintf("%.2f", $allStats{$sample}->{mutated} / $allStats{$sample}->{total} * 100);
                push(@row, $allStats{$sample}->{mutated} . "/" . $allStats{$sample}->{total} . " ($mutReads\%)");

            }
            else { push(@row, "-"); }

        }

        if ($total) {

            @bases = map { sprintf("%.2f", $allStats{$sample}->{$_} / $total * 100 ) } qw(A C G T);
            push(@row, @bases);

        }
        else { push(@row, ("-") x 4); }

        push(@stats, [ $sample, @bases, $mutReads ]);


    }

    # Updates the read count in the RC file
    $rcio = RF::Data::IO::RC->new( file           => $output . $sample . ".rc",
                                   mode           => "w+",
                                   noPreloadIndex => 1 );
    $rcio->mappedreads($allStats{$sample}->{totalPrimary});
    $rcio->close();

    if ($mutcount && $onlyMut) {

        my ($totMuts, $freqIO);
        $totMuts = sum(map { $allFreqs{$sample}->{$_}->{count} } keys %{$allFreqs{$sample}});
        $freqIO = Data::IO->new( file  => $output . "frequencies/" . $sample . ".txt",
                                 mode  => "w+",
                                 flush => 1 );
        $freqIO->write(join("\n", map { join("\t", $_, $totMuts ? sprintf("%.6f", $allFreqs{$sample}->{$_}->{count} / $totMuts) : "NaN") } sort keys %{$allFreqs{$sample}}) . "\n");

    }

    $table->row(@row);

}

if ($table->nRows()) {

    print "\n[+] Statistics:\n\n";
    $table->print();
    print "\n";

}

if (my @failed = grep { defined $allStats{$_}->{error} } sort keys %allStats) {

    print "\n  [!] Processing failed for sample \"$_\" (" . $allStats{$_}->{error} . ")" for (@failed);
    print "\n";

}

if ($image && @stats) {

    print "\n[+] Generating plots...";

    my ($img, $plot);
    $img = Graphics::Image->new( file   => $output . "plots/base_stats.pdf",
                                 width  => min(scalar(@stats), 40),
                                 height => 8,
                                 R      => $R,
                                 tmpdir => $tmpdir );

    $plot = Graphics::Chart::Barplot->new( x             => "sample",
                                           data          => [ map { @$_[1 .. 4] } @stats ],
                                           dataLabels    => { base   => [ (qw(A C G T)) x @stats ],
                                                              sample => [ map { ($_->[0]) x 4 } @stats ] },
                                           fill          => "base",
                                           groupMethod   => "stack",
                                           dataLabelSort => { base => [ qw(T G C A) ]},
                                           legendSort    => [ qw(A C G T) ],
                                           legendColors  => { A => "#24A349",  
                                                              C => "#4687C7", 
                                                              G => "#EDBA1D",
                                                              T => "#A62324" },
                                           yTitle        => "Per-base " . ($mutcount ? "mutations" : "RT-stops") . " (\%)",
                                           xLabelAngle   => 90,
                                           labelTextSize => 8 );
    $img->plot([$plot]);

    if ($mutcount && sum(map { $_->[-1] } @stats)) {

        $img->file($output . "plots/read_mutation_stats.pdf");
        $plot = Graphics::Chart::Barplot->new( x             => "sample",
                                               data          => [ map { $_->[-1] } @stats ],
                                               dataLabels    => { sample => [ map { $_->[0] } @stats ] },
                                               fill          => "sample",
                                               legend        => 0,
                                               legendColors  => { map { $_->[0] => "black" } @stats },
                                               yTitle        => "Mutated reads (\%)",
                                               xLabelAngle   => 90,
                                               labelTextSize => 8 );

        $img->plot([$plot]);

    }

}

print "\n[+] Cleaning up temporary files...";

cleanup();

print "\n[+] All done.\n\n";

sub reassembleMM {

    my $sample = shift;

    my $ret = `cat $tmpdir$sample.*.mm > $output$sample.mm`;

    if (!$ret) {

        my $mmIO = RF::Data::IO::MM->new( file => "$output$sample.mm",
                                          mode => "w+" );
        $mmIO->close(); # adds the EOF marker

    }

}

sub indexMM {

    my $sample = shift;

    my $mmIO = RF::Data::IO::MM->new( file => "$output$sample.mm",
                                      index => "$output$sample.mm.mmi",
                                      mode => "r" );
    $mmIO->buildIndex();
    $mmIO->close(); # adds the EOF marker

}

sub count {

    my ($sample, $ids) = @_;

    undef($processmanager); # Frees unnecessary memory
    $seqio->forceReopenFh(); # On fork() filehandle is cloned. With this we close it and reopen it for all children.

    my ($stats, $mmIO, $rcIO, %stats);
    %stats = ( ids          => [],
               A            => 0,
               C            => 0,
               G            => 0,
               T            => 0,
               mutated      => 0,
               total        => 0,
               totalPrimary => 0,
               medianCov    => [],
               rawCounts    => [],
               onlyMut      => undef,
               warning      => undef,
               error        => undef );
 
    $rcIO = RF::Data::IO::RC->new( file  => $output . $sample->{file} . ".rc",
                                   index => $output . "index.rci",
                                   mode  => "w+" );

    # Including the first ID in the temp file name ensures no clashes between consecutive processes due to PID recycling
    $mmIO = RF::Data::IO::MM->new( file       => $tmpdir . $sample->{file} . "." . $ids->[0] . "." . $$ . ".mm", 
                                   appendable => 1,
                                   mode       => "w" ) if ($mutmap);

    foreach my $id (@$ids) {

        my ($countCmd, $rcEntry, $readsCount, $readsToMaxCov, 
            @readStarts, @counts, @coverage, %rawCounts);

        $countCmd = "$samtools view $samtoolsParams " . ($sortByReadName ? $tmpdir . ".$id.readSorted." . $sample->{file} . ".bam" : $sample->{path} . " " . $realid{$id});

        if (open(my $fh, "-|", "$countCmd 2>&1")) {

            my ($firstRead, %ref, %lastRead);

            if ($fast) { 
                            
                $ref{sequence} = $transcripts{$id}; 
                $ref{length} = length($ref{sequence}); 

            }
            else {

                my $entry = $seqio->read($id);
                $entry->unmask();
                $ref{sequence} = $entry->sequence();
                $ref{length} = $transcripts{$id};

            }

            $firstRead = 1;
            $readsCount = 0;
            $readsToMaxCov = $ref{length} < $sample->{medianReadLen} ? $maxcov : round(($ref{length} * $maxcov / $sample->{medianReadLen}) / ($ref{length} - $sample->{medianReadLen} + 1)) if ($maxcov);
            @counts = (0) x $ref{length};
            @coverage = (0) x $ref{length};
            %rawCounts = map { $_ => [ (0) x $ref{length} ] } @mutClasses if ($outRawCounts);

            while (!eof($fh)) {

                my ($row, $clip5, $cov, $ins,
                    $editdist, $truelen, $skippedMate, @row);
                $row = <$fh>;
                chomp($row);

                if ($row =~ /Could not retrieve index file/) { return({ error => "Error: Missing BAM index" }); }
                elsif ($row =~ /\[.+?\] (.+)$/) { 
                    
                    $stats{"warning"} = "Warning: $1";
                    
                    next;
                    
                }

                @row = split(/\t/, $row);

                if (@row < 11 || $row[5] eq "*") {

                    $stats{"warning"} = "Warning: Malformed BAM entry (< 11 fields or invalid CIGAR)";
                    
                    next;

                }

                ($clip5, $cov, $truelen, $ins) = parsecigar($row[5]);
                @$ins = map { $_ + $row[3] - 1 } @$ins; # Adjust insertion relative position to true position
                $editdist = editdist($row) + @$ins; # Editing distance is calculated this way so that consecutively deleted/inserted bases are counted only once

                if ($mutcount && ($cov < ($discardshorter eq "MEDIAN" ? $sample->{medianReadLen} : $discardshorter) ||
                                  $editdist / $cov > $maxmut ||
                                  median(map { unpack("C*", $_) - 33 } split(//, $row[10])) < $medianqual)) { # Check median read's quality

                    # If paired-end, and one mate has already been stored, we allow the corresponding memory to be freed
                    # as its mate would be discarded here
                    if (exists $lastRead{$row[0]} && !$forceSingleRead) { $skippedMate = 1; }
                    else { next; }

                }
                
                next if ($clip5 && !$mutcount && !$covonly && 
                         (!$includeclip || ($includeclip && $maxClipped && $clip5 > $maxClipped))); # Discard read in RT-count mode, if it has soft/hard clipping at 5'-end

                if ($mutcount || $covonly) {  # Mutations count / Coverage only modes

                    my ($start, $end, $isPairedRead, $mateUnmapped);

                    if (!$skippedMate) {

                        $isPairedRead = $forceSingleRead ? 0 : $row[1] & 1;
                        $mateUnmapped = $row[1] & 8;

                        my (@covered);
                        $start = max($row[3] - 1, 0);
                        
                        # If we include the clipped bases, we need to readjust start, and 
                        # in case including them would pass the ends of the transcript, 
                        # readjust the coverage
                        if ($includeclip && (!$maxClipped || $clip5 <= $maxClipped)) {

                            if ($start - $clip5 < 0) { $cov -= $clip5; }
                            else { $start -= $clip5; }

                        }

                        if ($maxcov) {

                            next if ($ref{length} >= $sample->{medianReadLen} && $start > $ref{length} - $sample->{medianReadLen});
                            next if ($readStarts[$start] == $readsToMaxCov);

                        }

                        unless ($row[1] & 256) {

                            $readsCount++;
                            $stats{totalPrimary}++;

                        }

                        $stats{total}++;
                        $readStarts[$start]++;

                        $end = min($ref{length} - 1, $row[3] + $cov - 2);
                        @covered = $start .. $end;

                        push(@{$lastRead{$row[0]}->{start}}, $start);
                        push(@{$lastRead{$row[0]}->{end}}, $end);
                        push(@{$lastRead{$row[0]}->{covered}}, clonearrayref(\@covered));

                    }

                    if ($mutcount) {

                        my ($muts, $rawMuts, $lowQual, $missingMD);

                        if (!$skippedMate) {

                            ($muts, $rawMuts, $lowQual, $missingMD) = parsemd(\@row, $ref{sequence});

                            return({ error => "Error: Missing MD tag, run 'samtools calmd' to fix" }) if ($missingMD);

                            if ($noCovLowQual) { @{$lastRead{$row[0]}->{covered}->[-1]} = grep { !exists $lowQual->{$_} } @{$lastRead{$row[0]}->{covered}->[-1]}; }

                            push(@{$lastRead{$row[0]}->{ins}}, @$ins);
                            push(@{$lastRead{$row[0]}->{muts}}, @$muts);
                            
                            if ($outRawCounts) { 
                                
                                if (!exists $lastRead{$row[0]}->{rawMuts}) { $lastRead{$row[0]}->{rawMuts} = $rawMuts; }
                                else {

                                    $lastRead{$row[0]}->{rawMuts} = {
                                        map {
                                            my $k = $_;
                                            $k => [ uniq(@{ exists $lastRead{$row[0]}->{rawMuts}->{$k} ? $lastRead{$row[0]}->{rawMuts}->{$k} : [] }, 
                                                         @{ exists $rawMuts->{$k} ? $rawMuts->{$k} : [] }) ]
                                        } keys %{{ %{$lastRead{$row[0]}->{rawMuts}}, %$rawMuts }}
                                    };

                                }
                                
                            }

                        }

                        # If single read, or if paired-end and 1) this mate was skipped, or 2) this is the second mate, or 3) the mate is not mapped
                        if ($skippedMate || ($isPairedRead && (@{$lastRead{$row[0]}->{start}} == 2 || $mateUnmapped)) || !$isPairedRead) {

                            my ($pairStart, $pairEnd) = processMutReads($id, $lastRead{$row[0]});
                            
                            if ($outRawCounts) {

                                foreach my $mut (keys %{$lastRead{$row[0]}->{rawMuts}}) { $rawCounts{$mut}->[$_]++ for (@{$lastRead{$row[0]}->{rawMuts}->{$mut}}); }
                                $rawCounts{ins}->[$_]++ for (@{$lastRead{$row[0]}->{ins}});

                            }

                            if (@{$lastRead{$row[0]}->{muts}}) {

                                for (@{$lastRead{$row[0]}->{muts}}) {

                                    $counts[$_]++;
                                    $stats{substr($ref{sequence}, $_, 1)}++;

                                }

                                $stats{mutated} += @{$lastRead{$row[0]}->{start}};

                                if ($mutmap) {

                                    # This ensures transcripts with no reads are not added to the MM file
                                    if ($firstRead) {

                                        $firstRead = 0;
                                        $mmIO->append_transcript($id, $ref{sequence});

                                    }

                                    if (@{$lastRead{$row[0]}->{start}} == 2 && 
                                        ((!$lastRead{$row[0]}->{overlapping} && $mmSplitPairedEnd && $lastRead{$row[0]}->{gap} > $mmSplitPairedEnd) || ($lastRead{$row[0]}->{overlapping} && $forceSingleRead))) {

                                        my (@mut1, @mut2);
                                        @mut1 = grep { inrange($_, [$lastRead{$row[0]}->{start}->[0], $lastRead{$row[0]}->{end}->[0]]) } @{$lastRead{$row[0]}->{muts}};
                                        @mut2 = grep { inrange($_, [$lastRead{$row[0]}->{start}->[1], $lastRead{$row[0]}->{end}->[1]]) } @{$lastRead{$row[0]}->{muts}};

                                        $mmIO->append_read($lastRead{$row[0]}->{start}->[0], $lastRead{$row[0]}->{end}->[0], scalar(@mut1), \@mut1) if (@mut1);
                                        $mmIO->append_read($lastRead{$row[0]}->{start}->[1], $lastRead{$row[0]}->{end}->[1], scalar(@mut2), \@mut2) if (@mut2);

                                    }
                                    else { $mmIO->append_read($pairStart, $pairEnd, scalar(@{$lastRead{$row[0]}->{muts}}), $lastRead{$row[0]}->{muts}); } 

                                }

                            }

                        }

                    }

                    # Here we handle the coverage both in mutation count and coverage-only modes
                    if ($skippedMate || ($isPairedRead && (@{$lastRead{$row[0]}->{start}} == 2 || $mateUnmapped)) || !$isPairedRead) {

                        my @covered = map { @$_ } @{$lastRead{$row[0]}->{covered}};

                        # If paired-end and single-read is not forced, we have to check if the two mates overlap and take 
                        @covered = uniq(@covered) if (@{$lastRead{$row[0]}->{start}} == 2 && !$forceSingleRead);

                        map {$coverage[$_]++} @covered;

                        delete($lastRead{$row[0]});

                    }

                }
                else {  # RT-stops count mode

                    $row[3] -= $sample->{trim5} + $clip5 + 2;

                    if ($row[3] >= 0) { # Read 5'-end is not before transcript 5'-end

                        $counts[$row[3]]++;
                        $stats{substr($ref{sequence}, $row[3], 1)}++;

                    }
                    else { $cov += $row[3]; } # Start will be negative by the amount that needs to be removed from coverage

                    map { $coverage[$_]++ } max(0, $row[3]) .. min($ref{length} - 1, $row[3] + $cov + $clip5);

                    unless ($row[1] & 256) {

                        $readsCount++;
                        $stats{totalPrimary}++;

                    }

                    $stats{total}++;

                }

            }

            close($fh);

            foreach my $readId (keys %lastRead) {

                my ($pairStart, $pairEnd, @covered);
                ($pairStart, $pairEnd) = processMutReads($id, $lastRead{$readId});

                if ($outRawCounts) {

                    foreach my $mut (keys %{$lastRead{$readId}->{rawMuts}}) { $rawCounts{$mut}->[$_]++ for (@{$lastRead{$readId}->{rawMuts}->{$mut}}); }
                    $rawCounts{ins}->[$_]++ for (@{$lastRead{$readId}->{ins}});

                }

                if (@{$lastRead{$readId}->{muts}}) {

                    for (@{$lastRead{$readId}->{muts}}) {

                        $counts[$_]++;
                        $stats{substr($ref{sequence}, $_, 1)}++;

                    }

                    $stats{mutated} += @{$lastRead{$readId}->{start}};

                    if ($mutmap) {

                        # This ensures transcripts with no reads are not added to the MM file
                        if ($firstRead) {

                            $firstRead = 0;
                            $mmIO->append_transcript($id, $ref{sequence});

                        }

                        if (@{$lastRead{$readId}->{start}} == 2 && 
                            ((!$lastRead{$readId}->{overlapping} && $mmSplitPairedEnd && $lastRead{$readId}->{gap} > $mmSplitPairedEnd) || ($lastRead{$readId}->{overlapping} && $forceSingleRead))) {

                            my (@mut1, @mut2);
                            @mut1 = grep { inrange($_, [$lastRead{$readId}->{start}->[0], $lastRead{$readId}->{end}->[0]]) } @{$lastRead{$readId}->{muts}};
                            @mut2 = grep { inrange($_, [$lastRead{$readId}->{start}->[1], $lastRead{$readId}->{end}->[1]]) } @{$lastRead{$readId}->{muts}};

                            $mmIO->append_read($lastRead{$readId}->{start}->[0], $lastRead{$readId}->{end}->[0], scalar(@mut1), \@mut1) if (@mut1);
                            $mmIO->append_read($lastRead{$readId}->{start}->[1], $lastRead{$readId}->{end}->[1], scalar(@mut2), \@mut2) if (@mut2);

                        }
                        else { $mmIO->append_read($pairStart, $pairEnd, scalar(@{$lastRead{$readId}->{muts}}), $lastRead{$readId}->{muts}); } 

                    }

                }

                @covered = map { @$_ } @{$lastRead{$readId}->{covered}};

                # If paired-end and single-read is not forced, we have to check if the two mates overlap and take 
                @covered = uniq(@covered) if (@{$lastRead{$readId}->{start}} == 2 && !$forceSingleRead);

                map {$coverage[$_]++} @covered;

                delete($lastRead{$readId});

            }

            if (exists $masks{$id}) {

                @counts[$_->[0] .. $_->[1]] = (0) x ($_->[1] - $_->[0] + 1) for (@{$masks{$id}});
                @coverage[$_->[0] .. $_->[1]] = (0) x ($_->[1] - $_->[0] + 1) for (@{$masks{$id}});

            }

            my $rcEntry = RF::Data::RC->new( id         => $id,
                                             sequence   => $ref{sequence},
                                             counts     => \@counts,
                                             coverage   => \@coverage,
                                             readscount => $readsCount );

            $rcIO->write($rcEntry);

            if (sum(@coverage)) {

                push(@{$stats{ids}}, $id);
                push(@{$stats{medianCov}}, median(@coverage));
                push(@{$stats{rawCounts}}, clonehashref(\%rawCounts)) if ($outRawCounts);

            }

        }
        else { return({ error => "Error: Unable to read file \"" . $sample->{file} . "\"" }); }

        unlink($tmpdir . ".$id.readSorted." . $sample->{file} . ".bam") if ($sortByReadName);

    }

    $stats{onlyMut} = \%onlyMut;

    $rcIO->close();
    $mmIO->close() if ($mutmap);

    return(\%stats);

}

sub processMutReads {

    my ($id, $lastRead) = @_;

    my ($pairStart, $pairEnd);
    $pairStart = min(@{$lastRead->{start}});
    $pairEnd = max(@{$lastRead->{end}});

    if (@{$lastRead->{muts}} || (!$noins && @{$lastRead->{ins}})) {

        if (!$noins) {

            push(@{$lastRead->{muts}}, @{$lastRead->{ins}});
            @{$lastRead->{ins}} = uniq(@{$lastRead->{ins}});

        }

        @{$lastRead->{muts}} = sort { $a <=> $b } @{$lastRead->{muts}};

        # Unless otherwise specified, only mutations shared by both mates (if they overlap)
        # must be retained. To this end, we take the mutations in the overlapping region and check
        # if they are present 2 times, otherwise we discard them
        if (@{$lastRead->{start}} == 2) {

            # In case reads have been resorted by read name, the two mates might have been swapped,
            # and the one with the lowest start mapping position might not come first anymore, so
            # we re-sort them
            if ($sortByReadName) {

                my @i = sort { $lastRead->{start}->[$a] <=> $lastRead->{start}->[$b] } 0 .. 1;
                @{$lastRead->{start}} = @{$lastRead->{start}}[@i];
                @{$lastRead->{end}} = @{$lastRead->{end}}[@i];

            }
            
            if ($lastRead->{start}->[1] <= $lastRead->{end}->[0]) {

                my ($overlapStart, $overlapEnd);
                $overlapStart = $lastRead->{start}->[1];
                $overlapEnd = min(@{$lastRead->{end}}); # Handles the case of dovetail reads
                $lastRead->{overlapping} = 1;
                $lastRead->{gap} = 0;

                if (!$peAllMuts) {

                    my (@mutsInOverlap, @mutsOutOverlap, %duplicates);
                    @mutsInOverlap = grep { ++$duplicates{$_} == 2 && inrange($_, [$overlapStart, $overlapEnd]) } @{$lastRead->{muts}};
                    @mutsOutOverlap = grep { !inrange($_, [$overlapStart, $overlapEnd]) } @{$lastRead->{muts}};
                    @{$lastRead->{muts}} = sort { $a <=> $b } (@mutsInOverlap, @mutsOutOverlap);

                }

            }
            else { 
                
                $lastRead->{overlapping} = 0;
                $lastRead->{gap} = $lastRead->{start}->[1] - $lastRead->{end}->[0] - 1; # The gap between 2 non-overlapping mates
                
            }

        }

        @{$lastRead->{muts}} = uniq(@{$lastRead->{muts}});
        @{$lastRead->{muts}} = rmconsecutive(@{$lastRead->{muts}}) if ($rmconsecutive);
        @{$lastRead->{muts}} = collapsemutations(@{$lastRead->{muts}}) if ($collapse);

        if (exists $masks{$id}) {

            foreach my $mask (@{$masks{$id}}) {

                @{$lastRead->{muts}} = grep { $_ < $mask->[0] || $_ > $mask->[1] } @{$lastRead->{muts}};

                if ($pairStart >= $mask->[0] && $pairStart <= $mask->[1]) { $pairStart = $mask->[1] + 1; }
                elsif ($pairEnd >= $mask->[0] && $pairEnd <= $mask->[1]) { $pairEnd = $mask->[0] - 1; }

            }

        }

    }

    return($pairStart, $pairEnd);

}

sub cleanup {

    if (!-s $output . "error.out") { unlink($output . "error.out"); }
    else { print "\n\n  [!] Warning: Execution completed with error(s)/warning(s). Please check the \"${output}error.out\" file\n"; }

    rmtree($tmpdir);

}

sub guessTypeAndSorting {

    my $file = shift;

    my ($type, $header, $eof, $sorted, 
        $i, $h, $missingMD, @data, %header);
    $i = $h = $missingMD = 0;
    $header = "\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00";
    $eof = "\x1f\x8b\x08\x04\x00\x00\x00\x00\x00\xff\x06\x00\x42\x43\x02\x00\x1b\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00";

    open(my $fh, "-|", "$samtools view -h --no-PG $samtoolsParams $file 2>&1") or die "\n  [!] Error: Unable to open sample \"$file\" ($!)\n\n";
    while (<$fh>) {

        next if (/PG line/);

        if (substr($_, 0, 1) eq "@") {

            my $tag = substr($_, 1, 2);

            if ($tag eq "SQ") {

                my @row = split "\t";
                $header{(split(":", $row[1]))[1]} = $h;
                $h++;

            }
            elsif ($tag eq "HD") {

                my @row = split "\t";
                $sorted = 1 if ((split(":", $row[1]))[1] eq "coordinate");

            }

        }
        else {

            if ($mutcount) { $missingMD = 1 unless (/\tMD:Z:/); }

            $i++;
            my @row = split "\t";

            die "  [!] Error: Sample \"$file\" is not a valid SAM/BAM file\n\n" if (/(?:Failed to open file|fail to read the header)/ ||
                                                                                    @row < 12 || !isint($row[1]) || !isdna($row[9]));

            if (@data) {
                
                if ($header{$row[2]} < $header{$data[0]} || ($header{$row[2]} == $header{$data[0]} && $row[3] < $data[1])) {

                    $sorted = 0;

                    last;

                }
                else { $sorted = 1; }

            }

            @data = ($row[2], $row[3]);

        }

        last if ($i == 100);

    }
    close($fh);

    undef(@data);

    open($fh , "<:raw", $file);
    read($fh, $data[0], 16);
    seek($fh, -28, SEEK_END);
    read($fh, $data[1], 28);
    close($fh);

    if ($data[0] eq $header && $data[1] eq $eof) { $type = "BAM"; }
    else { $type = "SAM"; }

    return($type, $sorted, $missingMD);

}

sub parsemd { # SAM MD flag parser

    my ($row, $reference) = @_;

    my ($md, $llen, @pos, @quals,
        %lowQual, %pos, %posMap);
    $llen = $row->[3] - 1;

    return([], {}, [], 1) if (@{$row} < 11);

    for (10 .. $#{$row}) { if ($row->[$_] =~ m/^MD:Z:(.+)$/) { $md = $1; last; } }

    return([], {}, [], 1) if (!defined $md);

    %posMap = ref2read($row);
    @quals = split(//, $row->[10]);
    $md = uc($md);

    while($md =~ m/^(\d+)/) {

        my ($len, $next);
        $len = $1;
        $md =~ s/^$len//;

        while($md =~ m/^(\D)/) {

            $next = $1;

            if ($next eq "^") { # Deletion

                my ($del, $start, $end, $win,
                    $real, $disttodel, %collapsed);

                $md =~ s/^\^//;
                $md =~ m/^([ACGNT]+)/;
                $del = $1;

                if (!$nodel && length($del) <= $maxdel) { # Count unambiguously mapped deletions as mutations

                    my ($before, $after, @truedel);

                    # Extract a window of 20 nt (+/- 10nt) centered on the deletion
                    $start = $len + $llen - 1 >= 9 ? $len + $llen - 1 - 9 : 0;
                    $end = $len + $llen - 1 + length($del) + 10 < length($reference) ? $len + $llen - 1 + length($del) + 10 : length($reference) - 1;
                    $win = substr($reference, $start, $end - $start + 1);
                    $disttodel = $len + $llen >= 10 ? 10 : $len + $llen;
                    $real = substr($reference, $start, $disttodel) . substr($reference, $start + $disttodel + length($del), $end - $start - $disttodel - length($del) + 1);

                    # Slide the deletion along sequence, and build hash table
                    push(@{$collapsed{substr($win, 0, $_) . substr($win, $_ + length($del), length($win) - length($del) - $_)}}, $start + $_ + length($del) - 1) for (0 .. length($win) - length($del));

                    if (@{$collapsed{$real}} > 1) { # Deletion is not unambiguously mapped

                        if (!$noambiguous) {

                            my $truedel = $leftalign ? min(@{$collapsed{$real}}) : max(@{$collapsed{$real}});
                            $before = $truedel - length($del);
                            $after = $truedel + 1;
                            @truedel = $rightdel ? ($truedel) : ($leftdel ? ($before + 1) : ($before + 1 .. $truedel));
                            # if (($evalsurround &&
                            #     ((exists $posMap{$before} && unpack("C*", $quals[$posMap{$before}]) - 33 >= $minqual) || !exists $posMap{$before}) &&
                            #     ((exists $posMap{$after} && unpack("C*", $quals[$posMap{$after}]) - 33 >= $minqual) || !exists $posMap{$after})) ||
                            #     !$evalsurround) { 
                                    
                                push(@pos, @truedel);
                                push(@{$pos{del}}, @truedel) if ($outRawCounts);
                                
                            #}
                            #else { $lowQual{$_} = 1 for (@truedel); }

                        }

                    }
                    else {

                        $before = $collapsed{$real}->[0] - length($del);
                        $after = $collapsed{$real}->[0] + 1;
                        @truedel = $rightdel ? ($collapsed{$real}->[0]) : ($leftdel ? ($before + 1) : ($before + 1 .. $collapsed{$real}->[0]));
                        # if (($evalsurround &&
                        #     ((exists $posMap{$before} && unpack("C*", $quals[$posMap{$before}]) - 33 >= $minqual) || !exists $posMap{$before}) &&
                        #     ((exists $posMap{$after} && unpack("C*", $quals[$posMap{$after}]) - 33 >= $minqual) || !exists $posMap{$after})) ||
                        #     !$evalsurround) {

                        push(@pos, @truedel);
                        push(@{$pos{del}}, @truedel) if ($outRawCounts);

                        # }
                        # else { $lowQual{$_} = 1 for (@truedel); }

                    }

                }

                $len += length($del);
                $md =~ s/^$del//;

            }
            elsif ($next =~ m/^([ACGNT])$/) {

                my $readBase = substr($row->[9], $posMap{$llen + $len}, 1);

                if ($next ne "N" && $readBase ne "N") {

                    # Include the mutation if both the mutated base and the bases immediately surroundinding
                    # have a quality score >= minqual, and the mutated base is not a N in the reference or in the read
                    if (unpack("C*", $quals[$posMap{$llen + $len}]) - 33 >= $minqual &&
                        (($evalsurround &&
                         ((exists $posMap{$llen + $len - 1} && unpack("C*", $quals[$posMap{$llen + $len - 1}]) - 33 >= $minqual) || !exists $posMap{$llen + $len - 1}) &&
                         ((exists $posMap{$llen + $len + 1} && unpack("C*", $quals[$posMap{$llen + $len + 1}]) - 33 >= $minqual) || !exists $posMap{$llen + $len + 1})) ||
                         !$evalsurround) &&
                        (($onlyMut && $onlyMut{$next . $readBase}->{take}) || !$onlyMut)) {

                        push(@pos, $len + $llen);
                        push(@{$pos{$next . $readBase}}, $len + $llen) if ($outRawCounts);

                    }
                    else { $lowQual{$len + $llen} = 1; }

                    $onlyMut{$next . $readBase}->{count}++ if ($onlyMut);

                }

                $md =~ s/^$next//;
                $len++;

            }

        }

        $llen += $len;

    }

    return(\@pos, \%pos, \%lowQual, 0);

}

sub parsecigar {

    my $cigar = shift;

    my ($clip, $cov, $truelen, $last,
        @ins);
    ($clip, $cov, $truelen, $last) = (0, 0, 0, 0);

    while ($cigar =~ m/^(\d+[SH])/) {

        my $clipsh = $1;
        $cigar =~ s/^$clipsh//;
        $clipsh =~ s/[SH]$//;
        $clip += $clipsh;

    }

    $cov = $clip if ($includeclip && (!$maxClipped || $clip <= $maxClipped));

    while($cigar =~ m/^(\d+)([MIDNSHP=X])/) {

        my ($n, $op) = ($1, $2);
        $cov += $n if ($op =~ /^[DM=X]$/ || ($includeclip && $op =~ /^[SH]$/ && (!$maxClipped || $n <= $maxClipped)));
        $last += $n if ($op =~ m/^[DNM=X]$/);
        $truelen += $n if ($op =~ m/^[MI=X]$/);
        push(@ins, $last - 1) if ($op eq "I"); # Relative position of the inserted nucleotide

        $cigar =~ s/^$n$op//;

    }

    return($clip, $cov, $truelen, \@ins);

}

sub editdist {

    my $row = shift;

    my $dist = 0;

    if ($row =~ m/MD:Z:(\S+)/) {

        my ($md, $del, $mut);
        $md = $1;
        ($del) = $md =~ s/\^(?:N*[ACGT]+|[ACGT]+N*)[ACGTN]*//g; # This is to avoid counting N-only deletions
        ($mut) = $md =~ tr/ACGT/ACGT/;
        $dist = $del + $mut;

    }

    return($dist);

}

# Collapses consecutive mutations toward the 3'-most one
# e.g. collapsemutations(10, 11, 12) = 12
# Distance tollerance between consecutive mutations is set by $maxmutdist (default: 2)
sub collapsemutations {

    my @values = sort {$a <=> $b} uniq(@_);

    return unless(@values);

    for (my $i=0; $i < $#values; $i++) {

        if (abs(diff(@values[$i..$i+1])) <= $maxmutdist) {

            splice(@values, $i, 1);
            $i--;

        }

    }

    return(@values);

}

sub rmconsecutive {

    my @muts = @_;

    return(@muts) if (@muts < 2);

    my ($last, @selected);

    for(my $i = 0; $i < @muts; $i++) {

        next if ($i > 0 && inrange($muts[$i], [$muts[$i - 1], $muts[$i - 1] + $rmconsecutive]));
	    next if ($i < $#muts && inrange($muts[$i], [$muts[$i + 1] - $rmconsecutive, $muts[$i + 1]]));

	    push(@selected, $muts[$i]);

    }

    return(@selected);

}

sub ref2read { # Reference to read relative position

    my $row = shift;

    my ($rpos, $qpos, $cigar, $lastrpos,
        $lastqpos, @ops, %pos, %ops);
    $cigar = $row->[5];
    $rpos = $lastrpos = $row->[3] - 1;
    $qpos = $lastqpos = 0;
    %ops = ( "M" => [1, 1],
             "I" => [1, 0],
             "D" => [0, 1],
             "N" => [0, 1],
             "S" => [1, 0],
             "H" => [0, 0],
             "P" => [0, 0],
             "=" => [1, 1],
             "X" => [1, 1] );

    while($cigar =~ m/^(\d+)([MIDNSHP=X])/) {

        my ($n, $op) = ($1, $2);
        push(@ops, [$n, $op]);
        $cigar =~ s/^$n$op//;

    }

    for (@ops) {

        my ($n, $op) = @{$_};
        next if (!$ops{$op}->[0] &&
                 !$ops{$op}->[1]);

        $qpos += $n if ($ops{$op}->[0]);
        $rpos += $n if ($ops{$op}->[1]);

        if ($ops{$op}->[0] &&
            $ops{$op}->[1]) {

            if (my $diff = $qpos - $lastqpos) { $pos{$lastrpos + $_} = $lastqpos + $_ for (0 .. $diff - 1); }

        }

        $lastqpos = $qpos;
        $lastrpos = $rpos;

    }

    return(%pos);

}

sub help {

    print "\n  [!] Error: Invalid option. Please check the help\n" if ($_[0]);

    die <<HELP;

 RF Count (v$Core::Utils::VERSION)
 RNA Framework [http://www.rnaframework.com]

 Author:  Danny Incarnato (dincarnato[at]rnaframework.com)
 Summary: Calculates per-base RT-stops/mutations and coverage

 Usage:   rf-count [Options] Sample1.sam Sample2.bam ... Samplen.sam
          rf-count [Options] sample1:Sample1.sam ... sampleN:Samplen.sam      # With sample labels

 Options                                          Description
 -p   or --processors              <int>          Number of files to process in parallel (Default: 1)
 -wt  or --working-threads         <int>          Number of working threads to use for each file (Default: 1).
                                                  Note: at least -p <files> * -wt <threads> processors are required.
 -P   or --per-file-progress                      The progress of each individual file is shown as a separate progress bar
                                                  Note: this only works in interactive mode. If output is redirected to file, a single
                                                        progress bar is shown reporting the overall status. Similarly, if the number of
                                                        samples exceedes the number of lines in the terminal, a single progress bar is
                                                        shown.
 -a   or --fast                                   Reference sequences are kept in memory instead of being loaded on the fly
                                                  Note: this can significantly decrease the runtime when processing large sets of 
                                                        transcripts, but increases memory usage
 -o   or --output-dir              <string>       Output directory (Default: rf_count/)
 -ow  or --overwrite                              Overwrites output directory (if the specified path already exists)
 -s   or --samtools                <string>       Path to samtools executable (Default: assumes samtools is in PATH)
 -sm  or --samtools-memory         <string>       Maximum memory (per thread) to be used by samtools sort (Default: 500M)
                                                  Note: at least -p <files> * -wt <threads> * -sm <memory> RAM (+swap) is needed
 -g   or --img                                    Enables the generation of statistic plots of per-base % mutations/RT-stops and, for MaP
                                                  experiments only, of % mutated reads (requires R)
 -R   or --R-path                  <string>       Path to R executable (Default: assumes R is in PATH)
 -t5  or --trim-5prime             <int>[,<int>]  Comma separated list (no spaces) of values indicating the number of bases trimmed from the
                                                  5'-end of reads in the respective sample SAM/BAM files (Default: 0)
                                                  Note: Values must be provided in the same order as the input files.
                                                        If a single value is specified along with multiple SAM/BAM files, it will
                                                        be used for all files.
                                                        This parameter has no effect when -m (or --count-mutations) is enabled
 -f   or --fasta                   <string>       Path to a FASTA file containing the reference transcripts
                                                  Note: Transcripts in this file must match transcripts in SAM/BAM file headers
 -mf  or --mask-file               <string>       Path to a mask file
 -ndd or --no-discard-duplicates                  Reads marked as PCR/optical duplicates, discarded by default, will be also considered
 -pn  or --primary-only                           Considers only primary alignments (SAM flag != 0x100)
 -po  or --paired-only                            When processing SAM/BAM files from paired-end experiments, only those reads for which
                                                  both mates are mapped will be considered (SAM flag = 0x01)
 -pp  or --properly-paired                        When processing SAM/BAM files from paired-end experiments, only those reads mapped in a
                                                  proper pair will be considered (SAM flag = 0x02)
 -ic  or --include-clipped                        Includes the soft-clipped portions of reads when calculating coverage (provided that the
                                                  number of soft-clipped bases is <= --max-clipped)
                                                  Note: In RT-stop mode, the default behavior is to exclude soft/hard-clipped reads.
                                                        When this option is active, the RT-stop position is considered to be the position
                                                        preceding the clipped bases. These bases will be also included in the reads in MM files.
 -mp  or --max-clipped             <int>          Maximum allowed number of clipped bases from either end of the read (requires -ic) 
                                                  (>= 0, Default: 0 [no limit])
                                                  Note: If the number of clipped bases exceeds this value, clipped bases will not be counted in
                                                        the final coverage (and will not be included in the reads in MM files). In RT-stop mode,
                                                        reads having more than this number of clipped bases will be discarded
 -mq  or --map-quality                            Minimum mapping quality to consider a read (Default: 0)
 -co  or --coverage-only                          Only calculates per-base coverage (disables RT-stops/mutations count)
 -m   or --count-mutations                        Enables mutations count instead of RT-stops count (for SHAPE-MaP/DMS-MaPseq)

 |
 +- Mutation count mode options
    -sbn or --sort-by-read-name                    If processing large paired-end experiments, reads will be pre-sorted by read name, so
                                                   that paired-end reads will be processed consecutively, significantly reducing the memory
                                                   footprint (but to the cost of increased disk usage)
    -pam or --paired-end-all-mutations             When processing paired-end reads, if they overlap and mutations are only present in one
                                                   of the two mates, these will be retained
                                                   Note: the default behavior is to discard mutations in overlapping regions that are not 
                                                         supported by both mates
    -fsr or --force-single-read                    When processing paired-end reads, the two mates will be treated as separate reads
                                                   Note: if the two mates overlap, this will cause both coverage and mutations within
                                                         the overlapping portion to be counted twice. Furthermore, the two mates will 
                                                         be reported separately in the MM file, rather than as one long read 
    -orc or --out-raw-counts                       Generates a text file reporting raw (unfiltered) mutation counts, broken down by class
                                                   (single nucleotide mutations, insertions, deletions)
                                                   Note: the reported counts are affected by the -nd, -na, -mq and -md parameters, but not 
                                                         by deletion realignment options
    -om  or --only-mut                <string>     Only the specified mutations will be counted
                                                   Note: mutations must be provided in the form [original]2[mutated]. For example, "A2T" (or "A>T",
                                                         or "A:T") will only count mutation events in which a reference A base has been sequenced as
                                                         a T. IUPAC codes are also accepted. Multiple mutations must be provided as a comma (or semi-colon)
                                                         separated list (e.g. A2T;C:N,G>A). When specified, this parameter automatically disables insertion
                                                         and deletion count
    -ds  or --discard-shorter         <int>        Discards reads spanning less than this number of bases, excluding clipped bases (unless -ic is specified)
                                                   (Default: 1)
                                                   Note: when set to "MEDIAN" (case-insensitive), the median read length will be used
    -q   or --min-quality                          Minimum quality score value to consider a mutation (Phred+33, Default: 20)
    -ncl or --no-cov-low-qual                      If a mutated base (or one of the surrounding bases, if -es is specified) does not exceed the -mq minimum
                                                   quality threshold, that base will be considered as non covered
    -es  or --eval-surrounding                     When considering a mutation, also evaluate the quality of surrounding bases (+/- 1nt)
                                                   Note: the quality score threshold set by -q (or --min-quality) also applies to these bases
    -nd  or --no-deletions                         Ignores deletions
    -ni  or --no-insertions                        Ignores insertions
    -na  or --no-ambiguous                         Ignores ambiguously mapped deletions
                                                   Note: The default behavior is to re-align them to their right-most valid position (or to their
                                                         left-most valid position if -la has been specified)
    -la  or --left-align                           Re-aligns ambiguously mapped deletions to their left-most valid position
                                                   Note: by default, ambiguously mapped deletion are re-aligned to their right-most valid position
    -rd  or --right-deletion                       Only the right-most base in a deletion is marked as mutated
    -ld  or --left-deletion                        Only the left-most base in a deletion is marked as mutated
    -md  or --max-deletion-len        <int>        Ignores deletions longer than this number of nucleotides (Default: 10)
    -me  or --max-edit-distance       <float>      Discards reads with editing distance frequency higher than this threshold (0<m<=1, Default: 0.15 [15%])
    -eq  or --median-quality          <int>        Median quality score threshold for discarding low-quality reads (Phred+33, Default: 20)
    -dc  or --discard-consecutive     <int>        Discards consecutive mutations within this distance from eachothers
    -cc  or --collapse-consecutive                 Collapses consecutive mutations/indels toward the 3'-most one
    -mc  or --max-collapse-distance   <int>        Maximum distance between consecutive mutations/indels to allow collapsing (requires -cc, >=0, Default: 2)
    -mv  or --max-coverage            <int>        Downsamples reads to achieve this maximum mean per-base coverage (>=1000, Default: off)
    -mm  or --mutation-map                         Generates a mutation map (MM) file for alternative structure deconvolution with DRACO
    -msp or --mm-split-paired-end     <int>        In case of non-overlapping paired-end reads, the two mates and the respective mutatations will be
                                                   reported as separate reads in the MM file if the distance between the end of R1 and the start of R2
                                                   exceeds this value (>= 0, Default: 0 [no limit])
                                                   Note: The default behavior is to report them as a single long read spanning from the start of R1 to 
                                                         the end of R2
    -wl  or --whitelist               <int>        Generates a DRACO-compatible whitelist file, containing the IDs of transcripts with median
                                                   coverage >= to the specified value

HELP

}
