#!/usr/bin/env perl

##
# RF Map
# Epigenetics Unit @ HuGeF [Human Genetics Foundation]
#
# Author:  Danny Incarnato (danny.incarnato[at]hugef-torino.org)
# Summary: Performs reads pre-processing and mapping
#
# This program is free software, and can be redistribute  and/or modified
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# Please see <http://www.gnu.org/licenses/> for more informations.
##

use strict;
use Config;
use File::Basename;
use File::Copy;
use File::Spec;
use FindBin qw($Bin);
use Getopt::Long qw(:config no_ignore_case);

use lib $Bin . "/lib";

use Core::Utils;
use Core::Mathematics qw(:all);
use Core::Process::Queue;
use Data::Sequence::Utils;
use Term::Table;
use Term::Progress;

$|++;

my ($tmp, $tmpdir, $output, $trim3,
    $trim5, $bv, $bn, $ba,
    $wt, $bi, $bc, $bD, $bN,
    $bR, $bmp, $bdp, $bs, $progressBar,
    $bma, $bnr, $bdg, $bfg, $ca5, $ca3,
    $cl, $bowtie, $bowtie2, $cutadapt,
    $samtools, $sam, $clipped, $help,
    $overwrite, $bm, $nobam, $threads, 
    $bseed, $processmanager, $table, $bk, 
    $cm, $mp, $bnf, $mo, $logdir, $keeplogs, 
    $cq5, $cq3, $cqo, $bd, $haspaired,
    $trimN, $maxN, @tmp, %files);

do {

    local $SIG{__WARN__} = sub { };

    GetOptions( "h|help"                    => \$help,
                "o|output-dir=s"            => \$output,
                "ow|overwrite"              => \$overwrite,
                "nb|no-bam"                 => \$nobam,
                "p|processors=i"            => \$threads,
                "wt|working-threads=i"      => \$wt,
                "b|bowtie=s"                => \$bowtie,
                "c|cutadapt=s"              => \$cutadapt,
                "s|samtools=s"              => \$samtools,
                "cm|cutadapt-min-align=i"   => \$cm,
                "ca5|cutadapt-5adapter=s"   => \$ca5,
                "ca3|cutadapt-3adapter=s"   => \$ca3,
                "cl|cutadapt-len=i"         => \$cl,
                "cq5|cutadapt-5quality=i"   => \$cq5,
                "cq3|cutadapt-3quality=i"   => \$cq3,
                "cqo|cutadapt-quality-only" => \$cqo,
                "cp|clipped"                => \$clipped,
                "b5|bowtie-trim5=i"         => \$trim5,
                "b3|bowtie-trim3=i"         => \$trim3,
                "bl|bowtie-seedlen=i"       => \$bseed,
                "bv|bowtie-v=i"             => \$bv,
                "bn|bowtie-n=i"             => \$bn,
                "ba|bowtie-all"             => \$ba,
                "bm|bowtie-max=i"           => \$bm,
                "bi|bowtie-index=s"         => \$bi,
                "bc|bowtie-chunkmbs=i"      => \$bc,
                "bk|bowtie-multimap=i"      => \$bk,
                "bN|bowtie-N=i"             => \$bN,
                "bD|bowtie-D=i"             => \$bD,
                "bR|bowtie-R=i"             => \$bR,
                "bmp|bowtie-mp=s"           => \$bmp,
                "bdp|bowtie-dpad=i"         => \$bdp,
                "bs|bowtie-softclip"        => \$bs,
                "bd|bowtie-dovetail"        => \$bd,
                "bma|bowtie-ma=i"           => \$bma,
                "bnr|bowtie-norc"           => \$bnr,
                "bnf|bowtie-nofw"           => \$bnf,
                "bdg|bowtie-rdg=s"          => \$bdg,
                "bfg|bowtie-rfg=s"          => \$bfg,
                "mp|mapping-params=s"       => \$mp,
                "mo|manual-only"            => \$mo,
                "kl|keep-logs"              => \$keeplogs,
                "b2|bowtie2"                => \$bowtie2,
                "ctn|cutadapt-trim-N"       => \$trimN,
                "cmn|cutadapt-max-N=s"      => \$maxN ) or help(1);

};

help() if ($help);

# Default values
$tmp //= randalphanum(0xf);
$output //= "rf_map/";
$wt ||= 1;
$threads ||= 1;
$bowtie //= $bowtie2 ? which("bowtie2") : which("bowtie");
$cutadapt //= which("cutadapt");
$samtools //= which("samtools");
$ca5 ||= "GTTCAGAGTTCTACAGTCCGACGATC";
$ca3 ||= "AGATCGGAAGAGCACACGTCT";
$cl //= 25;
$cm //= 1;
$cq3 //= 20;
$trim5 //= 0;
$trim3 //= 0;
$bc //= 128;
$bm //= 1;
$bn //= $bowtie2 ? 1 : 2;
$bseed //= $bowtie2 ? 22 : 28;
$bN //= 1;
$bD //= 15;
$bR //= 2;
$bmp //= "6,2";
$bdg //= "5,3";
$bfg //= "5,3";
$bdp //= 15;
$bma //= 2;
$output =~ s/\/?$/\//;
$bc = 2147483647 if ($bc > 2147483647);
$maxN = round($maxN) if (isnumeric($maxN) && $maxN > 1);
$tmpdir = $output . "tmp/";
$logdir = $output . "logs/";

if (isdna(join("", split(/,/, $ca5)))) {

    my @ca5 = split(/,/, $ca5);
    @ca5 = map { revcomp($_) } @ca5;

}

##
# Input validation
##

die "\n  [!] Error: No sample FastQ file provided\n\n" if (!@ARGV);
die "\n  [!] Error: Number of processors must be an integer greater than 0\n\n" if ($threads < 1);
die "\n  [!] Error: Working threads value must be an integer greater than 0\n\n" if ($wt < 1);

# Cutadapt validation
die "\n  [!] Error: No 5' adapter's sequence provided\n\n"if (!defined $ca5);
die "\n  [!] Error: Invalid 5' adapter's sequence\n\n" if (!isdna(join("", split(/,/, $ca5))));
die "\n  [!] Error: No 3' adapter's sequence provided\n\n"if (!defined $ca3);
die "\n  [!] Error: Invalid 3' adapter's sequence\n\n" if (!isdna(join("", split(/,/, $ca3))));
die "\n  [!] Error: No minimum read length provided\n\n" if (!defined $cl);
die "\n  [!] Error: Minimum read length should be an integer >= 10\n\n" if ($cl < 10);
die "\n  [!] Error: Minimum adapter alignment should be an integer > 0\n\n" if ($cm < 1);
die "\n  [!] Error: Number of processors must be an integer greater than 0\n\n" if ($threads < 1);
die "\n  [!] Error: Maximum number of Ns must be positive\n\n" if (defined $maxN && !ispositive($maxN));

# Bowtie validation
die "\n  [!] Error: 5'-end trimming value must be a positive integer\n\n" if (!ispositive($trim5) || !isint($trim5));
die "\n  [!] Error: 3'-end trimming value must be a positive integer\n\n" if (!ispositive($trim3) || !isint($trim3));
die "\n  [!] Error: Parameter -bk must be an integer > 0\n\n" if (defined $bk && (!isint($bk) || !$bk));
die "\n  [!] Error: Parameters -ba and -bk are mutually exclusive\n\n" if ($bk > 1 && $ba);
die "\n  [!] Error: Parameters -bnr and -bnf are mutually exclusive\n\n" if ($bnr && $bnf);

if ($bowtie2) {

    die "\n  [!] Error: Bowtie seed length value must be an integer >= 3 and <= 32\n\n" if (!isint($bseed) || $bseed < 3 || $bseed > 32);
    die "\n  [!] Error: Bowtie seed mismatches value must be 0 or 1\n\n" if ($bN !~ m/^[01]$/);
    die "\n  [!] Error: Bowtie maximum seed extension attempts value must be an integer >= 0\n\n" if (!isint($bD) || !ispositive($bD));
    die "\n  [!] Error: Bowtie maximum re-seeding attempts value must be an integer >= 0\n\n" if (!isint($bR) || !ispositive($bR));
    die "\n  [!] Error: Invalid Bowtie maximum and minimum mismatch penalities format\n\n" if ($bmp !~ m/^\d+(?:,\d+)$/);
    die "\n  [!] Error: Invalid Bowtie read's gap open and extend penalities format\n\n" if ($bdg !~ m/^\d+(?:,\d+)$/);
    die "\n  [!] Error: Invalid Bowtie reference's gap open and extend penalities format\n\n" if ($bfg !~ m/^\d+(?:,\d+)$/);
    die "\n  [!] Error: Bowtie number of extra DP table bases must be an integer >= 0\n\n" if (!isint($bdp) || !ispositive($bdp));
    die "\n  [!] Error: Bowtie match bonus in local alignment mode must be an integer >= 0\n\n" if (!isint($bma) || !ispositive($bma));

}
else {

    die "\n  [!] Error: Bowtie seed length value must be an integer >= 5\n\n" if (!isint($bseed) || $bseed < 5);
    die "\n  [!] Error: Bowtie mismatches value must be an integer comprised between 0 and 3\n\n" if ((defined $bv && $bv !~ m/^[0-3]$/) ||
                                                                                                      (defined $bn && $bn !~ m/^[0-3]$/));

}

if ($mp) { # Evaluate additional mapping parameters

    my $ret = `$bowtie $mp 2>&1`;

    die "\n  [!] Error: Invalid additional mapping parameters (\"" . $mp . "\")." .
        "\n             Please check aligner's documentation for detailed parameters description\n\n" if (($bowtie2 &&
                                                                                                           $ret =~ m/Error: Encountered internal Bowtie 2 exception/) ||
                                                                                                          (!$bowtie2 &&
                                                                                                           $ret =~ m/bowtie: unrecognized option/));

}


warn "\n  [!] Warning: Some input files are duplicates. Considering only unique files...\n" if (@ARGV != uniq(@ARGV));

if (!defined $samtools) { die "\n  [!] Error: samtools is not in PATH\n\n"; }
elsif (!-e $samtools) { die "\n  [!] Error: samtools doesn't exist\n\n"; }
elsif (!-x $samtools) { die "\n  [!] Error: samtools is not executable\n\n"; }
else {

    my $ret = `$samtools 2>&1`;

    if ($ret =~ m/Version: ([\d\.]+)/) {

        my $version = $1;

        die "\n  [!] Error: RF Count requires SAMTools v1 or greater (Detected: v" . $version . ")\n\n" if (substr($version, 0, 1) < 1);

    }
    else { warn "\n  [!] Warning: Unable to detect SAMTools version\n"; }

}

$SIG{__DIE__} = \&cleanup;

print "\n[+] Making output directory...";

if (-e $output) {

    if ($overwrite) {

        my $error = rmtree($output);

        die "\n\n  [!] Error: " . $error . "\n\n" if ($error);

    }
    else { die "\n\n  [!] Error: Output directory already exists." .
               "\n      Please use -ow (or --overwrite) to overwrite output directory\n\n"; }

}

for ($tmpdir, $logdir) { if (my $error = mktree($_, "755")) { die "\n\n  [!] Error: Unable to create output directory ($error)\n\n"; } }

##
# Prepare files
##

$table = Term::Table->new(indent => 2);
$table->head("Sample", "Type", "File(s)");

print "\n[+] Validating FastQ files:\n\n";

foreach my $sample (uniq(@ARGV)) {

    my ($basename, @sample);
    @sample = split(/:/, $sample);

    if (@sample == 2) {

        $basename = shift(@sample);
        @sample = split(/,/, $sample[0]);

    }
    else {

        @sample = split(/,/, $sample);
        $basename = (fileparse($sample[0], qr/((\.[^.\s]+)+)$/))[0];
        $basename =~ s/_R[12]$//; # Remove _R1 or _R2 suffix added by Illumina

    }

    if (uniq(@sample) != @sample) {

        warn "  [!] Warning: Duplicate mate file provided for sample \"$basename\".\n" .
             "               Falling back to single read mode.\n\n";

    }

    @sample = uniq(@sample);
    $haspaired = 1 if (@sample == 2);

    for (@sample) {

        die "  [!] Error: Specified sample file \"$_\" does not exist\n\n" if (!-e $_);
        die "  [!] Error: Sample file \"$_\" is not a valid FastQ file\n\n" unless(checkfq($_));

    }

    push(@tmp, { path   => $sample[0],
                 id     => exists $files{$basename} ? "${basename}_" . $files{$basename} : $basename,
                 file   => (fileparse($sample[0], qr/\.[^.]*/))[0],
                 paired => @sample == 2 ? 1 : 0,
                 path2  => @sample == 2 ? $sample[1] : undef,
                 file2  => @sample == 2 ? (fileparse($sample[1], qr/\.[^.]*/))[0] : undef,
                 gzip   => $sample[0] =~ m/\.gz$/ ? 1 : 0 });

    # Error on mixed R1/R2 gzipped/uncompressed files
    die "  [!] Error: Mixed compressed/uncompressed files for sample \"$basename\"\n\n"if ($tmp[-1]->{paired} &&
                                                                                           (($tmp[-1]->{gzip} && $sample[1] !~ /\.gz$/) ||
                                                                                           (!$tmp[-1]->{gzip} && $sample[1] =~ /\.gz$/)));

    $files{$basename}++;

    $table->row($basename,
                @sample == 2 ? "Paired-end" : "Single-read",
                @sample == 2 ? join(",", ($tmp[-1]->{file}, $tmp[-1]->{file2}) ): $tmp[-1]->{file});

}

$table->print();

print "\n";

# Check if we have a reference index and all required tools
if (defined $bi) {

    my ($ret, $suffix);

    if (!defined $bowtie) { die "\n  [!] Error: " . ($bowtie2 ? "bowtie2" : "bowtie") . " is not in PATH\n\n"; }
    elsif (!-e $bowtie) { die "\n  [!] Error: " . ($bowtie2 ? "bowtie2" : "bowtie") . " doesn't exist\n\n"; }
    elsif (!-x $bowtie) { die "\n  [!] Error: " . ($bowtie2 ? "bowtie2" : "bowtie") . " is not executable\n\n"; }

    if (!defined $cutadapt) { die "\n  [!] Error: cutadapt is not in PATH\n\n"; }
    elsif (!-e $cutadapt) { die "\n  [!] Error: cutadapt doesn't exist\n\n"; }
    elsif (!-x $cutadapt) { die "\n  [!] Error: cutadapt is not executable\n\n"; }

    $ret = `$bowtie --version 2>&1`;

    if ($ret =~ /version (.+)$/m) {

        my $v = $1;

        if (($bowtie2 && substr($v, 0, 1) != 2) ||
            (!$bowtie2 && substr($v, 0, 1) == 2)) {

            warn "\n  [!] Warning: Bowtie " . ($bowtie2 ? "v2" : "v1") . " was expected, but v" . $v . " was detected." .
                 "\n               Automatically switching to v" . substr($v, 0, 1) . "\n";

            $bowtie2 = substr($v, 0, 1) == 2 ? 1 : 0;

        }

    }
    else { warn "\n  [!] Warning: Unable to detect Bowtie version\n"; }

    $suffix = $bowtie2 ? "bt2" : "ebwt";

    # When checking whether index files exist, we use a glob() because:
    # Bowtie indices can be either *.ebwt or *.ebwt1
    # Bowtie2 indices can be either *.bt2 or *.bt21
    for(map { $_ . $suffix } qw(.1. .2. .3. .4. .rev.1. .rev.2.)) { die "\n  [!] Error: Bowtie " . ($bowtie2 ? "v2" : "v1") .
                                                                        " index file \"" . $bi . $_ . "\" doesn't exist\n\n" if (!-e (glob($bi . $_ . "*"))[0]); }

}
else { die "\n  [!] Error: No Bowtie index provided\n\n"; }

# We are processing paired-end data, and we have performed 5'-end quality trimming
if ($haspaired && (($cq5 && !$clipped) || $trim5)) {

    if (!$bowtie2) {

        warn "\n  [!] Warning: at least one of the provided datasets has a paired-end layout," .
             "\n               and 5'-end quality trimming has been enabled." .
             "\n               Use of Bowtie v2 is suggested, as Bowtie v1 does not support dovetailed reads.\n";

    }
    else {

        warn "\n  [!] Warning: at least one of the provided datasets has a paired-end layout," .
             "\n               and 5'-end quality clipping/trimming has been enabled." .
             "\n               Enabling the -bd (or --bowtie-dovetail) parameter is strongly encouraged.\n" if (!$bd &&
                                                                                                                $mp !~ m/--dovetail/); # Dovetail disabled

    }

}

# Starts the process manager
$processmanager = Core::Process::Queue->new( processors => $threads,
                                             verbosity  => 1 );

##
# FastQ processing
##

print "\n";

if (!$clipped) {

    my $qualfilter = $cq5 ? $cq5 . "," . $cq3 : $cq3;
    $cutadapt .= " -j " . $wt;
    $cutadapt .= " --trim-n" if ($trimN);
    $cutadapt .= " --max-n " . $maxN if (defined $maxN);

    foreach my $sample (@tmp) {

        $processmanager->enqueue( command => $sample->{paired} ? $cutadapt . ($cqo ? undef : " -a " . $ca3 . " -A " . $ca5 . " -O " . $cm) . " -m " . $cl . " -q " . $qualfilter . " -o " . $tmpdir . $tmp . "_" . $sample->{file} . ".fq -p " . $tmpdir . $tmp . "_" . $sample->{file2} . ".fq " . $sample->{path} . " " . $sample->{path2} :
                                                                 $cutadapt . ($cqo ? undef : " -a " . $ca3 . " -O " . $cm) . " -m " . $cl . " -q " . $qualfilter . " -o " . $tmpdir . $tmp . "_" . $sample->{file} . ".fq " . $sample->{path},
                                  id      => $sample->{id},
                                  stdout  => $logdir . $sample->{id} . "_cutadapt.log",
                                  stderr  => $logdir . $sample->{id} . "_cutadapt.log" );

        $sample->{path} = $tmpdir . $tmp . "_" . $sample->{file} . ".fq";
        $sample->{path2} = $tmpdir . $tmp . "_" . $sample->{file2} . ".fq" if ($sample->{paired});

    }

    $progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                        colored => 1 );
    $progressBar->init("Clipping adapters");

    $processmanager->parentOnExit(sub { 
        
        die "\n\n  [!] Error: Unable to perform adapter clipping on sample \"$_[0]\"." .
            "\n             Please check input file's format/quality and try again.\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

        $progressBar->update(1); 
        
    });

    $processmanager->start();
    $processmanager->waitall();

    print "\n";

}
else {

    print "[+] Input FastQ files are already clipped. Skipping adapter clipping...\n\n";

    foreach my $sample (@tmp) {

        my $newfile = $tmpdir . $tmp . "_" . $sample->{file} . ".fq";
        $newfile .= ".gz" if ($sample->{gzip});
        system("ln -s \"" . File::Spec->rel2abs($sample->{path}) . "\" \"" . $newfile . "\"");

        $sample->{path} = $newfile;

        if ($sample->{paired}) {

            $newfile = $tmpdir . $tmp . "_" . $sample->{file2} . ".fq";
            $newfile .= ".gz" if ($sample->{gzip});

            system("ln -s \"" . File::Spec->rel2abs($sample->{path2}) . "\" \"" . $newfile . "\"");

            $sample->{path2} = $newfile;

        }

    }

}

##
# Reads mapping
##

foreach my $sample (@tmp) {

    my ($pre, $command, $file1, $file2);
    $command = $bowtie . " -p " . $wt;
    $file1 = $sample->{path};
    $file2 = $sample->{path2} if ($sample->{paired});

    if (!$mo) {

        $command .= " -3 " . $trim3 if ($trim3);
        $command .= " -5 " . $trim5 if ($trim5);
        $command .= " --norc" if ($bnr);
        $command .= " --nofw" if ($bnf);
        $command .= " -a" if ($ba);
        $command .= " -k " . $bk if ($bk);
        $command .= $bowtie2 ? " -L " . $bseed : " -l " . $bseed;

    }

    if ($bowtie2) { # Bowtie v2

        if (!$mo) {

            $command .= " -N " . $bN . " -D " . $bD . " -R " . $bR . " --mp " . $bmp . " --dpad " . $bdp;
            $command .= " --local" if ($bs);
            $command .= " --ma " . $bma if ($bs);

        }

        $command .= " --no-mixed" if ($sample->{paired});
        $command .= " --dovetail" if ($sample->{paired} &&
                                      $bd);
        $command .= " --no-unal -x " . $bi . " -S \"" . $tmpdir . $tmp . "_" . $sample->{id} . ".sam\"";

    }
    else { # Bowtie v1

        if (!$mo) {

            $command .= defined $bv ? " -v " . $bv : " -n " . $bn;
            $command .= " -m " . $bm . "  --best --strata --chunkmbs " . $bc;

        }

        $command .= " -S -x \"" . $bi . "\"";

    }

    $command .= " " . $mp if (defined $mp);
    $command .= $sample->{paired} ? " -1 \"" . $file1 . "\" -2 \"" . $file2 . "\"" : " \"" . $file1 . "\"";
    $command .= " | grep -v '4\t\\*\t0\t0' > \"" . $tmpdir . $tmp . "_" . $sample->{id} . ".sam\"" if (!$bowtie2); # Unmapped reads filtering for Bowtie v1
    
    $processmanager->enqueue( command => $command,
                              id      => $sample->{id},
                              stderr  => $logdir . $sample->{id} . "_mapping.log" );

}

$progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                    colored => 1 );
$progressBar->init("Mapping reads");

$processmanager->parentOnExit(sub { 
    
    die "\n\n  [!] Error: An error has occurred while mapping sample \"$_[0]\".\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

    $progressBar->update(1); 
    
});

$processmanager->start();
$processmanager->waitall();

$table = Term::Table->new(indent => 2);
$table->head("Sample", "\% Mapped", "\% Failed", $bowtie2 ? "\% Multiple" : "\% Suppressed");

for (my $i = 0; $i < @tmp; $i++) {

    my ($sample, $total, @stats);
    $sample = $tmp[$i];
    @stats = bowtie_log($logdir . $sample->{id} . "_mapping.log", $sample->{paired});
    $total = sum(@stats[0..2]);

    die "\n\n  [!] Bowtie log processing failed." .
        "\n      Please ensure that your Bowtie installation works properly and try again.\n\n" if (!$total);

    @stats = map { sprintf("%.2f", $_ / $total * 100) . "\%" } @stats;
    $table->row($sample->{id}, @stats[0..2]);

    if ($stats[3] == 0) {

        splice(@tmp, $i, 1);
        $i--;

    }

    $sample->{path} = $tmpdir . $tmp . "_" . $sample->{id}  . ".sam";

}

if (!@tmp) {

    cleanup();

    die "\n\n  [!] Mapping failed for all samples." .
        "\n      Please check the transcriptome assembly and try again.\n\n";

}

print "\n\n[+] Mapping statistics:\n\n";
$table->print();
print "\n\n";

foreach my $sample (@tmp) {

    my $path = $output . $sample->{id} . ($nobam ? ".sam" : ".bam");

    $processmanager->enqueue( command => $samtools . " sort --threads " . $wt . " -O " . ($nobam ? "SAM" : "BAM") . " -T \"" . $tmpdir . $tmp . "_" . $sample->{id} . "\" -o \"" . $path . "\" \"" . $sample->{path} . "\"",
                              id      => $sample->{id},
                              stderr  => $logdir . $sample->{id} . "_samtools.log");

    $sample->{path} = $path;

}

$progressBar = Term::Progress->new( max     => $processmanager->queueSize(),
                                    colored => 1 );
$progressBar->init("Sorting alignments");

$processmanager->parentOnExit(sub { 
    
    die "\n\n  [!] Error: Unable to perform sorting on sample \"$_[0]\".\n\n" if ($processmanager->dequeue($_[2])->exitcode()->[0]);

    $progressBar->update(1); 
    
});

$processmanager->start();
$processmanager->waitall();

cleanup();

rmtree($logdir) if (!$keeplogs);

print "\n\n[+] All done.\n\n";

sub cleanup { rmtree($tmpdir); }

sub bowtie_log {

    my $log = shift;
    my $paired = shift;

    my @stats = (0, 0, 0, 0);

    open(my $fh, "<", $log) or die "\n  [!] Error: Unable to open Bowtie log file (" . $! . ")\n\n";
    while(my $row = <$fh>) {

        if ($bowtie2) {

            if ($paired) {

                if ($row =~ m/(\d+) \([\d\.\%]+\) aligned concordantly exactly 1 time/) { $stats[0] = $1; }
                elsif ($row =~ m/(\d+) \([\d\.\%]+\) aligned concordantly 0 times/) { $stats[1] = $1; }
                elsif ($row =~ m/(\d+) \([\d\.\%]+\) aligned concordantly >1 times/) { $stats[2] = $1; }

            }
            else {

                if ($row =~ m/(\d+) \([\d\.\%]+\) aligned exactly 1 time/) { $stats[0] = $1; }
                elsif ($row =~ m/(\d+) \([\d\.\%]+\) aligned 0 times/) { $stats[1] = $1; }
                elsif ($row =~ m/(\d+) \([\d\.\%]+\) aligned >1 times/) { $stats[2] = $1; }

            }

        }
        else {

            if ($row =~ m/reads with at least one (?:reported )?alignment: (\d+) /) { $stats[0] = $1; }
            elsif ($row =~ m/reads that failed to align: (\d+) /) { $stats[1] = $1; }
            elsif ($row =~ m/reads with alignments suppressed due to -m: (\d+) /) { $stats[2] = $1; }

        }

    }
    close($fh);

    $stats[3] = $bowtie2 ? $stats[0] + $stats[2] : $stats[0];

    return(@stats);

}

sub checkfq {

    my $file = shift;

    my ($type);

    if ($file !~ m/\.(?:fq|fastq)(?:\.gz)?$/i) {

        my ($mode, @rows);
        $mode = "<";

        if ($file =~ m/\.gz$/) {

            $file = "gunzip -c " . $file;
            $mode = "-|";

        }

        open(my $fh, $mode, $file) or die "  [!] Error: Unable to open sample \"" . $file . "\" (" . $! . ")\n\n";
        for (0 .. 3) { $rows[$_] = <$fh>; }
        close($fh);

        return if ($rows[0] !~ m/^@/ ||
                   !isdna($rows[1]) ||
                   $rows[2] !~ m/^\+/);

    }

    return(1);

}

sub help {

    print "\n  [!] Error: Invalid option. Please check the help\n" if ($_[0]);

    die <<HELP;

 RF Map (v$Core::Utils::VERSION)
 RNA Framework [http://www.rnaframework.com]

 Author:  Danny Incarnato (dincarnato[at]rnaframework.com)
 Summary: Performs reads pre-processing and mapping

 Usage:   rf-map [Options] Sample1_R1.fastq[,Sample1_R2.fastq] ... Samplen_R1.fastq[,Samplen_R2.fastq]
          rf-map [Options] Sample1_R1.fastq[,Sample1_R2.fastq] ... Samplen_R1.fastq[,Samplen_R2.fastq]      # With sample labels

 Options                                      Description
 -b2 or --bowtie2                             Uses Bowtie v2 for reads mapping (Default: Bowtie v1)
 -p  or --processors           <int>          Number of processors to use (Default: 1)
 -wt or --working-threads      <int>          Number of working threads to use for each instance of the aligner software (Default: 1).
                                              Note: RF Map executes 1 instance of the aligner for each processor specified by -p.
                                                    At least -p <processors> * -wt <threads> processors are required.
 -o  or --output-dir           <string>       Output directory (Default: rf_map/)
 -t  or --tmp-dir              <string>       Temporary directory (Default: <output>/tmp)
 -ow or --overwrite                           Overwrites output directory (if the specified path already exists)
 -nb or --no-bam                              Disables mapped SAM files conversion to BAM format
 -b  or --bowtie               <string>       Path to Bowtie v1/v2 executable (Default: assumes Bowtie is in PATH)
 -c  or --cutadapt             <string>       Path to Cutadapt executable (Default: assumes Cutadapt is in PATH)
 -s  or --samtools             <string>       Path to SAMTools executable (Default: assumes SAMTools is in PATH)
 -kl or --keep-logs                           Disables logs folder deletion (mostly for debugging purposes)

 Cutadapt options
 -ca5 or --cutadapt-5adapter       <string>     Sequence of 5' adapter to clip (Default: GTTCAGAGTTCTACAGTCCGACGATC)
                                                Note #1: Sequence of 5' adapter will be automatically reverse-complemented.
                                                Note #2: Multiple adapter sequences can be provided as a comma-separated list.
 -ca3 or --cutadapt-3adapter       <string>     Sequence of 3' adapter to clip (Default: AGATCGGAAGAGCACACGTCT)
                                                Note: Multiple adapter sequences can be provided as a comma-separated list.
 -cq5 or --cutadapt-5quality       <int>        Quality threshold for trimming bases from read 5'-ends (Phred+33, Default: 0 [no trimming])
                                                Note: 5'-end quality trimming must be avoided when analyzing data from RT-stop-based methods
 -cq3 or --cutadapt-3quality       <int>        Quality threshold for trimming bases from read 3'-ends (Phred+33, Default: 20)
 -cqo or --cutadapt-quality-only                Disables adapters clipping (only performs quality-based trimming)
 -cl  or --cutadapt-len            <int>        Minimum length to keep reads after clipping (>=10, Default: 25)
 -cm  or --cutadapt-min-align      <int>        Minimum alignment in nt to adapter's sequence (>0, Default: 1)
 -ctn or --cutadapt-trim-N                      Trims Ns at the end of read
 -cmn or --cutadapt-max-N          <float>      Discards reads with more than this number of Ns (Default: off)
                                                Note: if a value between 0 and 1 is provided, this is interpreted as a fraction of read's length
 -cp  or --clipped                              Assumes that the provided FastQ files have been already clipped

 Mapping options
 -mp  or --mapping-params      <string>      Manually specify additional aligner parameters (e.g. -mp "-n 2 -l 15")
                                             Note: for a complete list of aligner's parameters, please check aligner's documentation
 -mo  or --manual-only                       Only uses manually specified aligner's parameters.
                                             Any other parameter, except -bi (or --bowtie-index), will be ignored
 -bk  or --bowtie-k            <int>         Reports up to this number of mapping positions for reads (Default: disabled)
 -ba  or --bowtie-all                        Reports all mapping positions for reads (Default: disabled)
 -bnr or --bowtie-norc                       Maps only to transcript's sense strand (Default: both strands)
 -bnf or --bowtie-nofw                       Maps only to transcript's antisense strand (Default: both strands)
 -b5  or --bowtie-trim5        <int>         Number of bases to trim from 5'-end of reads (>=0 bases, Default: 0)
 -b3  or --bowtie-trim3        <int>         Number of bases to trim from 3'-end of reads (>=0 bases, Default: 0)
 -bi  or --bowtie-index        <string>      Path to transcriptome reference index

 |
 +- Bowtie v1 options
    -bl  or --bowtie-seedlen      <int>            Seed length (>=5, Default: 28)
    -bn  or --bowtie-n            <int>            Uses Bowtie in -n mode (0-3, Default: 2)
    -bv  or --bowtie-v            <int>            Uses Bowtie in -v mode (0-3, Default: disabled)
    -bm  or --bowtie-max          <int>            Discards alignment if more than this number of alignments exist (Default: 1)
    -bc  or --bowtie-chunkmbs     <int>            Maximum MB of RAM for best-first search frames (Default: 128)

 |
 +- Bowtie v2 options
    -bl  or --bowtie-seedlen      <int>            Seed length (3<=l<=32, Default: 22)
    -bN  or --bowtie-N            <int>            Bowtie seed mismatches (0-1 mismatches, Default: 1)
    -bD  or --bowtie-D            <int>            Maximum number of seed extension attempts (>=0, Default: 15)
    -bR  or --bowtie-R            <int>            Maximum number of re-seeding attempts for reads with repetitive seeds (>=0, Default: 2)
    -bmp or --bowtie-mp           <int>[,<int>]    Maximum and minimum mismatch penalities (>=0, Default: 6,2)
    -bdp or --bowtie-dpad         <int>            Number of extra reference bases included on sides of the DP table (>=0, Default: 15)
    -bdg or --bowtie-rdg          <int>[,<int>]    Read's gap open and extend penalities (>=0, Default: 5,3)
    -bfg or --bowtie-rfg          <int>[,<int>]    Reference's gap open and extend penalities (>=0, Default: 5,3)
    -bs  or --bowtie-softclip                      Enables local alignment mode (Default: entire read must align)
    -bma or --bowite-ma           <int>            Match bonus in local alignment mode (>=0, Default: 2)
    -bd  or --bowtie-dovetail                      Paired-end dovetailed reads are allowed (mates extend past each other)


HELP

}
