#!/usr/bin/env perl

##
# RF Mutate
# RNA Framework [http://www.rnaframework.com]
#
# Author:  Danny Incarnato (dincarnato[at]rnaframework.com)
# Summary: Designs structure mutants and rescues
#
# This program is free software, and can be redistribute  and/or modified
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# Please see <http://www.gnu.org/licenses/> for more informations.
##

use strict;
use Config;
use Cwd;
use File::Basename;
use FindBin qw($Bin);
use Getopt::Long qw(:config no_ignore_case);
use List::Util;
use POSIX qw(floor);
use threads;
use threads::shared;

use lib $Bin . "/lib";

use Core::Mathematics qw(:all);
use Core::Statistics;
use Core::Utils;
use Data::IO;
use Data::XML;
use Data::IO::Sequence;
use Data::Sequence::Utils;
use RNA::Utils;
use Term::Constants qw(:screen);

$|++;

die "\n  [!] Error: This program requires ithreads." .
    "\n             Please recompile Perl with ithreads support and try again\n\n" unless(defined $Config{useithreads});

my ($help, $output, $overwrite, $orffile,
    $motiffile, $mindist, $maxiter, $maxeval,
    $tolerance, $maxresults, $threads, $autoorf,
    $startalt, $startany, $input, $minorf,
    $tmpdir, $imported, $cwd, $norescue, $gencode, 
    $nmutations, $rnafold, $noensprob, $exclcodons, $targetFile, 
    $maxDistToTarget, $installed, @pool, %ncmutable, %cmutable, 
    %targetStructs);

my @ids : shared;
my %input : shared;
my %results : shared;
%results = ( success   => 0,
             fail      => 0,
             parseerr  => 0,
             orferr    => 0,
             motiferr  => 0,
             targeterr => 0 );

do {

    local $SIG{__WARN__} = sub { };

    GetOptions( "h|help"                  => \$help,
                "o|output-dir=s"          => \$output,
                "ow|overwrite"            => \$overwrite,
                "of|orf-file=s"           => \$orffile,
                "mf|motif-file=s"         => \$motiffile,
                "tf|target-file=s"        => \$targetFile,
                "md|min-distance=s"       => \$mindist,
                "mi|max-iterations=i"     => \$maxiter,
                "t|tolerance=s"           => \$tolerance,
                "me|max-evaluate=i"       => \$maxeval,
                "mr|max-results=i"        => \$maxresults,
                "lo|longest-orf"          => \$autoorf,
                "mo|min-orf-length=i"     => \$minorf,
                "als|alt-start"           => \$startalt,
                "ans|any-start"           => \$startany,
                "nr|no-rescue"            => \$norescue,
                "p|processors=i"          => \$threads,
                "gc|genetic-code=i"       => \$gencode,
                "nm|n-mutations=i"        => \$nmutations,
                "nr|no-rescue"            => \$norescue,
                "ne|no-ensemble-prob"     => \$noensprob,
                "vrf|vienna-rnafold=s"    => \$rnafold,
                "ec|exclude-codons=s"     => \$exclcodons,
                "mt|max-dist-to-target=s" => \$maxDistToTarget ) or help(1);

    $input = $ARGV[0];

};

help() if ($help);

$threads ||= 1;
$output ||= "rf_mutate/";
$mindist ||= 0.5;
$maxiter ||= 1000;
$maxeval ||= 1000;
$tolerance //= 0.2;
$maxDistToTarget //= 0.2;
$minorf ||= 50;
$gencode ||= 1;
$nmutations ||= 1;
$rnafold ||= which("RNAfold");
$output =~ s/\/?$/\//;
$tmpdir = $output . "tmp/";
$imported = 0;
$cwd = cwd();
$installed = eval { require RNA; 1; };
%ncmutable = ( A => [ qw(C U) ],
               C => [ qw(A G) ],
               G => [ qw(C U) ],
               U => [ qw(A G) ] );

die "\n  [!] Error: Number of processors must be an integer greater than 0\n\n" if ($threads < 1);
die "\n  [!] Error: Minimum distance must be comprised between >0 and 1\n\n" if (!inrange($mindist, [0, 1]));
die "\n  [!] Error: Tolerance must be comprised between 0 and 1\n\n" if (!inrange($tolerance, [0, 1]));
die "\n  [!] Error: Maximum distance to target must be comprised between 0 and 1\n\n" if (defined $targetFile && !inrange($maxDistToTarget, [0, 1]));
die "\n  [!] Error: No motif file provided\n\n" if (!defined $motiffile);
die "\n  [!] Error: Provided motif file does not exist\n\n" if (!-e $motiffile);
die "\n  [!] Error: Provided target file does not exist\n\n" if (defined $targetFile && !-e $targetFile);
die "\n  [!] Error: Provided ORF file does not exist\n\n" if (defined $orffile && !-e $orffile);
die "\n  [!] Error: No path to the ViennaRNA RNAfold executable provided\n\n" if (!$rnafold && !$noensprob);
die "\n  [!] Error: Parameters -als (or --alt-start) and -ans (or --any-start) are mutually exclusive\n\n" if ($startalt && $startany);
die "\n  [!] Error: Maximum results must be a positive integer > 0\n\n" if (defined $maxresults && (!ispositive($maxresults) || !isint($maxresults)));

die "\n  [!] Error: ViennaRNA package's Perl bindings are required." .
    "\n             Please ensure that ViennaRNA package v2.7.0 (or greater) is installed and try again\n\n" unless($installed);

checkvienna();
$RNA::noLonelyPairs = 1;

if (defined $orffile ||
    $autoorf) {

    # Builds the %cmutable table, containing the alternative codons
    my ($gentable, %codons);
    $gentable = gencode($gencode);

    die "\n  [!] Error: Invalid genetic code table \"" . $gencode . "\"\n\n" unless ($gentable);

    push(@{$codons{$gentable->{$_}}}, $_) for (keys %{$gentable});

    if ($exclcodons) { # Rare codons to be avoided

        my @exclcodons = split(/[,;]/, $exclcodons);

        for (@exclcodons) { die "\n  [!] Error: Invalid excluded codon \"" . $_ . "\"\n\n" if (!isiupac($_) || length($_) != 3); }

        $exclcodons = join("|", map { iupac2regex($_) } @exclcodons);

    }

    foreach my $aa (keys %codons) {

        my @altcodons = @{$codons{$aa}};

        for(my $i = 0; $i < @altcodons; $i++) {

            my (@codons);
            @codons = @altcodons[0 .. $i - 1] if ($i);
            @codons = (@codons, @altcodons[$i + 1 .. $#altcodons]) if ($i < $#altcodons);
            $cmutable{$altcodons[$i]} = [ grep { $_ !~ m/^$exclcodons$/ } @codons ];

        }

    }

}
else {

    print "\n  [!] Note: No ORFs file has been provided and --longest-orf is disabled.".
          "\n            All transcripts will be treated as non-coding RNAs.\n";

}

# Output directory tree
print "\n[+] Making output directory tree...";

if (-e $output) {

    if ($overwrite) {

        my $error = rmtree($output);

        die "\n\n  [!] Error: " . $error . "\n\n" if ($error);

    }
    else { die "\n\n  [!] Error: Output directory already exists." .
               "\n      Please use -ow (or --overwrite) to overwrite output directory\n\n"; }

}

if (my $error = mktree($tmpdir, "755")) { die "\n  [!] Error: Unable to create output directory ($error)\n\n"; }

# Sets all paths to absolute, before changing working directory to temp dir
$output = File::Spec->rel2abs($output) . "/";
$tmpdir = File::Spec->rel2abs($tmpdir) . "/";

print "\n[+] Importing motif(s)";

open(my $mh, "<", $motiffile) or die "\n\n  [!] Error: Unable to read motif file\n\n";
while(my $line = <$mh>) {

    chomp($line);

    next if ($line =~ m/^\s?#/);

    my @line = split(/[,;]/, $line);

    next if (@line < 2);

    foreach my $i (1 .. $#line) {

        $line[$i] =~ s/\s//g;

        if ((isdotbracket($line[$i]) && isdbbalanced($line[$i])) ||
            (isint($line[$i]) && ispositive($line[$i]))) {

            $input{$line[0]} //= shared_clone({ file   => undef,
                                                motifs => shared_clone([]),
                                                orf    => shared_clone({ start => undef,
                                                                         end   => undef,
                                                                         aa    => undef })});
            push(@{$input{$line[0]}->{motifs}}, $line[$i]);

            $imported++;

        }

    }

}
close($mh);

print " [" . $imported . " imported]";

die "\n\n  [!] Error: No motif imported. Check motif file syntax and try again.\n\n" unless($imported);

print "\n[+] Importing structure files [0 imported]";

if (-d $input) {

    opendir(my $dh, $input) or die "\n\n  [!] Error: Unable to read structure files from directory\n\n";
    while(my $file = readdir($dh)) {

        if ($file =~ m/^(.+?)\.(?:fa(?:sta)?|db|ct)$/) {

            my $id = $1;

            next if (!exists $input{$id});

            $input{$id}->{file} = File::Spec->rel2abs($input . "/" . $file);

        }

    }
    closedir($dh);

}
else { # Single structure file

    if ($input =~ m/([^\/]+?)\.(?:fa(?:sta)?|db|ct)$/) {

        my $id = $1;

        $input{$id}->{file} = File::Spec->rel2abs($input) if (exists $input{$id});

    }
    else { die "\n\n  [!] Error: Provided file does not have a valid extension\n\n"; }

}

for (keys %input) { delete($input{$_}) if (!defined $input{$_}->{file}); }

if (my $structcount = scalar(grep { defined $input{$_}->{file} } keys %input)) { print CLRRET . "[+] Importing structure file(s) [" . $structcount . " imported]"; }
else { die "\n\n  [!] Error: No structure file matches any of the sequence IDs in the motif file\n\n"; }

if ($orffile) { # ORFs specified

    print "\n[+] Importing ORF(s) [0 imported]";

    open(my $oh, "<", $orffile) or die "\n\n  [!] Error: Unable to read ORF file\n\n";
    while(my $line = <$oh>) {

        chomp($line);

        next if ($line =~ m/^\s?#/);

        my @line = split(/[,;]/, $line);

        next if (@line < 2);

        if (exists $input{$line[0]}) {

            if (@line > 2) { warn "\n\n  [!] Warning: Multiple ORFs in file for transcript \"" . $line[0] . "\"." .
                                  "\n               Only the first ORF will be considered\n\n"; }

              if (isaa($line[1])) { $input{$line[0]}->{orf}->{aa} = $line[1]; }
              elsif ($line[1] =~ m/^\d+(\-\d+)?$/) { ($input{$line[0]}->{orf}->{start}, $input{$line[0]}->{orf}->{end}) = split("-", $line[1]); }

        }

    }
    close($oh);

    if (my $orfcount = scalar(grep { defined $input{$_}->{orf}->{start} ||
                                     defined $input{$_}->{orf}->{aa} } keys %input)) { print CLRRET . "[+] Importing ORF(s) [" . $orfcount . " imported]"; }
    else {

        warn "\n\n  [!] Warning: No ID in ORF file matches any of the sequence IDs in the motif file." .
                "\n               All transcripts will be treated as non-coding RNAs.\n\n";

    }

}

if ($targetFile) {

    $imported = 0;

    print "\n[+] Importing target structures";

    open(my $th, "<", $targetFile) or die "\n\n  [!] Error: Unable to read target file\n\n";
    while(my $line = <$th>) {

        chomp($line);

        next if ($line =~ m/^\s?#/);

        my @line = split(/[,;]/, $line);

        next if (@line < 2);
        next if (!exists $input{$line[0]});

        foreach my $i (1 .. $#line) {

            $line[$i] =~ s/\s//g;

            my @target = split(/:/, $line[$i]);

            if (isint($target[0]) && ispositive($target[0]) && 
                isdotbracket($target[1]) && isdbbalanced($target[1])) {

                $targetStructs{$line[0]}->{$target[0]} = $target[1];
                $imported++;

            }

        }

    }
    close($th);

    print " [" . $imported . " imported]";

    warn "\n\n  [!] Error: No target structure imported. Check target file syntax. Ignoring...\n\n" unless($imported);

}

chdir($tmpdir);  # Change the working directory before starting threads

print "\n[+] Generating structure mutants" . ($norescue ? " " : " and rescues ") . "[Last: none]";

@pool = map{ threads->create(\&mutate) } 1 .. $threads;
$_->join() for (@pool);

chdir($cwd);  # Reset to original working directory

print "\n[+] Mutagenesis statistics:\n" .
      "\n  [*] Mutated motifs:   " . $results{success} .
      "\n  [*] Discarded motifs: " . (sum(map { $results{$_} } keys %results) - $results{success}) . " total" .
      "\n                        " . $results{parseerr} . " structure file parsing failed" .
      "\n                        " . $results{motiferr} . " motif not found" .
      "\n                        " . $results{orferr} . " invalid ORF" .
      "\n                        " . $results{targeterr} . " invalid target structure" .
      "\n                        " . $results{fail} . " mutagenesis" . ($norescue ? " " : "/rescue ") . "failed" ;

rmtree($tmpdir);
rmtree($output) unless($results{success});

print "\n\n[+] All done.\n\n";

sub mutate {

    while(1) {

        my ($input, $id, $entry, %helices);

        { lock(%input);
          $id = (sort keys %input)[0];

          if ($id) {

              $input = $input{$id};
              delete($input{$id});

          } }

        last unless($id);

        my $eval = do { local $@;
                        eval { $entry = Data::IO::Sequence->new( file        => $input->{file},
                                                                 lonelypairs => 1 )->read(); };
                        $@; };

        if ($eval || !$entry || ($entry && !$entry->can("structure"))) {

            lock(%results);
            $results{parseerr}++;

            undef($@);

            next;

        }

        # Table of motif start => end
        %helices = map { $_->h5start() => $_->h3start() } $entry->helices();

        if (defined $input->{orf}->{start} ||
            defined $input->{orf}->{aa}) {

            if (defined $input->{orf}->{start}) {

                if (!$input->{orf}->{end}) { # Only start of ORF has been defined

                    my ($aa, $stopindex);
                    $aa = translate(substr($entry->sequence(), $input->{orf}->{start}), $gencode);
                    $stopindex = index($aa, "*", 0);

                    if ($stopindex == -1) { $input->{orf}->{end} = $entry->length() - 1; } # no stop codon found in frame
                    elsif ($stopindex > 0) { $input->{orf}->{end} = $input->{orf}->{start} + ($stopindex * 3) + 2; } # convert stop position in aa seq in nt coordinates
                                                                                                                     # nb. stop cannot be the first codon

                }

            }
            else { #aa sequence provided

                foreach my $frame (0 .. 2) {

                    my ($aa, $orfindex);
                    $aa = translate(substr($entry->sequence(), $frame), $gencode);
                    $orfindex = index($aa, $input->{orf}->{aa}, 0);

                    if ($orfindex != -1) {

                        my $stopindex = index($aa, "*", $orfindex);
                        $input->{orf}->{start} = ($orfindex * 3) + $frame;
                        $input->{orf}->{start} -= 3 while ($input->{orf}->{start} >= 3 &&
                                                           substr($aa, $input->{orf}->{start} - 3, 1) ne "*");

                        $input->{orf}->{end} = $stopindex == -1 ? $entry->length() - 1 : ($stopindex * 3) + $frame + 2;
                        last;

                    }

                }

            }

            if (!$input->{orf}->{end}) {

                    lock(%results);
                    $results{orferr}++;

                    next;

            }

        }
        elsif ($autoorf) {

            my @orf = longestorf($entry->sequence(), { gencode     => $gencode,
                                                       altstart    => $startalt,
                                                       ignorestart => $startany,
                                                       minlength   => $minorf });

            if (@orf) {

                $input->{orf}->{start} = $orf[1];
                $input->{orf}->{end} = $orf[1] + length($orf[0]) - 1;

            }

        }

        foreach my $motif (uniq(@{$input->{motifs}})) {

            my ($sequence, $structure, $incoding, $itercount,
                $motifEnd, $motifLen, @original, 
                @mutable, @sequence, @variants,
                @results, @motif, @target, %originalpairs, 
                %targetPairs, %bptable, %mutable);
            $itercount = 0;
            $motif = index($entry->structure(), $motif, 0) if (isdotbracket($motif)); # In case the dot-bracket of the motif was provided, first find the corresponding coordinate

            if (!exists $helices{$motif}) {

                lock(%results);
                $results{motiferr}++;

                next;

            }

            $motifEnd = $helices{$motif};
            $motifLen = $motifEnd - $motif + 1;

            if (exists $targetStructs{$id}->{$motif}) {

                my $targetLen = length($targetStructs{$id}->{$motif});

                if ($targetLen + $motif > $entry->length()) {

                    lock(%results);
                    $results{targeterr}++;

                    next;

                }

                # A target structure was specified, but it had a different length than the motif
                if ($targetLen < $motifLen) {
                    
                    $targetStructs{$id}->{$motif} .= "." x ($motifLen - $targetLen);

                }
                else {

                    $motifEnd = $motif + $targetLen - 1;
                    $motifLen = $motifEnd - $motif + 1;

                }

            }

            # The motif is (partially or completely) inside the ORF
            $incoding = 1 if (defined $input->{orf}->{start} &&
                              intersect([$motif, $motifEnd], [$input->{orf}->{start}, $input->{orf}->{end}]));

            if ($incoding) {

                # The motif might not be "in frame", so we will extract the region comprised between the
                # two codons in which the motif is included
                # % 3 -> 1: Ist base of codon; 2: IInd base of codon; 0: IIIrd base of codon
                my ($aah5start, $aah3start);
                $aah5start = (($motif - $input->{orf}->{start} + 1) % 3) ? ((($motif - $input->{orf}->{start} + 1) % 3) == 1 ? $motif : $motif - 1) : $motif - 2;
                $aah3start = (($motifEnd - $input->{orf}->{start} + 1) % 3) ? ((($motifEnd - $input->{orf}->{start} + 1) % 3) == 1 ? $motifEnd + 2 : $motifEnd + 1) : $motifEnd;

                $sequence = substr($entry->sequence(), $aah5start, $aah3start - $aah5start + 1);
                $structure = fixdotbracket(substr($entry->structure(), $aah5start, $aah3start - $aah5start + 1));
                @motif = ($aah5start, $aah3start);
                %mutable = %cmutable;

                # Expand the target structure to be aligned to the codons
                if (exists $targetStructs{$id}->{$motif}) {

                    $targetStructs{$id}->{$motif} = ("." x ($motif - $aah5start)) . $targetStructs{$id}->{$motif};
                    $targetStructs{$id}->{$motif} .= "." x ($aah3start - $motifEnd);

                }

            }
            else {

                $sequence = substr($entry->sequence(), $motif, $motifLen);
                $structure = fixdotbracket(substr($entry->structure(), $motif, $motifLen));
                @motif = ($motif, $motifEnd);
                %mutable = %ncmutable;

            }

            # Get the free energy and base-pairs of the original motif
            @original = ($structure, RNA::energy_of_struct($sequence, $structure));
            %originalpairs = map { $_->[0] => $_->[1],
								   $_->[1] => $_->[0] } listpairs($original[0]);

            # Get the free energy and base-pairs of the target motif, if present
            if (exists $targetStructs{$id}->{$motif}) {

                @target = ($targetStructs{$id}->{$motif}, RNA::energy_of_struct($sequence, $targetStructs{$id}->{$motif}));
                %targetPairs = map { $_->[0] => $_->[1],
                                    $_->[1] => $_->[0] } listpairs($target[0]);

            }

            # %bptable contains the index of the complementary base to each base in an helix (for non-codings)
            # or the index of the complementary codon with the maximum number of base pairs to each codon in an helix
            # @mutable will contain the indices of all the mutable bases (non-coding) or codons (coding)
            if ($incoding) {

                my (@structure);

                for (keys %originalpairs) {

                    $bptable{floor($_ / 3)}->{floor($originalpairs{$_} / 3)}++;
                    $bptable{floor($originalpairs{$_} / 3)}->{floor($_ / 3)}++;

                }

                %bptable = map { $_ => (sort {$bptable{$_}->{$b} <=> $bptable{$_}->{$a}} keys %{$bptable{$_}})[0] } keys %bptable;

                @sequence = ($sequence =~ m/.../g);
                @structure = ($structure =~ m/.../g);
                @mutable = ([grep { ($structure[$_] =~ tr/././) < 2 &&
                                    @{$cmutable{$sequence[$_]}} } 0 .. $#structure]) x $nmutations;  # Selects the index of codons in which at least 2 bases are basepaired
                                                                                                     # and the codon has at least one synonymous codon available

            }
            else {

                %bptable = %originalpairs;

                @sequence = split(//, $sequence);
                @mutable = map { [map { $_, $originalpairs{$_} } grep { $_ > $originalpairs{$_} } keys %originalpairs] } 0 .. $nmutations - 1;

            }

            # Creates random combinations of indexes of bases/codons to mutate
            MUTATE:
            foreach my $set (List::Util::shuffle(uniq(map {[sort {$a <=> $b} @$_]} grep { scalar(uniq(map { min($_, $bptable{$_}) } @$_)) == $nmutations } permute(@mutable)))) {

                foreach my $bases (permute(map { clonearrayref($mutable{$sequence[$_]}) } @{$set})) {

                    my ($mutated, $bpdist, $energydiff, @mutated,
                        @mfolding);
                    @mutated = @sequence;
                    $mutated[$set->[$_]] = $bases->[$_] for (0 .. $#{$set});
                    $mutated = join("", @mutated);

                    $itercount++;

                    @mfolding = RNA::fold($mutated);
                    RNA::free_arrays();

                    $bpdist = exists $targetStructs{$id}->{$motif} ? -bpdistance($mfolding[0], $target[0]) : bpdistance($mfolding[0], $original[0]);
                    $energydiff = exists $targetStructs{$id}->{$motif} ? -abs($mfolding[1] - $target[1]) : abs($mfolding[1] - $original[1]);

                    push(@variants, { vseq     => $mutated,
                                      vstruct  => $mfolding[0],
                                      venergy  => $mfolding[1],
                                      bpdist1  => $bpdist,
                                      ediff1   => $energydiff,
                                      indices1 => [@$set] }) if ((exists $targetStructs{$id}->{$motif} && -$bpdist <= int(length($sequence) * $maxDistToTarget)) ||
                                                                 (!exists $targetStructs{$id}->{$motif} && $bpdist >= int(length($sequence) * $mindist)));

                    last MUTATE if ($itercount == $maxiter ||
                                    @variants == $maxeval);

                }

            }

            if (!$norescue) {

                $itercount = 0;

                RESCUE:
                foreach my $variant (sort {$b->{bpdist1} <=> $a->{bpdist1} ||
                                           $b->{ediff1} <=> $a->{ediff1}} @variants) {

                    my @set = sort {$a <=> $b} map { $bptable{$_} } @{$variant->{indices1}};

                    foreach my $pair (permute(map { clonearrayref($mutable{$sequence[$_]}) } @set)) {

                        my ($rescued, $bpdist, $energydiff, @rescued,
                            @rfolding);
                        @rescued = $incoding ? ($variant->{vseq} =~ m/.../g) : split(//, $variant->{vseq});
                        $rescued[$set[$_]] = $pair->[$_] for (0 .. $#set);
                        $rescued = join("", @rescued);

                        $itercount++;

                        @rfolding = RNA::fold($rescued);
                        RNA::free_arrays();

                        $bpdist = bpdistance($rfolding[0], $original[0]);
                        $energydiff = abs($rfolding[1] - $original[1]);

                        if ($bpdist <= int(length($sequence) * $tolerance)) {

                            push(@results, { vseq     => $variant->{vseq},
                                             vstruct  => $variant->{vstruct},
                                             venergy  => $variant->{venergy},
                                             rseq     => $rescued,
                                             rstruct  => $rfolding[0],
                                             renergy  => $rfolding[1],
                                             bpdist1  => $variant->{bpdist1},
                                             bpdist2  => $bpdist,
                                             ediff1   => $variant->{ediff1},
                                             ediff2   => $energydiff,
                                             indices1 => $variant->{indices1},
                                             indices2 => \@set });

                            last RESCUE if ($itercount == $maxiter ||
                                            @results == $maxeval);

                        }

                    }

                }

            }
            else { @results = @variants;}

            if (@results) {

                my ($xmlio, $xml, $n, $motifattribs);

                if (!$noensprob) {

                    foreach my $result (@results) {

                        my ($motifId, @p1, @p2, %vpairs, %rpairs);
                        $motifId = $entry->id() . "_" . $motif;
                        %vpairs = partition($motifId, $result->{vseq});

                        if (exists $targetStructs{$id}->{$motif}) { @p1 = map { $vpairs{$_}->{$targetPairs{$_}} || 0 } grep { $_ < $targetPairs{$_} } keys %targetPairs; }
                        else { @p1 = map { $vpairs{$_}->{$originalpairs{$_}} || 0 } grep { $_ < $originalpairs{$_} } keys %originalpairs; }

                        $result->{meanprob1} = exists $targetStructs{$id}->{$motif} ? -mean(@p1) : mean(@p1);

                        if (!$norescue) {

                            %rpairs = partition($motifId, $result->{rseq});
                            @p2 = map { $rpairs{$_}->{$originalpairs{$_}} || 0 } grep { $_ < $originalpairs{$_} } keys %originalpairs;
                            $result->{meanprob2} = mean(@p2);

                        }

                    }

                }

                # Sort results
                if (!$noensprob &&
                    !$norescue) {

                    @results = sort { (1 - $b->{meanprob1} + $b->{meanprob2}) <=> (1 - $a->{meanprob1} + $a->{meanprob2}) ||
                                      $a->{bpdist2} <=> $b->{bpdist2} ||
                                      $b->{bpdist1} <=> $a->{bpdist1} ||
                                      $a->{ediff2} <=> $b->{ediff2} ||
                                      $b->{ediff1} <=> $a->{ediff1}} @results;

                }
                elsif (!$noensprob) {

                    @results = sort { $a->{meanprob1} <=> $b->{meanprob1} ||
                                      $b->{bpdist1} <=> $a->{bpdist1} ||
                                      $b->{ediff1} <=> $a->{ediff1}} @results;

                }
                elsif (!$norescue) {

                    @results = sort { $a->{bpdist2} <=> $b->{bpdist2} ||
                                      $b->{bpdist1} <=> $a->{bpdist1} ||
                                      $a->{ediff2} <=> $b->{ediff2} ||
                                      $b->{ediff1} <=> $a->{ediff1}} @results;

                }
                else {

                    @results = sort { $b->{bpdist1} <=> $a->{bpdist1} ||
                                      $b->{ediff1} <=> $a->{ediff1}} @results;

                }

                # Report results
                mktree($output . $entry->id(), "755") if (!-d $output . $entry->id());

                $xmlio = Data::IO->new( file      => $output . $entry->id() . "/motif_" . join("-", $motif, $motifEnd) . ".xml",
                                        mode      => "w",
                                        binmode   => ":encoding(utf-8)",
                                        verbosity => -1 );
                $xml = Data::XML->new( heading   => 1,
                                       indent    => 0,
                                       autoclose => 1 );
                $n = 0;
                $motifattribs = { id       => $entry->id(),
                                  position => join("-", $motif, $motifEnd),
                                  energy   => sprintf("%.2f", $original[1]) };
                $motifattribs->{frame} = join("-", @motif) if ($incoding);

                $xml->opentag("motif", $motifattribs);

                foreach my $result (@results[0 .. min(scalar(@results), $maxresults || scalar(@results)) - 1]) {

                    $xml->opentag("result", { n => $n });
                    $xml->opentag("mutant", { probability => $noensprob ? "NaN" : sprintf("%.2f", exists $targetStructs{$id}->{$motif} ? -$result->{meanprob1} : $result->{meanprob1}),
                                              distance    => exists $targetStructs{$id}->{$motif} ? -$result->{bpdist1} : $result->{bpdist1},
                                              ddG         => exists $targetStructs{$id}->{$motif} ? -sprintf("%.2f", $result->{ediff1}) : sprintf("%.2f", $result->{ediff1}),
                                              energy      => sprintf("%.2f", $result->{venergy}),
                                              ($incoding ? "codons" : "bases") => join(",", @{$result->{indices1}}) });
                    $xml->tagline("sequence", $result->{vseq});
                    $xml->tagline("structure", $result->{vstruct});
                    $xml->closelasttag();

                    if (!$norescue) {

                        $xml->opentag("rescue", { probability => $noensprob ? "NaN" : sprintf("%.2f", $result->{meanprob2}),
                                                  distance    => $result->{bpdist2},
                                                  ddG         => sprintf("%.2f", $result->{ediff2}),
                                                  energy      => sprintf("%.2f", $result->{renergy}),
                                                  ($incoding ? "codons" : "bases") => join(",", @{$result->{indices2}}) });
                        $xml->tagline("sequence", $result->{rseq});
                        $xml->tagline("structure", $result->{rstruct});
                        $xml->closelasttag();

                    }

                    $xml->closelasttag();

                    $n++;

                }

                $xmlio->write($xml->xml());

                { lock(%results);
                  $results{success}++;

                  print CLRRET . "[+] Generating structure mutants" . ($norescue ? " " : " and rescues ") . "[Last: " . $entry->id() . " (motif: " . $motif . ")]" }

            }
            else {

                lock(%results);
                $results{fail}++;

                next;

            }

        }

    }

    threads->exit();

}

sub checkvienna {

    my $ret = `$rnafold --version`;

    if ($ret =~ m/RNAfold (\d+)\.(\d+)/) {

        my ($v1, $v2) = ($1, $2);

        die "\n\n  [!] Error: RF Fold requires ViennaRNA package v2.2.0 or greater (Detected: v" . $v1 . "." . $v2 . ")\n\n" if ($v1 < 2 ||
                                                                                                                                  $v2 < 2);

    }
    else { die "\n\n  [!] Error: Unable to detect ViennaRNA package version\n\n"; }

}

# Returns base-pairing probabilities
sub partition {

    my ($id, $sequence) = @_;

    my ($cmd, $ret, %pairs);

    open(my $wh, ">", $id . ".fasta") or return();
	print $wh ">" . $id . "\n" . $sequence;
	close($wh);

	$cmd = $rnafold . " --noPS -p --infile='" . $id . ".fasta' --noLP";
	$ret = `$cmd 2>/dev/null`;

	open(my $fh, "<", $id . "_dp.ps") or return();
	while(<$fh>) {

		if ($_ =~ m/^(\d+) (\d+) ([\d\.]+(?:e-\d+)?) ubox$/) {

			my ($i, $j, $p) = ($1, $2, $3);

			$i -= 1;         # Base numbering is 1-based
			$j -= 1;
			$p = $p ** 2;    # ViennaRNA returns sqrt(p(i,j))

			$pairs{$i}->{$j} = $p;

		}

	}
	close($fh);

	unlink(glob($id . "*"));

	return(%pairs);

}

sub _permute {

    my ($a, $b) = @_;

    if (@{$a} > $maxeval) {

        @{$a} = List::Util::shuffle(@{$a});
        @{$a} = @{$a}[0 .. $maxeval - 1];

    }

    my (@set);

    for my $x (@{$a}) {

        for my $y (@{$b}) {

            if (ref($x) ne "ARRAY") { push(@set, [$x, $y]); }
            else { push(@set, [ (@{$x}, $y) ]);
            }
        }
    }

    return(\@set);

}

sub permute {

    my ($set, @permutations);

    if (@_ > 1) {

        $set = List::Util::reduce { _permute($a, $b) } @_;
        @permutations = grep { @$_ == $nmutations &&
                               List::Util::all { length() } @$_ } @{$set};

    }
    else { @permutations = map { [$_] } @{$_[0]}; }

    if (@permutations > $maxeval) {

        @permutations = List::Util::shuffle(@permutations);
        @permutations = @permutations[0 .. $maxeval - 1];

    }

    return(@permutations);

}

sub help {

    print "\n  [!] Error: Invalid option. Please check the help\n" if ($_[0]);

    die <<HELP;

 RF Mutate (v$Core::Utils::VERSION)
 RNA Framework [http://www.rnaframework.com]

 Author:  Danny Incarnato (dincarnato[at]rnaframework.com)
 Summary: Designs structure mutants and rescues

 Usage:   rf-mutate [Options] structures_folder/                 # Whole transcriptome
          rf-mutate [Options] transcript.(fa|fasta|db|ct)        # Single transcript

 Options                                             Description
 -p   or --processors          <int>                 Number of processors to use (Default: 1)
 -o   or --output-dir          <string>              Output directory (Default: rf_mutate/)
 -ow  or --overwrite                                 Overwrites output directory (if the specified path already exists)
 -mf  or --motif-file          <string>              Path to a file containing the list of motifs to mutate (mandatory)
 -tf  or --target-file         <string>              Path to a file containing a list of target structures the motifs 
                                                     should fold into upon mutagenesis (optional)
 -of  or --orf-file            <string>              Path to a file containing transcript ORFs (optional)
 -lo  or --longest-orf                               Automatically finds the longest ORF
 -mo  or --min-orf-length      <int>                 Minimum length (in aa) to select the longest ORF (requires -lo, Default: 50)
 -als or --alt-start                                 Longest ORF is allowed to start with alternative start codon (requires -lo)
 -ans or --any-start                                 Longest ORF is allowed to start with any codon (requires -lo)
 -gc  or --genetic-code        <int>                 Genetic code table for the reference organism (1-33, Default: 1)
 -ec  or --exclude-codons      <string>              A comma (or semicolon) separated list of rare codons to be avoided
 -md  or --min-distance        <float>               Minimum (fractional) base-pair distance between wild-type and mutant (>0-1, Default: 0.5)
 -t   or --tolerance           <float>               Maximum (fractional) base-pair distance between wild-type and rescue (0-1, Default: 0.2)
 -mt  or --max-dist-to-target  <float>               Maximum (fractional) base-pair distance between mutant and target structure (0-1, Default: 0.2)
 -mi  or --max-iterations      <int>                 Maximum number of iterations (>0, Default: 1000)
 -me  or --max-evaluate        <int>                 Maximum number of mutants to evaluate (>0, Default: 1000)
 -mr  or --max-results         <int>                 Maximum number of mutants to report per motif (Default: all)
 -nm  or --n-mutations         <int>                 Number of bases (or codons) to simultaneously mutate (>0, Default: 1)
 -nr  or --no-rescue                                 Disables design of rescue mutations
 -ne  or --no-ensemble-prob                          Disables evaluation of mutant/rescue Boltzmann ensemble
 -vrf or --vienna-rnafold      <string>              Path to ViennaRNA RNAfold executable (Default: assumes RNAfold is in PATH)

HELP

}
