#!/usr/bin/env perl

##
# RF JSON2RC
# RNA Framework [http://www.rnaframework.com]
#
# Author:  Danny Incarnato (dincarnato[at]rnaframework.com)
# Summary: Extract information for deconvoluted structure profiles from DRACO JSON output,
#          producing RC files suitable for structure prediction
#
# This program is free software, and can be redistribute  and/or modified
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.
#
# Please see <http://www.gnu.org/licenses/> for more informations.
##

use strict;
use Config;
use File::Basename;
use FindBin qw($Bin);
use Getopt::Long qw(:config no_ignore_case);
use threads;
use threads::shared;

use lib $Bin . "/lib";

use Core::Mathematics qw(:all);
use Core::Statistics;
use Core::Utils;
use Data::IO::Sequence;
use Data::Sequence::Utils;
use RF::Data::IO::RC;
use RF::Data::RC;
use Term::Constants qw(:screen);
use Term::Table;

BEGIN {

    my ($class);

    for (qw(Cpanel/JSON/XS.pm JSON/XS.pm JSON/PP.pm)) {

        $class = $_ if (eval { require $_; 1; });

        last if ($class);

    }

    if ($class) {

        $class =~ s/\//::/g;
        $class =~ s/\.pm$//;
        $class->import("decode_json");

        warn "\n  [!] Note: for better performances, it is recommended to install JSON::XS\n" if ($class eq "JSON::PP");

    }
    else { die "\n  [!] Error: No JSON IO module available\n\n"; }

}

$|++;

die "\n  [!] Error: This program requires ithreads." .
    "\n             Please recompile Perl with ithreads support and try again\n\n" unless(defined $Config{useithreads});

my ($help, $output, $overwrite, $rcin, $needRC,
    $jsonin, $medianPreCov, $medianCov, $data, 
    $minConf, $maxConf, $extend, $corrByMajority,
    $corrAll, $surroundToRC, $ignoreTerm, $minOverlapReps,
    $rcindex, $spearman, $minCorrMatch, $acOnly,
    $noMergeOverlapping, $minOverlapMerge, $minCorrMerge, 
    $success, $skipZeroClusterWins, $capMutFreqs, $keepIgnored, 
    @rcin, @jsonin, @jsonData, @rcout, @filenames, 
    %allWins, %commonWins, %files, %params);

do {

    local $SIG{__WARN__} = sub { };

    GetOptions( "h|help"                     => \$help,
                "o|output=s"                 => \$output,
                "ow|overwrite"               => \$overwrite,
                "r|rc=s"                     => \$rcin,
                "rci|rc-index=s"             => \$rcindex,
                "j|json=s"                   => \$jsonin,
                "nm|no-merge-overlapping"    => \$noMergeOverlapping,
                "mom|min-overlap-merge=s"    => \$minOverlapMerge,
                "mcm|min-corr-merge=s"       => \$minCorrMerge,
                "ep|median-pre-cov=s"        => \$medianPreCov,
                "ec|median-cov=s"            => \$medianCov,
                "nc|min-confs=i"             => \$minConf,
                "xc|max-confs=i"             => \$maxConf,
                "e|extend=i"                 => \$extend,
                "sr|surround-to-rc"          => \$surroundToRC,
                "i|ignore-terminal=s"        => \$ignoreTerm,
                "ki|keep-ignored"            => \$keepIgnored,
                "mcr|min-corr-reps=s"        => \$minCorrMatch,
                "s|spearman"                 => \$spearman,
                "sz|skip-zero-cluster-wins"  => \$skipZeroClusterWins,
                "mor|min-overlap-reps=s"     => \$minOverlapReps,
                "cf|cap-mut-freqs=s"         => \$capMutFreqs,
                "ca|corr-all"                => \$corrAll,
                "cm|corr-by-majority"        => \$corrByMajority ) or help(1);

};

help() if ($help);

# Default
$output ||= "rf_json2rc/";
$minConf //= 2;
$maxConf ||= 1e9;
$ignoreTerm //= 0.05;
$medianPreCov //= 2000;
$medianCov //= 0;
$minOverlapReps ||= 0.75;
$minCorrMatch //= 0.7;
$minOverlapMerge ||= 0.75;
$minCorrMerge //= 0.7;
$extend //= 0;
$capMutFreqs ||= 1;
$success = 0;
$output =~ s/\/?$/\//;
@rcin = split(/,/, $rcin);
@jsonin = split(/,/, $jsonin);

$needRC = $minConf < 2 || $surroundToRC ? 1 : 0;

die "\n  [!] Error: No input JSON file provided\n\n" if (!@jsonin);
for (@jsonin) { die "\n  [!] Error: Provided JSON file \"" . $_ . "\" doesn't exist\n\n" if (!-e $_); }
die "\n  [!] Error: No input RC file provided\n\n" if ($needRC && !@rcin);
for (@rcin) { die "\n  [!] Error: Provided RC file \"$_\" doesn't exist\n\n" if (!-e $_); }
die "\n  [!] Error: Provided RCI index file does not exist\n\n" if ($needRC && defined $rcindex && !-e $rcindex);
die "\n  [!] Error: Fewer RC files than JSON files provided\n\n" if ($needRC && @rcin < @jsonin);
die "\n  [!] Error: Median cumulative coverage must be a positive int\n\n" if (!ispositive($medianCov) || !isint($medianCov));
die "\n  [!] Error: Median preCoverage must be a positive int\n\n" if (!ispositive($medianPreCov) || !isint($medianPreCov));
die "\n  [!] Error: Bases to ignore must be comprised between 0 and 0.2\n\n" if (!isnumeric($ignoreTerm) || !inrange($ignoreTerm, [0, 0.2]));
die "\n  [!] Error: Minimum overlap between matching windows must be comprised between 0 and 1\n\n" if (!isnumeric($minOverlapReps) || !inrange($minOverlapReps, [0, 1]));
die "\n  [!] Error: Minimum overlap between consecutive windows must be comprised between 0 and 1\n\n" if (!isnumeric($minOverlapMerge) || !inrange($minOverlapMerge, [0, 1]));
die "\n  [!] Error: Minimum correlation must be comprised between 0 and 1\n\n" if (!isnumeric($minCorrMatch) || !inrange($minCorrMatch, [0, 1]) || !isnumeric($minCorrMerge) || !inrange($minCorrMerge, [0, 1]));
die "\n  [!] Error: Minimum number of conformations is higher than maximum\n\n" if ($minConf > $maxConf);
die "\n  [!] Error: Mutation frequency cap must be comprised between 0 and 1\n\n" if (!isnumeric($capMutFreqs) || !inrange($capMutFreqs, [0, 1]));
die "\n  [!] Error: Parameter -cm requires -ca\n\n" if ($corrByMajority && !$corrAll);

print "\n[+] Making output directory...";

if (-e $output) {

    if ($overwrite) {

        my $error = rmtree($output);

        die "\n\n  [!] Error: " . $error . "\n\n" if ($error);

    }
    else { die "\n\n  [!] Error: Output directory already exists." .
               "\n      Please use -ow (or --overwrite) to overwrite output directory\n\n"; }

}

if (my $error = mktree($output, "755")) { die "\n\n  [!] Error: Unable to create temporary directory ($error)\n\n"; }

if ($needRC) {

print "\n[+] Opening RC file(s)...";

    # Replaces the RC file with an IO object pointing to that file
    @rcin = map { my $i = $rcindex ? $rcindex : (-e "$_.rci" ? "$_.rci" : undef);
                  RF::Data::IO::RC->new( file       => $_,
                                         index      => $i,
                                         buildindex => $i ? 0 : 1 ) } @rcin;

}

print "\n[+] Importing JSON file(s) [0 imported]";

for my $jsonIdx (0 .. $#jsonin) {

    my ($data, $file, $fixed, $lastid, 
        $nFiles, $legacy, $filenames);
    $file = $jsonin[$jsonIdx];
    ($data, $fixed) = fixJson($file);
    $data = decode_json($data);

    if (exists $data->{filename}) {

        $legacy = 1;
        $filenames = $data->{filename};

    }
    else { $filenames = exists $data->{params} ? $data->{params}->{mm} : $data->{filenames}; }

    # To check params agreement 
    if (exists $data->{params}) {

        # The $data->{params}->{$_} + 0 is needed to convert JSON::PP::Bolean objects into values
        push(@{$params{$_}}, blessed($data->{params}->{$_}) ? 
                             ($data->{params}->{$_} + 0 ? "true" : "false") : $data->{params}->{$_}) for (grep { $_ !~ /^(?:mm|output|processors|whitelist|outputRawNClusters|log-level|assignmentsDumpDir)$/ } keys %{$data->{params}});

    }

    push(@filenames, $legacy ? $filenames : @$filenames);
    $nFiles = $legacy ? 1 : scalar(@$filenames);

    die "\n\n  [!] Error: Fewer RC files than processed MM files provided (provided: " . scalar(@rcin) . 
        ", expected so far: " . scalar(@filenames) . ")\n\n" if ($needRC && @rcin < @filenames);

    foreach my $fileIdx (0 .. $nFiles - 1) {

        $file = $legacy ? $filenames : $filenames->[$fileIdx];
        push(@jsonData, {}); # Initializing storage of JSON data

        foreach my $transcript (@{$data->{transcripts}}) {

            my ($id, $sequence, $nReads, @windows);
            $id = $transcript->{id}; 
            $sequence = $transcript->{sequence};
            $nReads = $legacy ? $transcript->{nReads} : $transcript->{nReads}->[$fileIdx];

            if ($needRC && !$rcin[$jsonIdx + $fileIdx]->read($id)) {

                print CLRRET . "[!] Warning: transcript \"$id\" is missing in RC file \"" . $rcin[$jsonIdx + $fileIdx]->file() . "\", and it will be skipped...\n";

                next;

            }

            # Storing parsed JSON for later
            $jsonData[-1]->{$id} = { sequence => $sequence,
                                     nReads   => $nReads,
                                     windows  => {} };

            foreach my $window ($legacy ? @{$transcript->{windows}} : @{$transcript->{windows}->[$fileIdx]}) {

                die "\n\n  [!] Error: Input file \"$file\" does not look like a valid DRACO JSON file (preCoverage field is missing)\n\n" if (!exists $window->{preCoverage});

                next if (median(@{$window->{preCoverage}}) < $medianPreCov);

                my ($start, $end, @stoichiometries, @cumCov);
                $start = $window->{start};
                $end = $window->{end};
    
                if (exists $window->{stoichiometries} && @{$window->{stoichiometries}}) { @stoichiometries = map { [$_] } @{$window->{stoichiometries}}; }
                else {

                    next if ($skipZeroClusterWins);

                    @stoichiometries = ([1]);

                }

                next if (@stoichiometries < $minConf || @stoichiometries > $maxConf);

                # This handles the special cases in which DRACO fails to re-assign reads to the sortConformations
                # so the counts and coverage arrays are not reported
                next if (@stoichiometries > 1 && (!exists $window->{counts} || !exists $window->{coverage}));

                foreach my $cov (@{$window->{coverage}}) { @cumCov = map { $cumCov[$_] + $cov->[$_] } 0 .. $#{$cov}; }

                next if (@cumCov && median(@cumCov) < $medianCov);

                # Storing parsed JSON for later
                $jsonData[-1]->{$id}->{windows}->{$start} = { end             => $end,
                                                              stoichiometries => \@stoichiometries,
                                                              counts          => @stoichiometries > 1 ? $window->{counts} : [],
                                                              coverage        => @stoichiometries > 1 ? $window->{coverage} : [] };

                $allWins{$file}->{$id} = [] if (!exists $allWins{$file}->{$id});
                push(@{$allWins{$file}->{$id}}, [$start, $end, scalar(@stoichiometries)]);

                $lastid = $id;
            
            }

        }

    }

    print CLRRET . "[+] Importing JSON file(s) [" . scalar(keys %allWins) . " imported]";

    # The last JSON has been fixed, so we will ignore the last window (for safety)
    pop(@{$allWins{$file}->{$lastid}}) if ($fixed);

}

die "\n\n  [!] Error: No window passed the provided filters\n\n" unless (scalar(keys %allWins));

print "\n[i] Order of input files (must be matched by RC files):" .
      "\n    " . join(", ", @filenames) . "\n" if ($needRC);

# Checks param agreement only if all JSONs have the params field
if (keys %params && scalar(@{$params{(keys %params)[0]}}) == @jsonin) {

    if (my @disagree = grep { scalar(uniq(@{$params{$_}})) != 1 } keys %params) {

        print "\n[!] Warning: Parameter mismatch across DRACO runs, results might be inconsistent\n";
        print "\n    [*] $_: " . join(", ", uniq(@{$params{$_}})) for (@disagree);
        print "\n";

    }

}

if (!$noMergeOverlapping) {

    print "\n[+] Merging concordant overlapping windows...\n";

    foreach my $fileIdx (0 .. $#filenames) {

        my ($from, $to, $file);
        ($from, $to) = (0, 0);
        $file = $filenames[$fileIdx];

        foreach my $id (keys %{$allWins{$file}}) {

            my @wins = sort { $a->[2] <=> $b->[2] ||
                              $a->[0] <=> $b->[0] } @{$allWins{$file}->{$id}};
            $from += @wins;

            for (my $i=0; $i < @wins; $i++) {

                my ($last, $firstj, $nWins, @newSet, %nconfs);
                $last = $wins[$i];

                for (my $j=0; $j < @wins; $j++) {

                    my ($win, $int, $sizeInt);
                    $win = $wins[$j];

                    # Check windows have same number of conformations
                    next if ($win->[2] != $last->[2]);
                    #last if ($win->[2] > $last->[2]);

                    next if ($win->[0] < $last->[0] || $win->[1] < $last->[0]);
                    last if ($win->[0] > $last->[1]);

                    $int = intersect([@{$last}[0..1]], [@{$win}[0..1]]);

                    next if (!$int);

                    $sizeInt = abs(diff(@{$int})) / min(abs(diff(@{$last}[0..1])),abs(diff(@{$win}[0..1])));

                    next if ($sizeInt < $minOverlapMerge && $win->[2] > 1);

                    $firstj = $j if (!defined $firstj);
                    $nWins++;
                    $nconfs{$win->[2]} = [] if (!exists $nconfs{$win->[2]});
                    push(@{$nconfs{$win->[2]}}, $win->[0]);

                }

                foreach my $n (sort {$a <=> $b} keys %nconfs) {

                    if ($n == 1) { # Windows forming 1 conformation

                        my ($start, $end);
                        $start = min(@{$nconfs{$n}});
                        $end = max(map { $jsonData[$fileIdx]->{$id}->{windows}->{$_}->{end} } @{$nconfs{$n}});
                        push(@newSet, [$start, $end, $n]);
                        $jsonData[$fileIdx]->{$id}->{windows}->{$start}->{end} = $end;

                    }
                    else { # Windows forming > 1 conformation

                        # Windows to merge
                        if (@{$nconfs{$n}} > 1) {

                            my ($lastStart, @tmpSet);
                            $lastStart = shift(@{$nconfs{$n}});

                            while (my $start = shift(@{$nconfs{$n}})) {

                                my ($lastWin, $win, $int, @sortedConfs,
                                    @lastCounts, @lastCoverage, @winCounts, @winCoverage);
                                $lastWin = $jsonData[$fileIdx]->{$id}->{windows}->{$lastStart};
                                $win = $jsonData[$fileIdx]->{$id}->{windows}->{$start};
                                $int = intersect([$lastStart, $lastWin->{end} - 1], [$start, $win->{end} - 1]); 
                                @lastCounts = map { [ @{$_}[$int->[0] - $lastStart .. $int->[1] - $lastStart] ] } @{$lastWin->{counts}};
                                @lastCoverage = map { [ @{$_}[$int->[0] - $lastStart .. $int->[1] - $lastStart] ] } @{$lastWin->{coverage}};
                                @winCounts = map { [ @{$_}[$int->[0] - $start .. $int->[1] - $start] ] } @{$win->{counts}};
                                @winCoverage = map { [ @{$_}[$int->[0] - $start .. $int->[1] - $start] ] } @{$win->{coverage}};
                                @sortedConfs = sortConformations(substr($jsonData[$fileIdx]->{$id}->{sequence}, $int->[0], abs(diff(@{$int})) + 1),
                                                                 \@lastCounts, \@lastCoverage, \@winCounts, \@winCoverage, $minCorrMerge);

                                if (@sortedConfs) { # Exceeded correlation threshold

                                    my (@mergedCounts, @mergedCoverage, @mergedStoichiometries);
                                    @mergedCounts = mergeOverlappingArrays($lastWin->{counts}, [ @{$win->{counts}}[@sortedConfs] ], abs(diff(@{$int})) + 1);
                                    @mergedCoverage = mergeOverlappingArrays($lastWin->{coverage}, [ @{$win->{coverage}}[@sortedConfs] ], abs(diff(@{$int})) + 1);
                                    @mergedStoichiometries = mergeStoichiometries($lastWin->{stoichiometries}, [@{$win->{stoichiometries}}[@sortedConfs] ]);

                                    # Updates window's data in jsonData
                                    $lastWin->{end} = max($lastWin->{end}, $win->{end});
                                    $lastWin->{counts} = \@mergedCounts;
                                    $lastWin->{coverage} = \@mergedCoverage;
                                    $lastWin->{stoichiometries} = \@mergedStoichiometries;

                                }
                                # If the last window does not exceed the correlation threshold, we skip it and go to the next one
                                else { push(@tmpSet, [$start, $win->{end}, $n]); }

                            }

                            push(@newSet, [$lastStart, $jsonData[$fileIdx]->{$id}->{windows}->{$lastStart}->{end}, $n]);
                            push(@newSet, @tmpSet) if (@tmpSet);

                        }
                        else { push(@newSet, [$nconfs{$n}->[0], $jsonData[$fileIdx]->{$id}->{windows}->{$nconfs{$n}->[0]}->{end}, $n]); }

                    }

                }

                if (@newSet < $nWins) {

                    @newSet = sort { $a->[0] <=> $b->[0] } @newSet;
                    splice(@wins, $firstj, $nWins, @newSet);
                    $i--;

                }

            }

            $allWins{$file}->{$id} = \@wins;
            $to += @wins;

        }

        print "\n  [-] " . (fileparse($file, ".mm"))[0] . ": " . $from . " windows merged into " . $to . " windows";

    }

    print "\n";

}

# Initialize common windows with one of the files
foreach my $id (keys %{$allWins{$filenames[0]}}) { $commonWins{$id} = [ map { [@$_, { $filenames[0] => [ @$_[0..1] ] }] } @{$allWins{$filenames[0]}->{$id}} ]; }
delete($allWins{$filenames[0]});

if (@filenames > 1) {

    print "\n[+] Identifying common windows...";

    foreach my $file (sort keys %allWins) {

        foreach my $id (keys %commonWins) {

            if (!exists $allWins{$file}->{$id}) {

                delete($commonWins{$id});

                next;

            }

            my (@commonSet);

            for (my $i = 0; $i < @{$commonWins{$id}}; $i++) {

                my ($start1, $end1, $n1, $map, @last);
                ($start1, $end1, $n1, $map) = @{$commonWins{$id}->[$i]};

                for (my $j = 0; $j < @{$allWins{$file}->{$id}}; $j++) {

                    my ($start2, $end2, $n2, $int);
                    ($start2, $end2, $n2) = @{$allWins{$file}->{$id}->[$j]};

                    next if ($n1 > $n2); # Different n of conformations
                    last if ($n1 < $n2);

                    next if ($end2 < $start1);
                    last if ($start2 > $end1); # Start 2nd win is > end 1st win

                    $int = intersect([$start1, $end1], [$start2, $end2]);

                    if ($int) {

                        my ($sizeInt, $meanOverlap, @sizes);
                        $sizeInt = abs(diff(@{$int}));
                        @sizes = ($sizeInt / abs(diff($start1, $end1)), $sizeInt / abs(diff($start2, $end2)));

                        next if (max(@sizes) < $minOverlapReps);

                        $meanOverlap = mean(@sizes);
                        @last = (@{$int}, $j, $meanOverlap) if (!@last || $meanOverlap > $last[3]);

                    }

                }

                if (@last) {

                    push(@commonSet, [@last[0..1], $n1, { %{$map}, $file => [ @{$allWins{$file}->{$id}->[$last[2]]}[0..1] ] }]);
                    splice(@{$allWins{$file}->{$id}}, $last[2], 1);

                }

            }

            delete($commonWins{$id});
            $commonWins{$id} = \@commonSet if (@commonSet);

        }

    }

    print " " . (keys %commonWins ? sum(map { scalar(@{$commonWins{$_}}) } keys %commonWins) : 0) . " identified.";

}

print "\n[+] Filtering windows...";

# Create RC IO objects for output
for (@filenames) {

    my $id = (fileparse($_, ".mm"))[0];

    if (exists $files{$id}) { 
        
        $id .= "_" . $files{$id};
        $files{$id}++;
        
    }
    else { $files{$id} = 1; }

    push(@rcout, RF::Data::IO::RC->new( file => "${output}$id.rc",
                                        mode => "w" ));

}

open(my $sh, ">", $output . "stoichiometries.txt") or die "\n\n  [!] Error: Unable to write stoichiometry summary (" . $! . ")\n\n";
select((select($sh), $|=1)[0]);

print $sh join("\t", qw(#Transcript Start End extStart extEnd), map { (fileparse($_, ".mm"))[0] } @filenames) . "\n";

foreach my $id (sort keys %commonWins) {

    foreach my $win (@{$commonWins{$id}}) {

        my ($start, $end, $nConfs, $winSeq, $upSeq,
            $downSeq, @refCounts, @refCoverage, @rcCache,
            @stoichiometryCache, %coords);
        ($start, $end, $nConfs) = @{$win}[0..2];

        $winSeq = substr($jsonData[0]->{$id}->{sequence}, $start, $end - $start);
        %coords = %{$win->[3]};
        @rcCache = map { [] } 0 .. $#filenames; # For storing RC data, before writing it
        @stoichiometryCache = map { [] } 0 .. $#filenames; # For storing stoichiometries

        # In case we have to enlarge the window upstream and downstream
        # upExtend and downExtend will contain the extension sequence, counts, and coverage
        if ($extend) {

            my ($newStart, $newEnd);
            $newStart = max(0, $start - $extend);
            $newEnd = min(length($jsonData[0]->{$id}->{sequence}), $end + $extend);
            $upSeq = substr($jsonData[0]->{$id}->{sequence}, $newStart, $start - $newStart);
            $downSeq = substr($jsonData[0]->{$id}->{sequence}, $end, $newEnd - $end);

        }

        for my $i (0 .. $#filenames) {

            my ($realStart, $realEnd, $realWin);
            ($realStart, $realEnd) = @{$coords{$filenames[$i]}};
            $realWin = $jsonData[$i]->{$id}->{windows}->{$realStart};

            if ($nConfs > 1) {

                my ($adjStart, $adjEnd, $c, @sortedConfs,
                    @counts, @coverage, @unmaskedCounts, @unmaskedCov);
                $c = 0;
                $adjStart = $start - $realStart;
                $adjEnd = $end - $realStart - 1;
                @counts = map { [ @{$realWin->{counts}->[$_]}[$adjStart .. $adjEnd] ] } 0 .. $nConfs - 1;
                @coverage = map { [ @{$realWin->{coverage}->[$_]}[$adjStart .. $adjEnd] ] } 0 .. $nConfs - 1;

                # Let's mask bases on which correlation should not be calculated
                if ($ignoreTerm) {

                    my $basesToIgnore = round($ignoreTerm * ($end - $start));

                    if ($keepIgnored) {

                        @unmaskedCounts = @{clonearrayref(\@counts)};
                        @unmaskedCov = @{clonearrayref(\@coverage)};

                    }

                    for (0 .. $#counts) {

                        @{$counts[$_]}[0 .. $basesToIgnore - 1] = (0) x $basesToIgnore;
                        @{$coverage[$_]}[0 .. $basesToIgnore - 1] = (0) x $basesToIgnore;
                        @{$counts[$_]}[$#{$counts[$_]} - $basesToIgnore + 1 .. $#{$counts[$_]}] = (0) x $basesToIgnore;
                        @{$coverage[$_]}[$#{$coverage[$_]} - $basesToIgnore + 1 .. $#{$coverage[$_]}] = (0) x $basesToIgnore;

                    }

                }

                # Initializes the reference to which everything will be compared
                if (!@refCounts) {

                    @refCounts = @counts;
                    @refCoverage = @coverage;
                    @sortedConfs = (0 .. $#refCounts);

                }
                else { @sortedConfs = sortConformations($winSeq, \@refCounts, \@refCoverage, \@counts, \@coverage, $minCorrMatch); }

                if (@sortedConfs) {

                    my (@upCounts, @upCoverage, @downCounts, @downCoverage);

                    if ($ignoreTerm && $keepIgnored) {
                       
                        @counts = @unmaskedCounts;
                        @coverage = @unmaskedCov;

                    } 

                    if ($extend) {

                        if ($surroundToRC) {

                            my ($entry, @totCounts, @totCoverage);
                            $entry = $rcin[$i]->read($id);
                            @totCounts = $entry->counts();
                            @totCoverage = $entry->coverage();
                            @upCounts = @totCounts[$start - length($upSeq) .. $start - 1];
                            @downCounts = @totCounts[$end .. $end + length($downSeq) - 1];
                            @upCoverage = @totCoverage[$start - length($upSeq) .. $start - 1];
                            @downCoverage = @totCoverage[$end .. $end + length($downSeq) - 1];

                        }
                        else {

                            @upCounts = (0) x length($upSeq);
                            @downCounts = (0) x length($downSeq);
                            @upCoverage = @upCounts;
                            @downCoverage = @downCounts;

                        }

                    }

                    for (@sortedConfs) {

                        my $rc = RF::Data::RC->new( id         => $id . "_" . $start . "-" . ($end - 1) . "_c" . $c,
                                                    sequence   => $upSeq . $winSeq . $downSeq,
                                                    counts     => [ @upCounts, @{$counts[$_]}, @downCounts ],
                                                    coverage   => [ @upCoverage, @{$coverage[$_]}, @downCoverage ],
                                                    readscount => $jsonData[$i]->{$id}->{nReads} );

                        push(@{$rcCache[$i]}, $rc);
                        push(@{$stoichiometryCache[$i]}, $realWin->{stoichiometries}->[$_]);
                        $c++;

                    }

                }

            }
            else { # If 1 conformation, extract it from the original RC file

                my ($entry, @totCounts, @totCoverage, @counts);
                $entry = $rcin[$i]->read($id);
                @totCounts = $entry->counts();
                @totCoverage = $entry->coverage();
                my $rc = RF::Data::RC->new( id         => $id . "_" . $start . "-" . ($end - 1) . "_c0",
                                            sequence   => $upSeq . $winSeq . $downSeq,
                                            counts     => [ @totCounts[$start - length($upSeq) .. $end - 1 + length($downSeq)] ],
                                            coverage   => [ @totCoverage[$start - length($upSeq) .. $end - 1 + length($downSeq)] ],
                                            readscount => $jsonData[$i]->{$id}->{nReads} );

                push(@{$rcCache[$i]}, $rc);
                push(@{$stoichiometryCache[$i]}, [1]);

            }

        }

        if (uniq(map { scalar(@$_) } @rcCache) == 1) {

            $rcout[$_]->write(@{$rcCache[$_]}) for (0 .. $#rcCache);
            print $sh join("\t", $id, $start, ($end - 1), ($start - length($upSeq)), ($end + length($downSeq) - 1), map { join(";", map { sprintf("%.3f", mean(@$_)) } @$_) } @stoichiometryCache) . "\n";

            $success++;

        }

    }

}

close($sh);

print " " . $success . " windows left.";

if (!$success) {

    warn "\n\n  [!] Warning: no window passed the correlation filter\n";

    rmtree($output);

}
else { $_->close() for (@rcout); }

print "\n[+] All done.\n\n";

sub sortConformations {

    my ($winSeq, $refCounts, $refCoverage, $counts, $coverage, $corrCutoff) = @_;

    my ($bestSet, $bestCorr, @refProfiles, @profiles,
        @combinations);

    if (!defined $acOnly) { # Guessing if data is DMS or not

        my $guCount = 0;

        for my $i (0 .. $#{$refCounts}) { $guCount += sum(map { substr($winSeq, $_, 1) !~ m/^[AC]$/ ? $refCounts->[$i]->[$_] : 0 } 0 .. $#{$refCounts->[$i]}); }

        $acOnly = $guCount ? 0 : 1;

    }

    for my $i (0 .. $#{$refCounts}) {

        push(@refProfiles, [ map { $refCoverage->[$i]->[$_] ? $refCounts->[$i]->[$_] / $refCoverage->[$i]->[$_] : 0 } 0 .. $#{$refCounts->[$i]} ]);
        push(@profiles, [ map { $coverage->[$i]->[$_] ? $counts->[$i]->[$_] / $coverage->[$i]->[$_] : 0 } 0 .. $#{$counts->[$i]} ]);

    }

    @combinations = map { my $i = $_; map { [[$refProfiles[$i], $profiles[$_] ], $i, $_] } 0 .. $#profiles } 0 .. $#refProfiles;

    # For each combination, we calculate the correlation and we replace the arrays with the correlation coefficient + index of the conformations
    foreach my $comb (@combinations) {

        my ($r, @i);
        @i = grep { ($acOnly && substr($winSeq, $_, 1) =~ m/^[AC]$/) || !$acOnly } 0 .. $#{$comb->[0]->[0]};
        $r = (_correlate([@{$comb->[0]->[0]}[@i]], [@{$comb->[0]->[1]}[@i]], { cap => $capMutFreqs }))[0];

        # This handles those cases in which the stdev of the values in one of the arrays is 0 and the correlation is NaN
        return() if (isnan($r));

        $comb->[0] = $r;

    }
    
    foreach my $set (calcCombinations(@combinations)) {

        if ($corrAll) { next if (sum(map { $set->[$_]->[0] >= $corrCutoff ? 1 : 0 } 0 .. $#{$refCounts}) < ($corrByMajority ? $#{$refCounts} : @{$refCounts})); }

        my $avgCorr = mean(map { $set->[$_]->[0] } 0 .. $#{$refCounts});

        next if (!$corrAll && $avgCorr < $corrCutoff);

        if (!defined $bestCorr || $avgCorr > $bestCorr) {

            $bestSet = $set;
            $bestCorr = $avgCorr;

        }

    }

    return() if (!$bestSet);

    # Returns the indices of the windows with respect to the reference set
    return(map { $_->[2] } sort { $a->[1] <=> $b->[1] } @{$bestSet});

}

sub _correlate { return($spearman ? spearman(@_) : pearson(@_)); }

sub calcCombinations {

    my @data = @_;

    my ($n, $search, @matrix, @combinations);

    for my $row (@data) { # Build correlation matrix

        my ($corr, $i, $j) = @$row;
        $matrix[$i][$j] = $corr;

    }

    $n = @matrix;

    $search = sub {

        my ($i, $used, $current) = @_;

        # One full non-overlapping set built
        if ($i == $n) {

            push(@combinations, [ @$current ]);
            return;
        
        }

        for my $j (0 .. $n - 1) {

            next if ($used->{$j});

            $used->{$j} = 1;
            push(@$current, [$matrix[$i][$j], $i, $j]);

            $search->($i + 1, $used, $current);

            pop(@$current);
            delete($used->{$j});

        }

    };

    $search->(0, {}, []);

    return(@combinations);

}

sub mergeOverlappingArrays {

    my ($set1, $set2, $lenOverlap) = @_;

    my (@set1, @set2, @merge);
    @set1 = map { [@$_, ((0) x (scalar(@{$set2->[0]}) - $lenOverlap))] } @{$set1};
    @set2 = map { [((0) x (scalar(@{$set1->[0]}) - $lenOverlap)), @$_] } @{$set2};

    for my $i (0 .. $#set1) {

        my @conf = map { $set1[$i]->[$_] + $set2[$i]->[$_] } 0 .. $#{$set1[$i]};
        push(@merge, \@conf);

    }

    return(@merge);

}

sub mergeStoichiometries {

    my ($set1, $set2) = @_;

    return(map { [ @{$set1->[$_]}, @{$set2->[$_]} ] } 0 .. $#{$set1});

}

sub fixJson {

    my $file = shift;

    my ($json, $fixed, @json, @trail);

    open(my $fh, "<", $file) or die "\n\n  [!] Error: Unable to read from JSON file (" . $! . ")\n\n";
    @json = <$fh>;
    close($fh);

    $json = join("", @json);
    $json =~ s/(?:\n|\r)//g;
    $json .= "\"NaN\"" if (substr($json, -1) eq ",");

    while($json =~ m/([\{\}\[\]])/g) {

        my $last = $1;

        if ($last =~ m/^[\[\{]$/) { push(@trail, $last); }
        else { pop(@trail); }

    }

    while (@trail) {

        my $last = pop(@trail);

        if ($last eq "[") { $json .= "]"; }
        elsif ($last eq "{") { $json .= "}"; }

        if (!$fixed) {

            print CLRRET . "[!] Warning: Truncated JSON for file \"" . $file . "\". Attempting repair, and ignoring last window...";
            $fixed = 1;

        }

    }

    return($json, $fixed);

}

sub help {

    print "\n  [!] Error: Invalid option. Please check the help\n" if ($_[0]);

    die <<HELP;

 RF JSON2RC (v$Core::Utils::VERSION)
 RNA Framework [http://www.rnaframework.com]

 Author:  Danny Incarnato (dincarnato[at]rnaframework.com)
 Summary: Extract information for deconvoluted structure profiles from DRACO JSON output,
          producing RC files suitable for structure prediction

 Usage:   rf-json2rc [Options] -j file1.json,file2.json,...,filen.json -r file1.rc,file2.rc,...,filen.rc

 Options                                          Description
 -o   or --output                 <string>        Output folder (Default: rf_json2rc/)
 -ow  or --overwrite                              Overwrites output folder (if the specified folder already exists)
 -j   or --json                   <string>        A comma-separated list of DRACO JSON files from replicate experiments
 -r   or --rc                     <string>        A comma-separated list of RC files from replicate experiments
                                                  Note: the RC files must follow the same order of the JSON files
 -rci or --rc-index               <string>        An RCI index file to be used for all input RC files
                                                  Note: If no RCI index is provided, RF JSON2RC will look for files with .rci
                                                        extension in the same input folder as the RC files, named after the RC
                                                        files (e.g., Sample.rc will look for Sample.rc.rci).
                                                        If no RCI file is found, it will be created at runtime, and stored in
                                                        the same folder of the input RC files.
 -ep  or --median-pre-cov         <int>           Windows with median preCoverage below this threshold, will be discarded (Default: 2000)
 -ec  or --median-cov             <int>           Windows with a mediam cumulative coverage (the sum of the coverage across all the
                                                  conformations for that window) below this threshold, will be discarded (Default: 0)
 -sz  or --skip-zero-cluster-wins                 Skips windows for which DRACO failed to identify the number of conformations
 -nc  or --min-confs              <int>           Windows forming less than this number of conformations will be discarded (Default: 2)
 -xc  or --max-confs              <int>           Windows forming more than this number of conformations will be discarded (Default: no limit)
 -nm  or --no-merge-overlapping                   Disables merging of intra-replicate concordant overlapping windows
 -mom or --min-overlap-merge      <float>         Minimum fractional overlap between two concordant overlapping windows to be merged (0-1, Default: 0.5)
 -mcm or --min-corr-merge         <float>         Minimum average correlation between corresponding conformations for concordant overlapping
                                                  windows to be merged (0-1, Default: 0.7)
 -e   or --extend                 <int>           Windows are extended by these many bases upstream and downstream (Default: off)
                                                  Note: these bases will be assigned a coverage and mutation count of 0
 -sr  or --surround-to-rc                         Instead of getting coverage and mutation count of 0, bases in up/downstream extensions
                                                  will be assigned the same coverage and mutation count they have in the input RC files (requires -e)
 -i   or --ignore-terminal        <float>         Coverage and mutation counts for this fraction of bases at window termini will be ignored during
                                                  correlation calculation (0-0.2, Default: 0.05)
 -ki  or --keep-ignored                           Bases ignored during correlation calculation, will be kept in the output RC files
                                                  Note: by default, both counts and coverage for these bases is set to 0
 -mor or --min-overlap-reps       <float>         Minimum fractional overlap between windows across replicates to be merged (0-1, Default: 0.5)
 -mcr or --min-corr-reps          <float>         Minimum correlation between corresponding conformations for matched windows across
                                                  replicates, to be reported (0-1, Default: 0.7)
 -s   or --spearman                               Spearman will be used instead of Pearson for correlation analyses
 -cf  or --cap-mut-freqs          <float>         Mutation frequencies will be capped to this value for correlation calculation (>0-1, Default: 1 (no cap))
 -ca  or --corr-all                               Each pairwise comparison between each conformation across every replicate needs to exceed the minimum
                                                  correlation threshold (Default: only the average of all pairwise comparisons needs to exceed the threshold)
 -cm  or --corr-by-majority                       If two matching windows form N conformations, and at least N-1 conformations exceed the minimum correlation
                                                  threshold, the last conformation is also accepted even if the correlation does not exceed the threshold
                                                  (requires -ca)

HELP

}
