#!/bin/bash
PROGRAM="Iterative Refinement Meta-Assembler (IRMA)"
AUTHOR="Samuel S. Shepard"
EMAIL="sshepard@cdc.gov"
AFFIL="CDC/DDID/NCIRD/ID"
DATE="27 Apr 2026"
VERSION="1.3.2"
LICENSE="Licensed under Apache 2 with a Public Domain copyright. This program comes with ABSOLUTELY NO WARRANTY. See notices in <README.md>.
Distributed, third-party components have non-commercial restrictions! See: <IRMA_RES/third_party/MANIFEST.md>"

function print_usage_and_quit() {
    echo -e "$PROGRAM, v$VERSION ($DATE)"
    echo -e "$AUTHOR ($AFFIL), $EMAIL\n"
    echo -e "$LICENSE\n\nUsage:"
    echo -e "(PAIRED-END):\tIRMA <MODULE|MODULE-CONFIG> <R1.fastq.gz|R1.fastq> <R2.fastq.gz|R2.fastq> [path/to/]<sample_name> [options]"
    echo -e "(SINGLE-END):\tIRMA <MODULE|MODULE-CONFIG> <fastq|fastq.gz> [path/to/]<sample_name> [options]\n"
    echo -e "Options:\n\t--external-config|-c <VALID_CONFIG_PATH>\n"
    exit 1
}

args=()
EXTERNAL_CONFIG=""
while [[ "$#" -gt 0 ]]; do
    case $1 in
        --external-config | -c)
            shift
            EXTERNAL_CONFIG=$1
            [ -n "$EXTERNAL_CONFIG" ] || print_usage_and_quit
            shift
            ;;
        --* | -*)
            print_usage_and_quit
            ;;
        *)
            args+=("$1")
            shift
            ;;
    esac
done
nargs=${#args[@]}

if [[ $nargs == 4 ]]; then
    PAIRED=1
    MODULE_CONFIG=${args[0]}
    LEFT=${args[1]}  # R1 / left FASTQ
    RIGHT=${args[2]} # R2 / right FASTQ
    RUN=${args[3]}   # name of the run
elif [[ $nargs == 3 ]]; then
    PAIRED=0
    MODULE_CONFIG=${args[0]}
    LEFT=${args[1]} # single paired end FASTQ
    RIGHT=""
    RUN=${args[2]} # name of the run
else
    print_usage_and_quit
fi

#### UTILITY FUNCTIONS ####
function time_stamp() {
    local t=$(date +"%Y-%m-%d %k:%M:%S")
    echo -e "[$t]\tIRMA/$MODULE_CONFIG $1"
}

function abort() {
    time_stamp "ERROR: $1"
    time_stamp "ABORTED run: $RUN"
    exit 1
}

function rmglob() {
    local glob=""
    for glob in "$@"; do
        if [ -e "$glob" ]; then
            rm "$glob"
        fi
    done
}

function catglob() {
    local glob=""
    for glob in "$@"; do
        if [ -r "$glob" ]; then
            cat "$glob"
        fi
    done
}

wait_check() {
    local F=0
    local p=0
    local -a P=("$@")
    for p in "${P[@]}"; do
        wait "$p" || F=1
    done
    return $F
}

###########################

# variables
OS="Linux"
command -v uname > /dev/null 2>&1 && OS=$(uname -s)
if [[ "$OS" != "Linux" && "$OS" != "Darwin" ]]; then
    time_stamp "Your $OS OS is not yet supported. IRMA only supports macOS or Linux at this time."
    exit 0
fi
IRMA_BIN_SUFFIX=$OS
[[ "$IRMA_BIN_SUFFIX" == "Linux" ]] && IRMA_BIN_SUFFIX+="_$(uname -m)"

R=1        # current round
LANG=POSIX # collation
owd=$(pwd)
destination_directory="$owd"
bpath=
if [ "$bpath" == "" ]; then
    bpath=$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)
fi
echo "${BASH_SOURCE[0]}"
rpath="$bpath/IRMA_RES"      # IRMA resources
spath="$rpath/scripts"       # IRMA specific scripts
vendorp="$rpath/third_party" # Third party binaries distributed with IRMA

# process optional output diretory
if [ "$(basename "$RUN")" != "$RUN" ]; then
    if [ "${RUN:0:1}" == "/" ]; then
        destination_directory=$(dirname "$RUN")
    else
        destination_directory="$owd"/"$(dirname "$RUN")"
    fi

    if [ ! -d "$destination_directory" ]; then
        time_stamp "output directory does not exist, creating: $destination_directory"
        mkdir -p "$destination_directory" 2> /dev/null \
            && time_stamp "output directory created" \
            || {
                time_stamp "creation failed, using default"
                destination_directory="$owd"
            }
    fi

    RUN=$(basename "$RUN")
fi

# make sure output directory is valid
if [ ! -w "$destination_directory" ]; then
    timestamp "WARNING: output directory may not be writable: $destination_directory"
fi

# test input files
[ ! -s "$LEFT" ] && abort "file '$LEFT' empty or unreadable"
[[ "$RIGHT" != "" && ! -s "$RIGHT" ]] && abort "file '$RIGHT' empty or unreadable"
[ "$RIGHT" == "$LEFT" ] && abort "both read pairs point to the same file '$LEFT'"

# FNC - CHECK PROGRAM #
# Idea courtesy: stackoverflow.com/questions/592620/check-if-a-program-exists-from-a-bash-script
function check_prgm() {
    command -v "$1" > /dev/null 2>&1 || {
        echo "$PROGRAM ERROR: '$1' not found, add to the PATH or install." >&2
        exit 1
    }
}

# Find LABEL
if [ -x "$bpath/LABEL" ]; then
    LABEL="$bpath/LABEL"
elif command -v "LABEL" > /dev/null 2>&1; then
    LABEL=$(command -v LABEL)
elif [ -x "$bpath/../label/LABEL" ]; then
    LABEL="$(dirname "$bpath")/label/LABEL"
else
    check_prgm LABEL
fi

check_prgm Rscript
check_prgm perl

# Find LABEL resources
Lvendorp="${LABEL}_RES/third_party"
Lspath="${LABEL}_RES/scripts"
if [[ ! -d "$Lvendorp" || ! -d "$Lspath" ]]; then
    abort "IRMA requires LABEL v0.7.0 or later"
fi

function set_bin() {
    unalias "$1" 2> /dev/null

    command -v "$1" 2> /dev/null || {
        local cmd="$vendorp/${1}"
        if [ -x "$cmd" ]; then
            echo "$cmd"
        else
            cmd+="_$IRMA_BIN_SUFFIX"
            if [ -x "$cmd" ]; then
                echo "$cmd"
            else
                die "Could could not find program: $1"
            fi
        fi
    }
}
export -f set_bin

PARALLEL=$(set_bin parallel)
SAMTOOLS=$(set_bin samtools)
BLAT=$(set_bin blat)
SSW=$(set_bin ssw)
MM2=$(set_bin minimap2)
RSCRIPT=$(set_bin Rscript)
A2M="$Lvendorp/align2model_$IRMA_BIN_SUFFIX"
BASH=bash

check_prgm "$A2M"

# check for module
MODULE=$(echo "$MODULE_CONFIG" | cut -f1 -d'-')
if [ ! -d "$rpath/modules/$MODULE" ]; then
    echo "IRMA ERROR: $MODULE module not found in '$rpath/modules'."
    exit 1
else
    mpath="$rpath"/modules/$MODULE
fi

# load reference dataset paths
Lmpath="$(dirname "$LABEL")/LABEL_RES/training_data/irma-$MODULE"
if [ -n "$(
    shopt -s nullglob
    echo "$mpath"/profiles/*.mod
)" ]; then
    phmms="$mpath"/profiles
elif [ -r "$Lmpath" ]; then
    phmms="$Lmpath"
else
    phmms="none"
fi

# load module parameter file
DEF_SET="$mpath"/reference/consensus.fasta
if [ -r "$rpath"/defaults.sh ]; then
    source "$rpath"/defaults.sh
else
    abort "Missing defaults, please restore from $rpath/.defaults.sh.bak"
fi

# init variables from config files
[ -r "$mpath"/init.sh ] && source "$mpath"/init.sh

if [ -r "$mpath/config/${MODULE_CONFIG}.sh" ]; then
    source "$mpath/config/${MODULE_CONFIG}.sh"
else
    CONFIG=$(echo "${MODULE_CONFIG}" | cut -f2- -d'-')
    if [ -r "$mpath/config/${CONFIG}.sh" ]; then
        source "$mpath/config/${CONFIG}.sh"
    elif [ "$MODULE" != "$CONFIG" ]; then
        echo "IRMA ERROR: Configuration '$MODULE_CONFIG' not found. Valid configs are:"
        tmp=" "
        for i in "$mpath"/config/*sh; do tmp+=" $(basename "$i" .sh)"; done
        echo "$tmp"
        exit 0
    fi
fi

if [ -n "$EXTERNAL_CONFIG" ]; then
    external_vars=$("$spath/sanitize_shell_configs.pl" "$EXTERNAL_CONFIG") \
        || {
            echo "IRMA ERROR: Configuration '$EXTERNAL_CONFIG' could not be processed."
            exit 1
        }
    eval "$external_vars" || {
        echo "IRMA ERROR: could not set variables from: '$EXTERNAL_CONFIG'"
        exit 1
    }
fi

source "$spath/deprecated_or_removed.sh"

IRMA_CORE=""
IRMA_CORE_VERSION="irma-core 0.9.1"
# shellcheck disable=SC2034
USE_IRMA_CORE=1

if [ -x "$spath/irma-core_$IRMA_BIN_SUFFIX" ]; then
    IRMA_CORE="$spath/irma-core_$IRMA_BIN_SUFFIX"
elif command -v "irma-core" > /dev/null 2>&1; then
    IRMA_CORE=$(command -v irma-core)
elif [ -x "$bpath/../irma-core/target/release/irma-core" ]; then
    IRMA_CORE="$bpath/../irma-core/target/release/irma-core"
    echo "IRMA WARNING: irma-core not found in PATH or scripts folder, using: $IRMA_CORE"
else
    echo "IRMA ERROR: irma-core not detected. Please install it and add it to your path."
    exit 1
fi

if [ "$($IRMA_CORE --version)" != "$IRMA_CORE_VERSION" ]; then
    echo "IRMA ERROR: irma-core version mismatch. Expected: $IRMA_CORE_VERSION"
    exit 1
fi

export IFX_LOCAL_PROCS=$SINGLE_LOCAL_PROC
export LOCAL_PROCS_OVERRIDE
read -r SINGLE_LOCAL_PROC DOUBLE_LOCAL_PROC <<< "$($IRMA_CORE num-procs --physical --cap-cores-using-env --include-half)"

[ -n "$LFASTM" ] && SECONDARY_SORT=$LFASTM
# shellcheck disable=SC2034
REF1_SET=$REF_SET

# selection of Zip utility. PIGZ still optional.
if command -v pigz > /dev/null 2>&1; then
    ZIP="pigz"
    Z_OPTS="-p $DOUBLE_LOCAL_PROC -4"
elif command -v "$vendorp/pigz_$IRMA_BIN_SUFFIX" > /dev/null 2>&1; then
    ZIP="$vendorp/pigz_$IRMA_BIN_SUFFIX"
    Z_OPTS="-p $DOUBLE_LOCAL_PROC -4"
elif command -v gzip > /dev/null 2>&1; then
    ZIP="gzip"
    Z_OPTS="-4"
else
    check_prgm gzip
fi

# process seed reference library
if [ ! -s "$REF_SET" ]; then
    echo "IRMA ERROR! The REF_SET data was not found for: '$REF_SET'"
    exit 0
elif [ ! -r "$REF_SET" ]; then
    echo "IRMA ERROR! The REF_SET file was not readable: '$REF_SET'"
    exit 0
fi

if [ "$GRID_ON" -eq "1" ]; then
    if ! command -v qsub > /dev/null 2>&1; then
        echo "IRMA WARNING: configured with GRID_ON=1 but no 'qsub' command found. Switching to GRID_ON=0 for local computation."
        GRID_ON=0
    fi
fi

# build arrays
#shellcheck disable=2206
declare -a MATCH_PROGS=($MATCH_PROG)
#shellcheck disable=2206
declare -a SORT_PROGS=($SORT_PROG)
#shellcheck disable=2206
declare -a ALIGN_PROGS=($ALIGN_PROG)
#shellcheck disable=2206
declare -a DEL_TYPES=($DEL_TYPE)

# check for skip align step
if [[ "$MAX_ROUNDS" -eq "1" && "$ASSEM_REF" -eq "1" ]]; then
    ALIGN_PROGS[0]="NONE"
fi

# define ppath
TOKEN=$(perl -e '@chars = ("A".."Z","0".."9","a".."z");print $chars[int(rand(scalar(@chars)))] for 1..32;')
if [[ "$ALLOW_TMP" -eq "1" && "${GRID_ON:-0}" -eq "0" ]]; then
    if [ -z "$TMP" ]; then
        abort "ALLOW_TMP=1 but TMP var was unspecified!"
    else
        ppath="$TMP"/$(whoami)/IRMAv$VERSION/${RUN}-${TOKEN}
    fi
else
    if [[ "${GRID_PATH:-0}" != "0" && "${GRID_ON:-0}" -eq "1" ]]; then
        ppath="$GRID_PATH"/$(whoami)/IRMAv$VERSION/${RUN}-${TOKEN}
    else
        ppath="$rpath"/ppath/${RUN}-${TOKEN}
    fi
fi

# check if RUN already exists
if [ ! -d "$ppath" ]; then
    mkdir -p "$ppath" || {
        if [[ "$ALLOW_TMP" -eq "1" && "${GRID_ON:-0}" -eq "0" ]]; then
            echo "IRMA WARNING: cannot create directory '$ppath', trying: $rpath/ppath/${RUN}-${TOKEN}"
            ppath="$rpath"/ppath/${RUN}-${TOKEN}
            mkdir -p "$ppath" || abort "cannot create directory '$ppath'"
        else
            abort "cannot create directory '$ppath'"
        fi
    }
else
    abort "run already exists"
fi
original_run=$RUN

# stage paths
function createDirs() {
    local p=$1
    local d=""
    for d in MATCH SORT ALIGN ASSEMBLY GENES; do
        [ ! -d "$p/$d" ] && mkdir "$p/$d"
    done
}

function setDirs() {
    local p=$1
    matchp="$p"/MATCH
    sortp="$p"/SORT
    alignp="$p"/ALIGN
    assemp="$p"/ASSEMBLY
    refp="$p"/GENES
}
createDirs "$ppath"
setDirs "$ppath"

# global program options
SSW_OPTS="-x $SSW_X -m $SSW_M -o $SSW_O -e $SSW_E"
MM2_OPTS="-A ${MM2_A:-$SSW_M} -B ${MM2_B:-$SSW_X} -O ${MM2_O:-$SSW_O} -E ${MM2_E:-$SSW_E}"
BLAT_OPTS="-oneOff=1 -minIdentity=${BLAT_IDENTITY:-80} -tileSize=10"

# IRMA-core preprocess args

PREPROCESS_OPTS=" --min-read-quality ${QUAL_THRESHOLD:-0}"
PREPROCESS_OPTS+=" --min-length ${MIN_LEN:-0}"
[ "${USE_MEDIAN:-1}" -eq "1" ] && PREPROCESS_OPTS+=" --use-median"
if [[ "$PAIRED" -eq "1" && "$ADAPTER" != "" ]]; then
    PREPROCESS_OPTS+=" --adapter-trim $ADAPTER"
    [ "${FUZZY_ADAPTER:-1}" -eq "1" ] && PREPROCESS_OPTS+=" --a-fuzzy"
    [ "${ENFORCE_CLIPPED_LENGTH:-1}" -eq "1" ] && PREPROCESS_OPTS+=" --enforce-clipped-length"

fi

export IRMA_QUEUE

#####################
### Main Functions ###
original_path="$ppath"
original_run=$RUN
function moveProject() {
    local x=2
    local mvpath="$destination_directory"/$original_run
    [ -r "$original_path/R0.fa.bak" ] && rm "$original_path"/R0.fa.bak
    [ -r "$original_path/R0.xfl.bak" ] && rm "$original_path"/R0.xfl.bak

    time_stamp "moving project"
    if [ ! -d "$mvpath" ]; then
        mv "$original_path" "$mvpath"
    else
        local outpath="${mvpath}-V$x"
        while [ -d "$outpath" ]; do
            ((x++))
            outpath="${mvpath}-V$x"
        done
        mv "$original_path" "$outpath"
    fi
}

function die() {
    time_stamp "ERROR: $1"
    moveProject
    exit 1
}

function warn() {
    time_stamp "WARNING: $1"
}

function doLABEL() {
    local queryDB=$1     # FASTA
    local name=$2        # PROJECT
    local labelmodule=$3 # CLASSIFICATION MODULE
    local workdir=$4     # workdir
    local limit=$5       # grid work
    local Earg=0
    local n=$(grep '>' "$queryDB" -c)

    [[ "$n" -gt "$limit" && "$GRID_ON" -eq "1" ]] && Earg=2
    if [ "$n" -gt "0" ]; then
        "$LABEL" -E $Earg -W "$workdir" "$queryDB" "$name" "$labelmodule" > /dev/null 2>&1
        if [ -r "$workdir/${name}/${name}_final.tab" ]; then
            mv "$workdir/${name}/${name}_final.tab" "$workdir/SORTED_${name}.tab"
        fi
    fi
}

function doBLAT() {
    local refDB=$1
    local queryDB=$2
    local prefix=$3
    local name=$4
    local workdir=$5
    local procs=$6
    local limit=$7
    local match_length=${8:-0}
    local doGrid=0
    local script="$workdir"/${prefix}-${name}-BLAT
    local n=$(grep '>' "$queryDB" -c)

    if [[ "$n" -gt "$limit" && "$GRID_ON" -eq "1" ]]; then
        doGrid=1
    elif [ "$n" -ge "$SINGLE_LOCAL_PROC" ]; then
        procs=$SINGLE_LOCAL_PROC
    else
        procs=1
    fi

    local options=""
    if [ "$SORT_PROG" == "BLAT" ]; then
        if [ "$ALIGN_PROG" == "BLAT" ]; then
            options="-C -A"
        else
            options="-C"
        fi
    elif [[ "$SORT_PROG" == "LABEL" && "${SECONDARY_SORT:-0}" -eq "1" ]]; then
        [ -z "${GENE_GROUP}" ] && options="-T" || options="-G $GENE_GROUP"
    fi
    [ "$INCL_CHIM" -eq "1" ] && options+=" -I"
    [ "$SKIP_E" -eq "1" ] && options+=" -S"
    [ "$match_length" -gt "0" ] && options+=" -L $match_length"

    ###
    cat > "${script}.sh" << EOL
#!/bin/bash
LANG=$LANG
shopt -u nocaseglob;shopt -u nocasematch

if [ "\$#" -eq "1" ];then
	ID=\$(printf %04d \$1)
else
	ID=\$(printf %04d \$SGE_TASK_ID)
fi
INPUT="$workdir"/${prefix}-${name}_\$ID.fasta
OUTPUT="$workdir"/${prefix}-${name}_\$ID.blat

"$BLAT" "$refDB" "\$INPUT" "\$OUTPUT" $BLAT_OPTS
"$spath"/parseBlat.pl "\$OUTPUT" "\$INPUT" $options -P "$prefix"
EOL
    ###

    chmod 755 "${script}.sh"
    "$spath"/interleavedSamples.pl --silence -G "$procs" "$queryDB" "$workdir/$prefix-$name" \
        || die "BLAT sample interleaving failed!"

    if [ "$doGrid" -eq "1" ]; then
        # Note: intentionally making optional and splitting
        # shellcheck disable=SC2086
        qsub $IRMA_QUEUE -t "1-$procs:1" -N "${prefix}-${name}-BLAT" -sync y -wd "$workdir" -j y -o "${script}.o" "${script}.sh" > /dev/null 2>&1 \
            || {
                time_stamp "qsub of '${prefix}-${name}-BLAT' failed. Switching to master node."
                "$PARALLEL" --nn --workdir "$workdir" -j "$SINGLE_LOCAL_PROC" -q $BASH "${script}.sh" {} ::: $(seq "$procs") > /dev/null 2>&1
            }
    else
        "$PARALLEL" --nn --workdir "$workdir" -j "$procs" -q $BASH "${script}.sh" {} ::: $(seq "$procs") > /dev/null 2>&1
    fi
    rmglob "${script}.sh"
}

function doMATCH() {
    local DB=$1
    local prefix=$2
    local match_ext=""
    local name="all"
    local doGrid=0
    local procs=0
    local i=0

    if [ "$MATCH_PROG" == "BLAT" ]; then
        doBLAT "$REF_SET" "$DB" "$prefix" "$name" "$matchp" "$MATCH_PROC" "$LIMIT_BLAT" "${MIN_BLAT_MATCH:-0}" || return 1
        match_ext="blat"
    else
        time_stamp "$prefix aborted, MATCH:$MATCH_PROG not implemented"
        return 1
    fi

    time_stamp "$prefix all-match with $MATCH_PROG finished"
    # consolidate
    cat "$matchp/${prefix}-${name}"_????.match > "$matchp/${prefix}.match" &
    cat "$matchp/${prefix}-${name}"_????.nomatch > "$matchp/${prefix}.nomatch" &
    cat "$matchp/${prefix}-${name}"_????.chim > "$matchp/${prefix}.chim" &
    wait

    # LABEL fast mode consoldiation
    if [[ "${SECONDARY_SORT:-0}" -eq "1" && "$SORT_PROG" == "LABEL" && ! -z "$GENE_GROUP" ]]; then
        for i in $(echo "$GENE_GROUP" | tr ',:' ' '); do
            if test -n "$(
                shopt -s nullglob
                echo "$matchp/${prefix}-${name}"_????.match."$i"
            )"; then
                cat "$matchp/${prefix}-${name}"_????.match."$i" > "$matchp/${prefix}.match.$i" && rm "$matchp/${prefix}-${name}"_????.match."$i" &
            fi
        done
        wait
    fi

    # CLEAN UP
    rmglob "$matchp/${prefix}-${name}"_????.{match,nomatch,chim,fasta}
    cd "$matchp"
    # Note: intentionally splitting the options
    # shellcheck disable=SC2086
    tar -cf - "${prefix}-${name}"_????.$match_ext | "$ZIP" $Z_OPTS - > "$matchp/${prefix}.tar.gz" 2> /dev/null
    cd - > /dev/null
    rmglob "$matchp/${prefix}-${name}"_????.$match_ext
    time_stamp "$prefix consolidated & cleaned"

    # EXTEND to SORT
    if [ "$SORT_PROG" == "$MATCH_PROG" ]; then
        mkdir "$sortp/${prefix}"
        cat "$matchp/${prefix}-${name}"_????.class > "$sortp/$prefix/SORT_result.txt"
        rmglob "$matchp/${prefix}-${name}"_????.class
    fi

    # EXTEND to ALIGN
    if [ "$ALIGN_PROG" == "$MATCH_PROG" ]; then
        if test -n "$(
            shopt -s nullglob
            echo "$matchp"/*.sto
        )"; then
            mv "$matchp"/*.sto "$alignp"
        fi
    fi
    return 0
}

function doSORT() {
    local DB=$1
    local prefix=$2
    local -a pid=()

    if [ "$SORT_PROG" == "LABEL" ]; then
        mkdir "$sortp/${prefix}"
        if [ "${SECONDARY_SORT:-0}" -eq "1" ]; then
            #shellcheck disable=2207
            local gene_groups=($(echo "$GENE_GROUP" | tr ',:' ' '))
            #shellcheck disable=2207
            local label_modules=($(echo "$SECONDARY_LABEL_MODULES" | tr ',:' ' '))
            if [[ "${#gene_groups[@]}" -ne "${#label_modules[@]}" || "${#gene_groups}" -eq "0" ]]; then
                time_stamp "Bad LABEL module specification for SECONDARY_SORT=$SECONDARY_SORT, see: '$GENE_GROUP' and '$SECONDARY_LABEL_MODULES'"
                return 1
            fi

            local index=0
            local geneGroup=""
            for index in "${!gene_groups[@]}"; do
                geneGroup=${gene_groups[$index]}
                labelModule=${label_modules[$index]}
                groupFile="$matchp/${prefix}.match.$geneGroup"

                if [ -s "$groupFile" ]; then
                    doLABEL "$groupFile" "$MODULE-$geneGroup" "$labelModule" "$sortp/${prefix}" "$LIMIT_LABEL" &
                    pid+=($!)
                fi
            done
            wait_check "${pid[@]}" || {
                time_stamp 'LABEL sort failed, ABORTING.'
                return 1
            }
        else
            # SORT using LABEL full
            doLABEL "$matchp/${prefix}.match" "$MODULE" "$LABEL_MODULE" "$sortp/${prefix}" "$LIMIT_LABEL" || return 1
        fi
        cat "$sortp/${prefix}/SORTED"_*.tab > "$sortp/${prefix}/SORT_result.txt" 2> /dev/null
    elif [ "$SORT_PROG" == "BLAT" ]; then
        # TO-DO: assumes BLAT was run on MATCH
        :
    else
        time_stamp "$prefix aborted, SORT:$SORT_PROG not implemented"
        return 1
    fi
    time_stamp "$prefix sorted using $SORT_PROG"

    if [ ! -r "$sortp/$prefix/SORT_result.txt" ]; then
        time_stamp "$prefix aborted, sort failed"
        return 1
    fi

    x=$(grep UNRECOGNIZABLE "$sortp/$prefix/SORT_result.txt" -c)
    y=$(wc -l < "$sortp/$prefix/SORT_result.txt")
    if [[ "$x" -gt "0" && "$x" -eq "$y" ]]; then
        time_stamp "$prefix found no significant reads"
        return 1
    fi

    local options="-C $MIN_RC -D $MIN_RP"
    [ "$SORT_PROG" == "BLAT" ] && options+=" -G"
    [ "$SORT_GROUPS" != "" ] && options+=" -P $SORT_GROUPS"
    [ "$BAN_GROUPS" != "" ] && options+=" -B $BAN_GROUPS"
    # shellcheck disable=SC2086
    "$spath"/parseSORTresults.pl "$sortp/$prefix/SORT_result.txt" "$matchp/$prefix.match" "$sortp/$prefix" $options || return 1

    # CLEAN UP
    cd "$sortp/$prefix"
    # shellcheck disable=SC2086
    tar -cf - SORT_result.txt | "$ZIP" $Z_OPTS - > "$sortp/$prefix.tar.gz"
    cd - > /dev/null
    rm -rf "$sortp/${prefix:?}"
    return 0
}

function doSAM() {
    local queryDB=$1
    local prefix=$2
    local name=$3
    local workdir=$4
    local procs=$5
    local limit=$6
    local doGrid=0
    local mod="$phmms"/${name}_hmm.mod
    local script="$workdir"/$prefix-$name-SAM
    local n=$(grep '>' "$queryDB" -c)

    if [[ "$n" -gt "$limit" && "$GRID_ON" -eq "1" ]]; then
        doGrid=1
    elif [ "$n" -ge "$SINGLE_LOCAL_PROC" ]; then
        procs=$SINGLE_LOCAL_PROC
    else
        procs=1
    fi

    local options=""
    [ "$SKIP_E" -eq "1" ] && options+="-S"

    cat > "${script}.sh" << EOL
#!/bin/bash
LANG=$LANG
shopt -u nocaseglob;shopt -u nocasematch

if [ "\$#" -eq "1" ];then
	ID=\$(printf %04d \$1)
else
	ID=\$(printf %04d \$SGE_TASK_ID)
fi
DB=${prefix}-${name}_\$ID.fasta
RUN="$workdir"/${prefix}-${name}_\$ID
MOD="$mod"

cd "$workdir"
"$A2M" "\$RUN" -modelfile "\$MOD" -db "\$DB"
"$spath"/a2mToMatchStats.pl "\$RUN.a2m" "\$RUN.sto" $options
EOL

    chmod 755 "${script}.sh"
    "$spath"/interleavedSamples.pl --silence "$queryDB" "$workdir/$prefix-$name" -G "$procs" \
        || die "SAM sample interleaving failed!"

    if [ "$doGrid" -eq "1" ]; then
        # shellcheck disable=SC2086
        qsub $IRMA_QUEUE -t "1-$procs:1" -N "$prefix-$name-SAM" -sync y -wd "$workdir" -j y -o "${script}.o" "${script}.sh" > /dev/null 2>&1 \
            && rm "${script}.o" \
            || {
                time_stamp "qsub of '${prefix}-${name}-SAM' failed. Switching to master node."
                "$PARALLEL" --nn --workdir "$workdir" -j "$SINGLE_LOCAL_PROC" -q $BASH "${script}.sh" {} ::: $(seq "$procs") > /dev/null 2>&1
            }
    else
        "$PARALLEL" --nn --workdir "$workdir" -j "$procs" -q $BASH "${script}.sh" {} ::: $(seq "$procs") > /dev/null 2>&1
    fi
    rmglob "$workdir/${prefix}-${name}"_????.fasta "$workdir/${prefix}-${name}_"????.a2m "${script}.sh"
}

function doALIGN() {
    local prefix=$1
    local queryDB=""
    local i=0
    local last_r=$((R - 1))
    local name=""
    local gene=""
    local -a pid=()

    # FOR EACH GENE
    for queryDB in "$sortp/$prefix"-*.fa; do
        name=$(basename "$queryDB" .fa)
        name=${name#*-}

        if [ "$ALIGN_PROG" == "SAM" ]; then
            doSAM "$queryDB" "$prefix" "$name" "$alignp" "$ALIGN_PROC" "$LIMIT_SAM" &
            pid+=($!)
        elif [ "$ALIGN_PROG" == "BLAT" ]; then
            if [ "$R" -gt 1 ]; then
                for i in $(seq $last_r); do
                    rmglob "$alignp/R${i}-$name"*.sto
                    doBLAT "$refp/R${last_r}-$name.ref" "$sortp/R${i}-${name}.fa" "R$i" "$name" "$alignp" "$ALIGN_PROC" "$LIMIT_BLAT" &
                    pid+=($!)
                done
            fi
        elif [ "$ALIGN_PROG" == "NONE" ]; then
            :
        else
            time_stamp "$prefix aborted, ALIGN:$ALIGN_PROG not implemented"
            return 1
        fi
    done
    wait_check "${pid[@]}" && time_stamp "$prefix rough aligned reads using $ALIGN_PROG" || {
        time_stamp "failed $prefix rough alignment, ABORTING."
        return 1
    }

    # CREATE NEW REFERENCES
    pid=()
    local options="-C $MIN_CA -F $MIN_FA"
    local delete_options=""
    [ "$SKIP_E" -eq "1" ] && options+=" -S"
    [ "$DENOM" != "" ] && options+=" -O $DENOM"
    time_stamp "R$R DEL_TYPE=${DEL_TYPE:-default}"
    if [ "$ALIGN_PROG" != "NONE" ]; then
        for i in "$sortp/$prefix"-*.fa; do
            name=$(basename "$i" .fa)
            gene=${name#*-}
            if [ "${DEL_TYPE:-DEL}" == "REF" ]; then
                # how does this interact with elongate?
                delete_options=" -K "
                delete_options+="$refp"/R${last_r}-${gene}.ref
            elif [ "${DEL_TYPE:-DEL}" == "NNN" ]; then
                delete_options=" -A"
            else
                delete_options=""
            fi
            # shellcheck disable=SC2086
            "$spath"/combineALIGNstats.pl -N "$gene" $options $delete_options "$alignp/R"*-"${gene}"_????.sto > "$refp/$name.ref" &
            pid+=($!)

            # consider placing burden on the function itself to do the cleanup
            [[ "$ALIGN_PROG" == "BLAT" && "$R" -gt 1 ]] && rmglob "$alignp"/R*-"${gene}"_????.{match,nomatch,chim,fasta,class,blat}
        done
        wait_check "${pid[@]}" || {
            time_stamp 'failed match stat aggregation, ABORTING.'
            return 1
        }
    fi
    return 0
}

# Read gathering phase
function doRound() {
    local DB=$1
    local prefix=$2
    local n=$(grep '>' "$DB" -c)
    local x=0
    local y=0
    local gene=""
    local name=""

    time_stamp "$prefix started ($n)"

    # MATCH stage
    doMATCH "$DB" "$prefix" || return 1
    n=$(grep '>' "$matchp/${prefix}.match" -c)
    if [ "$n" -eq "0" ]; then
        time_stamp "$prefix aborted, no matches found"
        return 1
    fi

    ### SORT stage ###
    DB=$matchp/${prefix}.match
    doSORT "$DB" "$prefix" || return 1

    # PRIMARY vs SECONDARY tests
    # if we have primary data from the current round
    if test -n "$(
        shopt -s nullglob
        echo "$sortp/$prefix"-*.fa
    )"; then
        # for each current primary dataset
        for i in "$sortp/$prefix"-*.fa; do
            name=$(basename "$i" .fa)
            gene=${name#*-}
            # if previously a secondary lineage, make the current primary a secondary
            test -n "$(
                shopt -s nullglob
                echo "$sortp"/R*-"${gene}".fa.2
            )" && mv "$i" "${i}.2"
        done
    fi

    # vice-versa, if we have secondary data from this round
    if test -n "$(
        shopt -s nullglob
        echo "$sortp/$prefix-"*.fa.2
    )"; then
        # for each current secondary dataset
        for i in "$sortp/$prefix-"*.fa.2; do
            name=$(basename "$i" .fa.2)
            gene=${name#*-}
            # if previously a primary lineage, make the current secondary a primary
            # used to be ".ref"
            test -n "$(
                shopt -s nullglob
                echo "$sortp"/R*-"${gene}.fa"
            )" && mv "$i" "$sortp/$(basename "$i" .2)"
        done
    fi

    # add R0 for tracing history better
    if [[ "$ASSEM_REF" -eq "0" && "$R" -eq "1" ]]; then
        if test -n "$(
            shopt -s nullglob
            echo "$sortp/$prefix-"*.fa
        )"; then
            for i in "$sortp/$prefix-"*.fa; do
                name=$(basename "$i" .fa)
                gene=${name#*-}
                "$Lspath"/fastaExtractor.pl "$refp"/R0.refs <(echo "$gene") > "$refp/R0-${gene}.ref"
            done
        fi
    fi

    ### ALIGN stage ###
    # given we have primary data in this round
    if test -n "$(
        shopt -s nullglob
        echo "$sortp/$prefix"-*.fa
    )"; then
        doALIGN "$prefix" || return 1
    else
        time_stamp "$prefix aborted, found fewer than $MIN_RP RPs or $MIN_RC reads for all templates"
        if [ "$R" -eq 1 ]; then
            mkdir "$ppath"/low_abundance "$ppath"/other_data
            for i in "$sortp"/*2; do
                mv "$i" "$ppath/low_abundance/$(basename "$i" .2)"
            done

            if test -n "$(
                shopt -s nullglob
                echo "$ppath"/low_abundance/*UNRECOGNIZABLE*
            )"; then
                cat "$ppath"/low_abundance/*UNRECOGNIZABLE* > "$ppath"/other_data/failed_sort.fa
                rmglob "$ppath"/low_abundance/*UNRECOGNIZABLE*
            fi

            mv "$matchp"/R1.nomatch "$ppath"/other_data/unmatched_data.fa
            mv "$matchp"/R1.chim "$ppath"/other_data/chimeric.fa
            mv "$matchp"/R1.tar.gz "$ppath"/other_data/match_output.tar.gz
            mv "$sortp"/R1.tar.gz "$ppath"/other_data/sort_output.tar.gz
            echo -e "Gene\tRead Patterns\tRead Count" > "$ppath"/sorted_read_stats.txt
            cat "$sortp"/R1.txt >> "$ppath"/sorted_read_stats.txt && rm "$sortp"/R1.txt
            echo -e "Interleaved sample\tStarting reads\tQC'd reads\tQuality threshold\tMin length\tUse median" > "$ppath"/read_QC_filtering.txt
            cat "$ppath"/QC_log.txt >> "$ppath"/read_QC_filtering.txt && rm "$ppath"/QC_log.txt
            rmglob "$ppath"/R1.match.* "$ppath"/R*.{fa,xfl}
            rm -rf "$alignp" "$matchp" "$refp" "$sortp" "$assemp"
        fi
        return 1
    fi

    if [ "$ALIGN_PROG" != "NONE" ]; then
        cat "$refp/${prefix}"*.ref > "$refp/${prefix}.refs"
        REF_SET="$refp/${prefix}.refs"
        QUERY_SET="$matchp/$prefix.nomatch"
        time_stamp "$prefix references created"
    fi

    # if we have MERGE_SECONDARY set and it is after the first round, load the alt match data back in for the next round
    if [[ "${MERGE_SECONDARY:-0}" -eq "1" && "$R" -eq "1" ]]; then
        test -n "$(
            shopt -s nullglob
            echo "$sortp"/R1-*.fa.2
        )" && cat "$sortp"/R1-*.fa.2 >> "$QUERY_SET" && rm "$sortp"/R1-*.fa.2
    fi

    return 0
}

#############################################
############# ASSEMBLY SECTION ##############
#############################################

#-------------------------------------------#
#--- Polishing assembly function -----------#
#-------------------------------------------#
# shellcheck disable=SC2317,SC2329
function ASSEM_RUN_SSW() {
    local iter=$1
    local gene=$2
    local ref=$3
    local procs=$4
    local doGrid=$5
    local script="$assemp"/F${iter}-${gene}

    local options=""
    [ "$ASSEM_REF" -eq "1" ] && options+=" -G"
    [ "${SILENCE_COMPLEX_INDELS:-0}" -eq "1" ] && options+=" -S"

    ###
    cat > "${script}.sh" << EOL
#!/bin/bash
LANG=$LANG
shopt -u nocaseglob;shopt -u nocasematch

if [ "\$#" -eq "1" ];then
	ID=\$(printf %04d \$1)
else
	ID=\$(printf %04d \$SGE_TASK_ID)
fi
FASTQ="$assemp"/${gene}_\$ID.fastq
SAM="$assemp"/F${iter}-${gene}-\$ID.sam
STAT="$assemp"/F${iter}-${gene}-\$ID.sto

"$SSW" -s -c -h -r $SSW_OPTS "$ref" "\$FASTQ" > "\$SAM" 2> /dev/null
"$spath"/winnowSAM.pl -I "\$SAM"
"$spath"/samStats.pl $options "$ref" "\$SAM" "\$STAT"
EOL
    ###

    # ASSEMBLE on grid vs local
    chmod 755 "${script}.sh"
    if [ "$doGrid" -eq "1" ]; then
        # shellcheck disable=SC2086
        qsub $IRMA_QUEUE -t "1-$procs:1" -N "F${iter}-$gene" -sync y -wd "$assemp" -j y -o "${script}.o" "${script}.sh" > /dev/null 2>&1 \
            && rm "${script}.o" \
            || {
                time_stamp "qsub of 'F${iter}-$gene' failed. Switching to master node."
                "$PARALLEL" --nn --workdir "$ppath" -j "$SINGLE_LOCAL_PROC" -q "$BASH" "${script}.sh" {} ::: $(seq "$procs")
            }
    else
        "$PARALLEL" --nn --workdir "$ppath" -j "$procs" -q "$BASH" "${script}.sh" {} ::: $(seq "$procs")
    fi
    "$spath"/catSAMfiles.pl "$assemp/F${iter}-${gene}-"*.sam > "$assemp/F${iter}-$gene.sam"
    rm "${script}.sh"
}

# shellcheck disable=SC2317,SC2329
function ASSEM_RUN_CORE() {
    local iter=$1
    local gene=$2
    local ref=$3
    local procs=$4
    local doGrid=$5
    local script="$assemp"/F${iter}-${gene}

    local options=""
    [ "$ASSEM_REF" -eq "1" ] && options+=" -G"
    [ "${SILENCE_COMPLEX_INDELS:-0}" -eq "1" ] && options+=" -S"
    [[ "$CORE_METHOD" == "3pass" ]] && CORE_METHOD="--method 3pass" || CORE_METHOD=""

    ###
    cat > "${script}.sh" << EOL
#!/bin/bash
LANG=$LANG
shopt -u nocaseglob;shopt -u nocasematch

if [ "\$#" -eq "1" ];then
	ID=\$(printf %04d \$1)
else
	ID=\$(printf %04d \$SGE_TASK_ID)
fi
FASTQ="$assemp"/${gene}_\$ID.fastq
SAM="$assemp"/F${iter}-${gene}-\$ID.sam
STAT="$assemp"/F${iter}-${gene}-\$ID.sto

"$IRMA_CORE" aligner --profile-from-ref --single-thread --header --ignore-n --best-match $CORE_METHOD --rev-comp $SSW_OPTS "$ref" "\$FASTQ" --output "\$SAM"
# Winnowing is not required
"$spath"/samStats.pl $options "$ref" "\$SAM" "\$STAT"
EOL
    ###

    # ASSEMBLE on grid vs local
    chmod 755 "${script}.sh"
    if [ "$doGrid" -eq "1" ]; then
        # shellcheck disable=SC2086
        qsub $IRMA_QUEUE -t "1-$procs:1" -N "F${iter}-$gene" -sync y -wd "$assemp" -j y -o "${script}.o" "${script}.sh" > /dev/null 2>&1 \
            && rm "${script}.o" \
            || {
                time_stamp "qsub of 'F${iter}-$gene' failed. Switching to master node."
                "$PARALLEL" --nn --workdir "$ppath" -j "$SINGLE_LOCAL_PROC" -q "$BASH" "${script}.sh" {} ::: $(seq "$procs")
            }
    else
        "$PARALLEL" --nn --workdir "$ppath" -j "$procs" -q "$BASH" "${script}.sh" {} ::: $(seq "$procs")
    fi
    "$spath"/catSAMfiles.pl "$assemp/F${iter}-${gene}-"*.sam > "$assemp/F${iter}-$gene.sam"
    rm "${script}.sh"
}

# shellcheck disable=SC2317,SC2329
function ASSEM_RUN_MM2() {
    local iter=$1
    local gene=$2
    local ref=$3
    local procs=$4
    local doGrid=$5
    local script="$assemp"/F${iter}-${gene}

    local options=""
    [ "$ASSEM_REF" -eq "1" ] && options+=" -G"
    [ "${SILENCE_COMPLEX_INDELS:-0}" -eq "1" ] && options+=" -S"

    local threads=1
    # if executing locally on one file, use multiple threads
    if [[ "$doGrid" -eq "0" && "$procs" -eq "1" ]]; then
        threads=$SINGLE_LOCAL_PROC
    fi

    ###
    cat > "${script}.sh" << EOL
#!/bin/bash
LANG=$LANG
shopt -u nocaseglob;shopt -u nocasematch

if [ "\$#" -eq "1" ];then
	ID=\$(printf %04d \$1)
else
	ID=\$(printf %04d \$SGE_TASK_ID)
fi
FASTQ="$assemp"/${gene}_\$ID.fastq
SAM="$assemp"/F${iter}-${gene}-\$ID.sam
STAT="$assemp"/F${iter}-${gene}-\$ID.sto

"$MM2" -a --secondary=no -t $threads $MM2_OPTS "$ref" "\$FASTQ" > "\$SAM" 2> /dev/null
"$spath"/winnowSAM.pl -F 14 -I "\$SAM"
"$spath"/samStats.pl $options "$ref" "\$SAM" "\$STAT"
EOL
    ###

    # ASSEMBLE on grid vs local
    chmod 755 "${script}.sh"
    if [ "$doGrid" -eq "1" ]; then
        # shellcheck disable=SC2086
        qsub $IRMA_QUEUE -t "1-$procs:1" -N "F${iter}-$gene" -sync y -wd "$assemp" -j y -o "${script}.o" "${script}.sh" > /dev/null 2>&1 \
            && rm "${script}.o" \
            || {
                time_stamp "qsub of 'F${iter}-$gene' failed. Switching to master node."
                "$PARALLEL" --nn --workdir "$ppath" -j "$SINGLE_LOCAL_PROC" -q "$BASH" "${script}.sh" {} ::: $(seq "$procs")
            }
    else
        "$PARALLEL" --nn --workdir "$ppath" -j "$procs" -q "$BASH" "${script}.sh" {} ::: $(seq "$procs")
    fi
    "$spath"/catSAMfiles.pl "$assemp/F${iter}-${gene}-"*.sam > "$assemp/F${iter}-$gene.sam"
    rm "${script}.sh"
}

#-------------------------------------------#
#--- Scoring assembly function   -----------#
#-------------------------------------------#
# shellcheck disable=SC2329
function ASSEM_SCORE_SSW() {
    local SAM=$1
    "$spath"/scoreSAM.pl "$SAM"
}

# shellcheck disable=SC2329
function ASSEM_SCORE_MM2() {
    local SAM=$1
    "$spath"/scoreSAM.pl -F 14 "$SAM"
}
#############################################
#############################################
#############################################

function doCALLS() {
    local iter=$1
    local gene=$2
    local final=$3
    local procs=$4
    local doGrid=$5
    local ref="$assemp"/F${iter}-${gene}.ref

    # PAIRED VS SINGLE END
    if [ "$PAIRED" -eq "1" ]; then
        local prefix=M${iter}-${gene}
        local prefix2="$assemp"/M${iter}-${gene}
        local -a pstats=("-E" "$final-pairingStats.txt")
    else
        local prefix=S${iter}-${gene}
        local prefix2="$assemp"/F${iter}-${gene}
        local -a pstats=()
    fi
    local script="$assemp/${prefix}"

    #########################################
    # DO CALL STATS and MERGE if PAIRED END #
    #########################################
    cat > "${script}.sh" << EOL
#!/bin/bash
LANG=$LANG
shopt -u nocaseglob;shopt -u nocasematch

if [ "\$#" -eq "1" ];then
	ID=\$(printf %04d \$1)
else
	ID=\$(printf %04d \$SGE_TASK_ID)
fi
SAM="$assemp"/F${iter}-${gene}-\$ID.sam
VPREFIX="$assemp"/V${iter}-${gene}-\$ID

if [ "$PAIRED" -eq "1" ];then
	MSAM="$assemp"/M${iter}-${gene}-\$ID.sam
	MPREFIX="$assemp"/M${iter}-${gene}-\$ID
	"${IRMA_CORE}" merge-sam --store-stats "$ref" "\$SAM" "\$MPREFIX"
	"$spath"/varCallStats.pl "$ref" "\$MSAM" "\$VPREFIX"
else
	"$spath"/varCallStats.pl "$ref" "\$SAM" "\$VPREFIX"
fi

EOL
    #########################################

    # GRID vs LOCAL execution of the CALL STATS step, inherits from ASSEMBLY step currently
    chmod 755 "${script}.sh"
    if [ "$doGrid" -eq "1" ]; then
        # shellcheck disable=SC2086
        qsub $IRMA_QUEUE -t "1-$procs:1" -N "$prefix" -sync y -wd "$assemp" -j y -o "${script}.o" "${script}.sh" > /dev/null 2>&1 \
            && rm "${script}.o" \
            || {
                time_stamp "qsub of '${prefix}' failed. Switching to master node."
                "$PARALLEL" --nn --workdir "$ppath" -j "$SINGLE_LOCAL_PROC" -q $BASH "${script}.sh" {} ::: $(seq "$procs")
            }
    else
        "$PARALLEL" --nn --workdir "$ppath" -j "$procs" -q $BASH "${script}.sh" {} ::: $(seq "$procs")
    fi
    rm "${script}.sh"

    # GROUP
    "$spath"/catSAMfiles.pl "${prefix2}"-*.sam > "${final}.sam"

    # DO the CALL step
    local options="-C $MIN_C -F $MIN_F -I $MIN_FI -D $MIN_FD -Q $MIN_AQ -T $MIN_TCC -M $MIN_CONF -S $SIG_LEVEL"
    [ "$AUTO_F" -eq 1 ] && options+=" -A"
    [ "$PAIRED" -eq "1" ] && "$spath"/getPairingStats.pl "${prefix2}"-*.stats > "$final-pairingStats.txt"
    # shellcheck disable=SC2086
    "$spath"/call.pl -P -G $options "${pstats[@]}" "$ref" "$final" "$assemp/V${iter}-${gene}-"*.sto
}

function doPHASING() {
    local gene=$1
    local final=$2
    local procs=1
    local doGrid=0
    local script=${final}-phasing
    local n=$(wc -l < "${final}-variants.txt")
    ((n--))
    local ops=$((((n * n) - n) / 2))
    if [ "$n" -lt "2" ]; then
        return 0
    elif [[ "$n" -gt "$LIMIT_PHASE" && "$GRID_ON" -eq "1" ]]; then
        procs=$PHASE_PROC
        doGrid=1
    else
        doGrid=0
        if [ $ops -gt "$SINGLE_LOCAL_PROC" ]; then
            procs=$SINGLE_LOCAL_PROC
        else
            procs=1
        fi
    fi

    #########################################
    # DO PHASING			                #
    #########################################
    cat > "${script}.sh" << EOL
#!/bin/bash
LANG=$LANG
shopt -u nocaseglob;shopt -u nocasematch

if [ "\$#" -eq "1" ];then
	ID=\$1
else
	ID=\$SGE_TASK_ID
fi
PATS="$final-pats.sto"
VARS="$final-vars.sto"

"$spath"/phase.pl "$final" "\$VARS" "\$PATS" -S $procs -I \$ID
EOL
    #########################################

    chmod 755 "${script}.sh"
    if [ "$doGrid" -eq "1" ]; then
        # shellcheck disable=SC2086
        qsub $IRMA_QUEUE -t "1-$procs:1" -N "${gene}-phasing" -sync y -wd "$ppath" -j y -o "${script}.o" "${script}.sh" > /dev/null 2>&1 \
            && rm "${script}.o" \
            || {
                time_stamp "qsub of '${gene}-phasing' failed. Switching to master node."
                "$PARALLEL" --nn --workdir "$ppath" -j "$SINGLE_LOCAL_PROC" -q $BASH "${script}.sh" {} ::: $(seq "$procs") > /dev/null 2>&1
            }
    else
        "$PARALLEL" --nn --workdir "$ppath" -j "$procs" -q $BASH "${script}.sh" {} ::: $(seq "$procs") > /dev/null 2>&1
    fi
    rm "${final}-pats.sto" "${final}-vars.sto" "${script}.sh"

    local i=""
    for i in EXPENRD JACCARD MUTUALD NJOINTP; do
        cat "${final}"-*-${i}.sqm > "$final-${i}.sqm"
        "$spath"/completeMatrix.pl "$final-${i}.sqm"
        rmglob "${final}"-*-${i}.sqm
    done
}

function REFINE_ASSEMBLY_CALL_VARIANTS() {
    local gene=$1
    local procs=$2
    local doGrid=$3
    local final=$4

    if [ "$ASSEM_PROG" == "SSW" ]; then
        local ASSEM_FUNCTION="ASSEM_RUN_SSW"
        local ASSEM_SCORE="ASSEM_SCORE_SSW"
    elif [ "$ASSEM_PROG" == "MINIMAP2" ]; then
        local ASSEM_FUNCTION="ASSEM_RUN_MM2"
        local ASSEM_SCORE="ASSEM_SCORE_MM2"
    elif [ "$ASSEM_PROG" == "CORE" ]; then
        local ASSEM_FUNCTION="ASSEM_RUN_CORE"
        local ASSEM_SCORE="ASSEM_SCORE_SSW"
    else
        return 1
    fi

    # DEFINE local variables
    local log="$assemp"/$gene.log
    local COMBINE_SAM_OPTS="-N $gene -I $INS_T -D $DEL_T -A $MIN_FA -C $MIN_CA -i ${INS_T_DEPTH:-1} -d ${DEL_T_DEPTH:-1}"
    [ "${MIN_DROPOUT_EDGE_DEPTH:-0}" -gt "0" ] && COMBINE_SAM_OPTS+=" -E $MIN_DROPOUT_EDGE_DEPTH"
    local theRef=""
    local -a theStats=()
    local newRef=""
    local theBAM=""
    local theSAM=""
    local lastScore=0
    local currentScore=0
    local iter=1

    for ((iter = 1; iter <= MAX_ITER_ASSEM; iter++)); do
        # perform current assembly
        theRef="$assemp"/F${iter}-$gene.ref
        $ASSEM_FUNCTION $iter "$gene" "$theRef" "$procs" "$doGrid"

        # convert SAM to BAM
        theSAM="$assemp"/F${iter}-$gene.sam
        theBAM="$assemp"/F${iter}-$gene.bam
        "$SAMTOOLS" view -bS "$theSAM" > "$theBAM" 2> "$theBAM.log" \
            && rm "$theBAM.log" \
            || {
                warn "writing to '$theBAM' failed"
                cat "$theBAM.log"
            }

        # save last score & take current score
        lastScore=$currentScore
        currentScore=$($ASSEM_SCORE "$theSAM")
        echo -e "$currentScore\t$theSAM" >> "$log"

        theStats=("$assemp/F${iter}-$gene-"*.sto)
        if [ "$lastScore" -lt "$currentScore" ]; then
            if [ "$iter" -lt "$MAX_ITER_ASSEM" ]; then
                # generate next reference
                newRef="$assemp/F$((iter + 1))-$gene.ref"
                # shellcheck disable=SC2086
                "$spath"/combineSAMstats.pl "$theRef" $COMBINE_SAM_OPTS "${theStats[@]}" > "$newRef" || return 1
            fi
            rm "${theStats[@]}"
        else
            # we are finished
            rm "${theStats[@]}"
            break
        fi
    done

    # CALL variants
    if [ "$iter" -gt "$MAX_ITER_ASSEM" ]; then
        ((iter--))
        doCALLS "$MAX_ITER_ASSEM" "$gene" "$final" "$procs" "$doGrid" || return 1
    else
        if [ "$lastScore" -gt "$currentScore" ]; then
            rm "$theRef" "$theSAM" "$theBAM"
            ((iter--))
        fi
        doCALLS $iter "$gene" "$final" "$procs" "$doGrid" || return 1
    fi

    # CLEAN UP
    rmglob "$assemp/${gene}"_????.fastq "$assemp"/*-"${gene}"-????.*
    time_stamp "finished $gene (F$iter)"
    return 0
}

function ASSEMBLE_REPORT() {
    local gene=$1
    local final="$ppath"/$gene
    local fastq="$assemp"/$gene.fastq
    local size=$(($(wc -l < "$fastq") / 4))
    local effective_size=$size
    local doGrid=0
    local procs=0

    # Accounts for pairs instead of records when data is sparse for PE
    [ "$PAIRED" -eq "1" ] && effective_size=$((size / 2))

    # SCOPE
    if [[ "$size" -gt "$LIMIT_SSW" && "$effective_size" -gt "$ASSEM_PROC" && "$GRID_ON" -eq "1" ]]; then
        procs=$ASSEM_PROC
        doGrid=1
    else
        doGrid=0
        if [ "$effective_size" -gt "$SINGLE_LOCAL_PROC" ]; then
            procs=$SINGLE_LOCAL_PROC
        else
            procs=1
        fi
    fi

    # ITERATIVE REFINEMENT of ASSEMBLY chained to CALLING and PHASING
    if [ "$PAIRED" -eq "1" ]; then
        "$spath"/interleavedSamples.pl --silence --underscore-header -G "$procs" -P "$fastq" "$assemp"/"$gene" \
            || die "FastQ sample interleaving failed!"
    else
        "$spath"/interleavedSamples.pl --silence -G "$procs" -Q "$fastq" "$assemp"/"$gene" \
            || die "FastQ sample interleaving failed!"
    fi
    time_stamp "started $gene ($size)"
    REFINE_ASSEMBLY_CALL_VARIANTS "$gene" "$procs" $doGrid "$final" || return 1
    doPHASING "$gene" "$final" || return 1

    local n=$(wc -l < "${final}-variants.txt") \
        || {
            time_stamp "missing variant file for $gene"
            return 1
        }
    # shellcheck disable=SC2086 ## Note: PHASE_DISTANCE is optional and experimental ##
    "$IRMA_CORE" phase "${final}-variants.txt" "${final}-EXPENRD.sqm" ${PHASE_DISTANCE:+-t "$PHASE_DISTANCE"}
    if [ "$n" -gt "1" ]; then
        "$RSCRIPT" --vanilla "$spath"/coverageDiagram.R "$RUN" "$gene" "$final-coverage.txt" "$final-variants.txt" "$final-pairingStats.txt" "$final-coverageDiagram.pdf" > /dev/null 2>&1
        if [ "$n" -gt "2" ]; then
            "$PARALLEL" --nn -q "$RSCRIPT" --vanilla "$spath"/sqmHeatmap.R "${final}"-{}.sqm "${final}"-{}.pdf 2 ::: EXPENRD JACCARD MUTUALD NJOINTP > /dev/null
        fi
    else
        "$RSCRIPT" --vanilla "$spath"/simpleCoverageDiagram.R "$RUN" "$gene" "$final-coverage.txt" "$final-coverageDiagram.pdf" > /dev/null 2>&1
    fi

    "$RSCRIPT" --vanilla "$spath"/heuristicDiagram.R "$MIN_AQ" "$MIN_F" "$MIN_TCC" "$MIN_CONF" "${final}-allAlleles.txt" "${final}-heuristics.pdf" > /dev/null 2>&1

    local options="-C $MIN_C -N $RUN -F $MIN_AMBIG"
    local acpath="$ppath"/amended_consensus
    [ "$SEG_NUMBERS" != "" ] && options+=" -S $SEG_NUMBERS"
    [[ "$SORT_GROUPS" == "" && "${NONSEGMENTED:-1}" -eq "0" ]] && options+=" -H"
    if [[ "$MIN_CONS_SUPPORT" != "" || "$MIN_CONS_QUALITY" != "" ]]; then
        options+=" -c ${final}-coverage.txt"
        [ "$MIN_CONS_SUPPORT" != "" ] && options+=" --min-consensus-support ${MIN_CONS_SUPPORT:-1}"
        [ "$MIN_CONS_QUALITY" != "" ] && options+=" --min-consensus-quality ${MIN_CONS_QUALITY:-0}"
    fi

    # shellcheck disable=SC2086
    "$spath"/amendConsensus.pl -P "$acpath" "${final}.fasta" "${final}-variants.txt" $options
    if [[ "${ALIGN_AMENDED:-0}" -eq "1" && -s "$phmms/${gene}_hmm.mod" && -s "${final}.fasta" ]]; then
        cd "$ppath"
        "$A2M" "$gene" -db "${final}.fasta" -modelfile "$phmms/${gene}_hmm.mod" -sw 0 > /dev/null 2>&1
        cd - > /dev/null

        # shellcheck disable=SC2086
        local amended_file=$("$spath"/amendConsensus.pl -P "$acpath" "${final}.a2m" "${final}-variants.txt" $options --print-name)
        options+=" --a2m-reference -o ${final}-coverage.a2m.txt"

        # shellcheck disable=SC2086
        "$spath"/amendConsensus.pl -P "$acpath" "${final}.a2m" "${final}-variants.txt" $options
        if [[ "$ASSEM_REF" -eq "1" && "${PADDED_CONSENSUS:-1}" -eq "1" ]]; then
            "$spath"/amplicon_dropout_pad.pl -D -T -A -C "${final}-coverage.a2m.txt" -O "${final}-coverage.pad.txt" \
                "$acpath/${amended_file}.a2m" "$assemp/F1-${gene}.sam" > "$acpath/${amended_file}.pad.fa"
        fi
    fi

    if [[ -r "${final}-insertions.txt" && -r "${final}-deletions.txt" ]]; then
        local options="-C $MIN_C -F $MIN_F -I $MIN_FI -D $MIN_FD -Q $MIN_AQ -T $MIN_TCC -M $MIN_CONF -S $SIG_LEVEL"
        [ "$AUTO_F" -eq 1 ] && options+=" -A"

        # shellcheck disable=SC2086
        "$spath"/vcfGenerator.pl $options "${final}.fasta" "${final}-allAlleles.txt" "${final}-insertions.txt" "${final}-deletions.txt" > "${final}.vcf"
    fi

    if [ -s "${final}-coverage.a2m.txt" ]; then
        local tbl=""
        for tbl in variants insertions deletions allAlleles; do
            if [ -s "${final}-${tbl}.txt" ]; then
                "$spath"/leftjoin.pl -H -F 1,2 -I "${final}-${tbl}.txt" <(cut -f1-2,9-10 "${final}-coverage.a2m.txt")
            fi
        done
    fi

    "$SAMTOOLS" view -bS "${final}.sam" > "${final}.bam" 2> "${final}.bam.log" \
        && "$SAMTOOLS" sort "${final}.bam" -o "${final}.bam" >> "${final}.bam.log" 2>&1 \
        && "$SAMTOOLS" index "${final}.bam" >> "${final}.bam.log" 2>&1 \
        && rm "${final}.bam.log" \
        || {
            warn "writing or manipulating '${final}.bam' failed."
            cat "${final}.bam.log"
        }
    rmglob "$assemp"/F*-"$gene.sam"
    return 0
}

function doPostProcessing() {
    local -a pid=()
    local i=""

    # if failed to assemble anything, skip post processing
    [ -d "$ppath/other_data" ] && return 0

    # Create directory for the phases
    mkdir "$ppath"/intermediate

    # INFLATE TO FASTQ
    if [ "$ASSEM_REF" -eq "1" ]; then
        if test -n "$(
            shopt -s nullglob
            echo "$refp"/R0-*.ref
        )"; then
            for ref in "$refp"/R0-*.ref; do
                name=$(basename "$ref" .ref)
                gene=${name#*-}
                if test -n "$(
                    shopt -s nullglob
                    echo "$sortp"/R*-"$gene.fa"
                )"; then
                    cat "$sortp"/R*-"$gene.fa" > "$assemp/$gene.fa"
                    "$IRMA_CORE" xflate --inflate "$ppath"/R0.xfl "$assemp/$gene.fa" > "$assemp/$gene.fastq" &
                    pid+=($!)
                    "$spath"/ordinalHeaders.pl -O "$gene" "$ref" > "$assemp/F1-$gene.ref"
                fi
            done
        fi
    else
        if test -n "$(
            shopt -s nullglob
            echo "$refp"/R[1-9].refs
        )"; then
            # -G gets rid of alternative references
            for name in $("$spath"/finalRefs.pl -G "$refp"/R[1-9].refs); do
                gene=${name#*-}
                cat "$sortp"/R*"-$gene.fa" > "$assemp/$gene.fa"
                "$IRMA_CORE" xflate --inflate "$ppath"/R0.xfl "$assemp/$gene.fa" > "$assemp/$gene.fastq" &
                pid+=($!)
                head -n2 "$refp/$name.ref" > "$assemp/F1-$gene.ref"
            done
        fi
    fi
    wait_check "${pid[@]}" && time_stamp 'converted back to fastq' || die 'fastq conversion failed'

    # SAVE unmatched read patterns
    cd "$matchp"
    local LAST_ROUND=$((R <= MAX_ROUNDS ? R : MAX_ROUNDS))

    # shellcheck disable=SC2086
    tar -cf - R${LAST_ROUND}.nomatch | "$ZIP" $Z_OPTS - > "$ppath"/unmatched_read_patterns.tar.gz &
    cd - > /dev/null
    if [ "$ALIGN_PROG" != "NONE" ]; then
        cd "$alignp"

        # shellcheck disable=SC2086
        tar -cf - R*.sto 2> /dev/null | "$ZIP" $Z_OPTS - > storedCounts.tar.gz &
        cd - > /dev/null
    fi
    wait
    time_stamp 'saved unmatched read patterns'

    # CLEAN UP
    if [[ "${SECONDARY_SORT:-0}" -eq "1" && "$SORT_PROG" == "LABEL" ]]; then
        rmglob "$matchp"/R?.match.*
    fi

    ### READ COUNTS ###
    echo -e "Record\tReads\tPatterns\tPairsAndWidows" >> "$ppath"/READ_COUNTS.txt
    local raw1=$(grep OBSERVED_RAW_READS_OR_R1 "$ppath"/QC_log.txt | cut -f2)
    local raw2=0
    if [ "$PAIRED" -eq "1" ]; then
        raw2=$(grep OBSERVED_R2_READS "$ppath"/QC_log.txt | cut -f2)
    fi

    local initial=$((raw1 + raw2))
    local passing_qc=$(grep READ_COUNT_PASSING_ALL_QUALITY_CONTROL_FILTERS "$ppath"/QC_log.txt | cut -f2)
    local failqc=$((initial - passing_qc))
    local passing_pats=$(grep READ_PATTERN_COUNT_PASSING "$ppath"/QC_log.txt | cut -f2)
    {
        echo -e "0-R1\t${raw1}\tNA\tNA"
        echo -e "0-R2\t${raw2}\tNA\tNA"
        echo -e "1-initial\t${initial}\tNA\tNA"
        echo -e "2-failQC\t${failqc}\tNA\tNA"
        echo -e "2-passQC\t${passing_qc}\t${passing_pats}\tNA"
    } >> "$ppath"/READ_COUNTS.txt

    read -r raw1 pats1 <<< "$(grep '>' "$matchp"/*.chim | cut -f2 -d'%' | "$spath"/sumField.pl -S)"
    read -r raw2 pats2 <<< "$(grep '>' "$matchp"/R${LAST_ROUND}.nomatch | cut -f2 -d'%' | "$spath"/sumField.pl -S)"
    echo -e "3-chimeric\t${raw1}\t${pats1}\tNA" >> "$ppath"/READ_COUNTS.txt
    echo -e "3-nomatch\t${raw2}\t${pats2}\tNA" >> "$ppath"/READ_COUNTS.txt

    if test -n "$(
        shopt -s nullglob
        echo "$sortp"/*UNRECOGNIZABLE*
    )"; then
        read -r raw pats <<< "$(grep '>' "$sortp"/*UNRECOGNIZABLE* | cut -f2 -d'%' | "$spath"/sumField.pl -S)"
        echo -e "3-unrecognizable\t${raw}\t${pats}\tNA" >> "$ppath"/READ_COUNTS.txt
    fi
    ### END COUNTS ###

    grep '>' "$matchp"/R?.{match,nomatch,chim} "$ppath"/R0.fa -c > "$ppath"/NR_COUNTS_log.txt
    [ "$ALIGN_PROG" != "NONE" ] && rmglob "$alignp"/R*.sto &
    rmglob "$matchp"/R*.{match,chim} &
    rmglob "$sortp"/R*.fa "$ppath"/R0.fa &
    wait

    [ ! -d "$ppath"/amended_consensus ] && mkdir "$ppath"/amended_consensus

    pid=()
    # POLISH the final assembly & do variant calls/phasing
    if test -n "$(
        shopt -s nullglob
        echo "$assemp"/F1-*.ref
    )"; then
        time_stamp "performing final assembly with $ASSEM_PROG"
        for ref1 in "$assemp"/F1-*.ref; do
            name=$(basename "$ref1" .ref)
            gene=${name#*-}
            if [ -s "$assemp/${gene}.fastq" ]; then
                ASSEMBLE_REPORT "$gene" &
                pid+=($!)
            else
                time_stamp "skipping $gene assembly, no fastq"
            fi
        done
        wait_check "${pid[@]}" || die "final assembly failed"
        time_stamp 'variants phased, BAM files created'

        for i in "$ppath"/*bam; do
            pairs=$("$SAMTOOLS" view -c "$i") || warn "SAMTOOLS encountered an error counting reads for '$i'"

            gene=$(basename "$i" .bam)
            read -r raw pats <<< "$(grep '>' "$assemp/${gene}.fa" | cut -f2 -d'%' | "$spath"/sumField.pl -S)"
            echo -e "4-${gene}\t${raw}\t${pats}\t${pairs}" >> "$ppath"/READ_COUNTS.txt.tmp
            echo "${i}:$pairs" >> "$ppath"/READ_log.txt
        done

        cd "$assemp"
        if [ "${PACKAGED_FASTQ:-1}" -eq "1" ]; then
            # shellcheck disable=SC2086
            tar -cf - ./*.fastq | "$ZIP" $Z_OPTS - > "$assemp"/reads.tar.gz
        else
            # shellcheck disable=SC2086
            "$ZIP" $Z_OPTS ./*.fastq
        fi
        cd - > /dev/null
        grep '>' "$assemp"/*.fa -Hc >> "$ppath"/NR_COUNTS_log.txt
        rmglob "$assemp"/*.fastq "$assemp"/*.fa

        [ ! -d "$ppath"/matrices ] && mkdir "$ppath"/matrices
        [ -n "$(
            shopt -s nullglob
            echo "$ppath"/*.sqm
        )" ] && mv "$ppath"/*.sqm "$ppath"/matrices

        echo -e "Total Score\tAlignment" > "$ppath"/ASSEMBLY_log.txt
        cat "$assemp"/*.log >> "$ppath"/ASSEMBLY_log.txt
        rmglob "$assemp"/*.log "$ppath"/*.sam
    else
        time_stamp "skipping final assembly, no reference files found"
        touch "$ppath"/READ_COUNTS.txt.tmp
    fi

    # handle logs
    mkdir "$ppath"/logs
    mv "$ppath"/*log.txt "$ppath"/logs
    # shellcheck disable=SC2034
    [ -r "$rpath"/.commit ] && LAST_COMMIT=$(cat "$rpath"/.commit) || LAST_COMMIT="NOT_AVAILBLE"
    source "$spath"/run_info.sh
    source "$spath"/run_config.sh

    # final order
    mkdir "$ppath"/secondary
    mv "$ppath"/unmatched_read_patterns* "$ppath"/secondary

    # deposit secondary data
    if test -n "$(
        shopt -s nullglob
        echo "$sortp"/*.fa.2
    )"; then
        for i in "$sortp"/*.fa.2; do
            mv "$i" "$ppath/secondary/$(basename "$i" .2)"
        done

        # alternative counts
        for gene in $("$PARALLEL" --nn -q echo "{/.}" ::: "$ppath"/secondary/*.fa | cut -f2- -d'-' | sort | uniq | grep -v UNRECOGNIZABLE); do
            read -r raw pats <<< "$(grep -h '>' "$ppath"/secondary/*-"${gene}.fa" | cut -f2 -d'%' | "$spath"/sumField.pl -S)"
            echo -e "5-$gene\t${raw}\t${pats}\tNA" >> "$ppath"/READ_COUNTS.txt.tmp
        done
    fi

    if [ "$(grep '^4-' "$ppath"/READ_COUNTS.txt.tmp -c)" -gt "0" ]; then
        x=$(grep '^4-' "$ppath"/READ_COUNTS.txt.tmp | "$spath"/sumField.pl -F 2)
        y=$(grep '^4-' "$ppath"/READ_COUNTS.txt.tmp | "$spath"/sumField.pl -F 3)
        z=$(grep '^4-' "$ppath"/READ_COUNTS.txt.tmp | "$spath"/sumField.pl -F 4)
        echo -e "3-match\t$x\t$y\t$z" >> "$ppath"/READ_COUNTS.txt
    fi

    if [ "$(grep '^5-' "$ppath"/READ_COUNTS.txt.tmp -c)" -gt "0" ]; then
        x=$(grep '^5-' "$ppath"/READ_COUNTS.txt.tmp | "$spath"/sumField.pl -F 2)
        y=$(grep '^5-' "$ppath"/READ_COUNTS.txt.tmp | "$spath"/sumField.pl -F 3)
        echo -e "3-altmatch\t$x\t$y\tNA" >> "$ppath"/READ_COUNTS.txt
    fi

    # Check to see if a residual assembly may be possible, if so, store data
    if [[ "$(grep '3-match' "$ppath"/READ_COUNTS.txt -c)" -gt "0" && "$(grep '3-nomatch' "$ppath"/READ_COUNTS.txt -c)" -gt "0" ]]; then
        local cntmatch=$(grep '3-match' "$ppath"/READ_COUNTS.txt | cut -f3)
        local altmatch=$(grep '3-altmatch' "$ppath"/READ_COUNTS.txt | cut -f3)
        local factor=$((${cntmatch:-1} / ${altmatch:-1}))
        if [ "${factor:-0}" -lt "${RESIDUAL_ASSEMBLY_FACTOR:-400}" ]; then
            time_stamp "residual read pattern ratio non-trivial (${factor:-0}x < ${RESIDUAL_ASSEMBLY_FACTOR:-400}x)"
            local alt_ppath="$original_path"/residual_assembly
            mkdir "$alt_ppath"
            createDirs "$alt_ppath"
            ln "$ppath"/R0.xfl.bak "$alt_ppath"/R0.xfl
            ln "$ppath"/logs/QC_log.txt "$alt_ppath"/QC_log.txt
            ln "$refp"/R0.refs "$alt_ppath"/GENES/R0.refs
            cp "$matchp"/R${LAST_ROUND}.nomatch "$alt_ppath"/R0.fa
            catglob "$ppath"/secondary/R*.fa >> "$alt_ppath"/R0.fa
            grep '3-chimeric' "$ppath"/READ_COUNTS.txt >> "$alt_ppath"/READ_COUNTS.txt.prim
            grep '3-match' "$ppath"/READ_COUNTS.txt | perl -pe '$_ =~ s/match/altmatch/' >> "$alt_ppath"/READ_COUNTS.txt.prim

            [ ! -s "$alt_ppath/R0.fa" ] && rm -rf "$alt_ppath" && time_stamp "No data found for 'residual_assembly'"
        fi
    fi

    # adjust counts, prim is from primary if this is secondary
    [ -r "$ppath/READ_COUNTS.txt.prim" ] && cat "$ppath"/READ_COUNTS.txt.prim >> "$ppath"/READ_COUNTS.txt && rm "$ppath"/READ_COUNTS.txt.prim
    [ -r "$ppath/READ_COUNTS.txt.tmp" ] && cat "$ppath"/READ_COUNTS.txt.tmp >> "$ppath"/READ_COUNTS.txt && rm "$ppath"/READ_COUNTS.txt.tmp
    "$RSCRIPT" --vanilla "$spath"/percentages.R "$RUN" "$ppath"/READ_COUNTS.txt "$ppath"/READ_PERCENTAGES.pdf $PAIRED > /dev/null 2>&1

    # save figures
    [ ! -d "$ppath"/figures ] && mkdir "$ppath"/figures
    [ -n "$(
        shopt -s nullglob
        echo "$ppath"/*.pdf
    )" ] && mv "$ppath"/*.pdf "$ppath"/figures

    # save tables
    [ ! -d "$ppath"/tables ] && mkdir "$ppath"/tables
    [ -n "$(
        shopt -s nullglob
        echo "$ppath"/*.txt
    )" ] && mv "$ppath"/*.txt "$ppath"/tables

    rmglob "$ppath"/R0.xfl "$matchp"/R*.nomatch "$refp"/R*.refs
    mv "$refp" "$ppath/intermediate/0-ITERATIVE-REFERENCES"
    mv "$matchp" "$ppath/intermediate/1-MATCH_$MATCH_PROG"
    mv "$sortp" "$ppath/intermediate/2-SORT_$SORT_PROG"
    mv "$alignp" "$ppath/intermediate/3-ALIGN_$ALIGN_PROG"
    mv "$assemp" "$ppath/intermediate/4-ASSEMBLE_$ASSEM_PROG"
}

function setPrograms() {
    local i=$((R - 1))

    BLAT_OPTS="-oneOff=1 -minIdentity=${BLAT_IDENTITY:-80} -tileSize=10"
    if [ -z "${MATCH_PROGS[$i]}" ]; then
        MATCH_PROG=${MATCH_PROGS[${#MATCH_PROGS[@]} - 1]}
    elif [[ "${MATCH_PROGS[$i]}" =~ ^(BLAT)$ ]]; then
        MATCH_PROG=${MATCH_PROGS[$i]}
    else
        MATCH_PROG="BLAT"
        MATCH_PROGS[i]="BLAT"
        time_stamp "match program '${MATCH_PROGS[$i]}' requested but not found. Using $MATCH_PROG"
    fi

    if [ -z "${SORT_PROGS[$i]}" ]; then
        SORT_PROG=${SORT_PROGS[${#SORT_PROGS[@]} - 1]}
    elif [[ "${SORT_PROGS[$i]}" =~ ^(BLAT|LABEL)$ ]]; then
        SORT_PROG=${SORT_PROGS[$i]}
    else
        SORT_PROG="BLAT"
        SORT_PROGS[i]="BLAT"
        time_stamp "sort program '${SORT_PROGS[$i]}' requested but not found. Using $SORT_PROG"
    fi

    if [ -z "${ALIGN_PROGS[$i]}" ]; then
        ALIGN_PROG=${ALIGN_PROGS[${#ALIGN_PROGS[@]} - 1]}
    elif [[ "${ALIGN_PROGS[$i]}" =~ ^(BLAT|SAM|NONE)$ ]]; then
        ALIGN_PROG=${ALIGN_PROGS[$i]}
    else
        ALIGN_PROG="SAM"
        ALIGN_PROGS[i]="SAM"
        time_stamp "align program '${ALIGN_PROGS[$i]}' requested but not found. Trying $ALIGN_PROG"
    fi

    # check for profiles
    if [[ "$phmms" == "none" && "$ALIGN_PROG" == "SAM" ]]; then
        time_stamp "align program is SAM but cannot find pHMM directory. Using BLAT"
        ALIGN_PROG="BLAT"
        ALIGN_PROGS[i]="BLAT"
    fi

    if [ "$ASSEM_PROG" == "MINIMAP2" ]; then
        if [ "$(perl -se '$t=shift;print ($t != 1.0 ? 1 : 0)' "${MIN_FA:-0}")" -eq "1" ]; then
            time_stamp "$ASSEM_PROG may not be used with alternative consensus. Turning it off."
            MIN_FA=1.0
        fi
    fi

    if [ -z "${DEL_TYPES[0]}" ]; then
        if [ "$ALIGN_PROG" == "BLAT" ]; then
            DEL_TYPE="NNN"
        else
            DEL_TYPE="DEL"
        fi
    elif [ -z "${DEL_TYPES[$i]}" ]; then
        DEL_TYPE=${DEL_TYPES[${#DEL_TYPES[@]} - 1]}
    elif [[ "${DEL_TYPES[$i]}" =~ ^(NNN|REF|DEL)$ ]]; then
        DEL_TYPE=${DEL_TYPES[$i]}
    else
        DEL_TYPE="NNN"
        DEL_TYPES[i]="NNN"
        time_stamp "deletion type '${DEL_TYPES[$i]}' requested but not found. Trying $DEL_TYPE"
    fi
}

### START ####
time_stamp "started run '$RUN'"
[ -r "$mpath/primers" ] && time_stamp "NOTE: primer-trimmed reads expected as input for amplicon sequencing"

# optionally check for disk space
if [[ "${ALLOW_DISK_CHECK:-1}" == "1" && "${LEFT:(-3)}" == ".gz" ]]; then
    MSG="$("$spath"/disk_check.pl "$ppath" "$LEFT" "$RIGHT")"
    STATUS=$?
    if [ "$STATUS" -eq "0" ]; then
        time_stamp "$MSG"
    else
        [ "$STATUS" -eq "3" ] && abort "$MSG" || abort "disk check failed!"
    fi
fi

if [ "$PAIRED" -eq "1" ]; then
    # shellcheck disable=SC2086
    "$IRMA_CORE" preprocess $PREPROCESS_OPTS --log-file "$ppath"/QC_log.txt \
        "$ppath"/R0.xfl "$LEFT" "$RIGHT" > "$ppath"/R0.fa
else
    # shellcheck disable=SC2086
    "$IRMA_CORE" preprocess $PREPROCESS_OPTS --log-file "$ppath"/QC_log.txt \
        "$ppath"/R0.xfl "$LEFT" > "$ppath"/R0.fa
fi
time_stamp 'pre-processed'

# Turn off paired-end read merging.
if [[ "$PAIRED" -eq "1" && "$NO_MERGE" -eq "1" ]]; then
    time_stamp "WARNING, setting NO_MERGE for PE reads. Coverage depth will appear inflated."
    PAIRED=0
fi

if [ ! -s "$ppath"/R0.fa ]; then
    time_stamp "found no QC'd data, review QC_log.txt"
    moveProject
    time_stamp 'finished!'
    exit
fi

setPrograms # set the programs

# Guard the final assembler program
if [[ "$ASSEM_PROG" != "SSW" && "$ASSEM_PROG" != "MINIMAP2" && "$ASSEM_PROG" != "CORE" ]]; then
    time_stamp "final assembly program '$ASSEM_PROG' requested but not found. Using SSW"
    ASSEM_PROG="SSW"
elif [[ "$ASSEM_PROG" == "CORE" ]]; then
    warn "final assembly program '$ASSEM_PROG' is experimental and may be removed in the future."
fi

if [[ "$ASSEM_REF" -eq "1" && "$NO_SORT_REFS" -eq "1" ]]; then
    "$spath"/partitionByField.pl -H -X .ref "$REF_SET" "$refp"/R0-
    time_stamp 'using supplied reference set as-is'
elif [ "$ASSEM_REF" -eq "1" ]; then
    mkdir "$sortp"/R0
    # Sort out the references
    if [ "$SORT_PROG" == "LABEL" ]; then
        doLABEL "$REF_SET" "$MODULE" "$LABEL_MODULE" "$sortp"/R0 "$LIMIT_LABEL"
        cat "$sortp"/R0/SORTED_*.tab > "$sortp"/R0/SORT_result.txt 2> /dev/null
    elif [ "$SORT_PROG" == "BLAT" ]; then
        if [ -r "$DEF_SET" ]; then
            doBLAT "$DEF_SET" "$REF_SET" R0 "$MODULE" "$sortp"/R0 "$SINGLE_LOCAL_PROC" "$LIMIT_BLAT"
            cat "$sortp/R0/R0-${MODULE}"_????.class > "$sortp"/R0/SORT_result.txt
            rmglob "$sortp/R0/R0-${MODULE}"_????.class
            if [ ! -s "$sortp/R0/SORT_result.txt" ]; then
                time_stamp "BLAT could not identify any references, trying LABEL."
                doLABEL "$REF_SET" "$MODULE" "$LABEL_MODULE" "$sortp"/R0 "$LIMIT_LABEL"
                cat "$sortp"/R0/SORTED_*.tab > "$sortp"/R0/SORT_result.txt
                [ ! -s "$sortp/R0/SORT_result.txt" ] && die 'LABEL could not identify the custom reference either, exiting'
            fi
        else
            die 'no default classification set found'
        fi
    fi

    options=""
    [ "$SORT_PROG" == "BLAT" ] && options+=" -G"
    [ "$BAN_GROUPS" != "" ] && options+=" -B $BAN_GROUPS"

    # shellcheck disable=SC2086
    "$spath"/parseSORTresults.pl "$sortp"/R0/SORT_result.txt "$REF_SET" "$refp"/R0 $options
    cd "$sortp"/R0

    # shellcheck disable=SC2086
    tar -cf - SORT_result.txt | "$ZIP" $Z_OPTS - > "$sortp"/R0.tar.gz
    cd - > /dev/null
    rm -rf "$sortp"/R0

    if test -n "$(
        shopt -s nullglob
        echo "$refp"/R0-*.fa
    )"; then
        for fasta in "$refp"/R0-*.fa; do
            name=$(basename "$fasta" .fa)
            gene=${name#*-}
            "$spath"/ordinalHeaders.pl -N "$gene" "$fasta" > "$refp/${name}.ref"
        done
        cat "$refp"/R0-*.ref > "$refp"/R0.refs
        rmglob "$refp"/R0-*.fa
        REF_SET="$refp"/R0.refs
        time_stamp 'sorted user supplied reference set'
    else
        time_stamp "run aborted, sequences in '$REF_SET' not ${MODULE}-like"
        moveProject
        time_stamp 'finished!'
        exit
    fi
elif [ "$REF_SET" != "$refp/R0.refs" ]; then
    cp "$REF_SET" "$refp"/R0.refs && REF_SET="$refp"/R0.refs
fi

# Backup primary data and begin normal assembly
ln "$ppath"/R0.fa "$ppath"/R0.fa.bak
ln "$ppath"/R0.xfl "$ppath"/R0.xfl.bak
QUERY_SET="$ppath"/R0.fa
for ((R = 1; R <= MAX_ROUNDS; R++)); do
    setPrograms
    doRound "$QUERY_SET" "R$R" || break
    MIN_RC=1
    MIN_RP=1
    [ ! -s "$QUERY_SET" ] && break
done
doPostProcessing || die 'aborted'

# Take data in the secondary folder, if it exists, and do another round of assembly
if [ -s "$ppath/residual_assembly/R0.fa" ]; then
    time_stamp "attempting RESIDUAL assembly"
    ppath="$ppath"/residual_assembly
    REF_SET="$ppath"/GENES/R0.refs
    QUERY_SET="$ppath"/R0.fa
    R=1
    RUN="${original_run}-residual"
    setDirs "$ppath"

    SORT_GROUPS=""
    RESIDUAL_ASSEMBLY_FACTOR=0
    MIN_RP=${MIN_RP_RESIDUAL:-150}
    MIN_RC=${MIN_RC_RESIDUAL:-150}

    while [ "$R" -le "$MAX_ROUNDS" ]; do
        setPrograms
        doRound "$QUERY_SET" "R$R" || break
        MIN_RC=1
        MIN_RP=1

        if [ -s "$QUERY_SET" ]; then
            ((R++))
        else
            break
        fi
    done

    doPostProcessing || die 'aborted'
fi

# Do secondary assembly if able
if [[ "${DO_SECONDARY:-0}" -eq "1" && -n "$(
    shopt -s nullglob
    echo "$original_path"/residual_assembly/*.fasta
)" ]]; then
    time_stamp "performing SECONDARY assembly"
    ppath="$original_path"/secondary_assembly
    mkdir "$ppath"
    createDirs "$ppath" && setDirs "$ppath"
    R=1
    RUN="${original_run}-secondary"

    SORT_GROUPS=""
    RESIDUAL_ASSEMBLY_FACTOR=0
    MIN_RP=${MIN_RP_RESIDUAL:-150}
    MIN_RC=${MIN_RC_RESIDUAL:-150}
    BLAT_IDENTITY=96
    MATCH_PROG=BLAT
    MATCH_PROGS=(BLAT)
    SORT_PROG=BLAT
    SORT_PROGS=(BLAT)
    ALIGN_PROG=BLAT
    ALIGN_PROGS=(BLAT)
    SKIP_E=1
    REF_SET="$ppath"/GENES/R0.refs
    QUERY_SET="$ppath"/R0.fa

    ln "$original_path"/R0.fa.bak "$ppath"/R0.fa
    ln "$original_path"/R0.xfl.bak "$ppath"/R0.xfl
    ln "$original_path"/logs/QC_log.txt "$ppath"/QC_log.txt

    catglob "$original_path"/*.fasta > "$REF_SET"
    cd "$ppath"
    for residual_fasta in "$original_path"/residual_assembly/*.fasta; do
        gene=$(basename "$residual_fasta" .fasta)
        if [ -r "$phmms/${gene}_hmm.mod" ]; then
            cat "$residual_fasta" <("$spath"/ordinalHeaders.pl -O default <("$spath"/selectSequences.pl "$DEF_SET" -I "$gene")) > "$ppath/${gene}.patch.fa"
            cd "$ppath"
            "$A2M" "${gene}.patch" -db "${gene}.patch.fa" -modelfile "$phmms/${gene}_hmm.mod" -sw 0 > /dev/null 2>&1
            cd - > /dev/null
            "$spath"/makePatchworkConsensus.pl "$ppath/${gene}.patch.a2m" default -N "$gene" >> "$REF_SET"
        else
            cat "$residual_fasta" >> "$REF_SET"
        fi
    done
    rmglob "$ppath"/*.patch.*

    while [ "$R" -le "$MAX_ROUNDS" ]; do
        setPrograms
        doRound "$QUERY_SET" "R$R" || break
        MIN_RC=1
        MIN_RP=1

        if [ -s "$QUERY_SET" ]; then
            ((R++))
        else
            break
        fi
    done
    doPostProcessing
fi
moveProject
time_stamp 'finished!'
exit 0
### END ###
