#!/opt/conda/conda-bld/cdskit_1772180215506/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeh/bin/python

import argparse
import sys

from datetime import datetime

from cdskit.__init__ import __version__

# Parent parsers for shared options

# --version lives in its own parent parser so that every subcommand can
# inherit it. It is created before the main parser so the main parser can
# inherit it too.
p_version = argparse.ArgumentParser(add_help=False)
p_version.add_argument('--version', action='version', version='cdskit version ' + __version__)

# Main parser
# Inheriting p_version makes a bare `cdskit --version` work; previously the
# option was only registered on subcommands, so the top-level parser rejected
# it as an unrecognized argument.
psr = argparse.ArgumentParser(
    description='A toolkit to handle protein-coding DNA sequences in frame',
    parents=[p_version],
)
subparsers = psr.add_subparsers()

# Input sequence options: -s/--seqfile and -if/--inseqformat.
p_infile = argparse.ArgumentParser(add_help=False)
p_infile.add_argument(
    '-s', '--seqfile', metavar='PATH', default='-', type=str,
    help='default=%(default)s: Input sequence file. Use "-" for STDIN.',
)
p_infile.add_argument(
    '-if', '--inseqformat', metavar='STR', default='fasta', type=str,
    help='default=%(default)s: Input sequence format. See Biopython documentation for available options. https://biopython.org/wiki/SeqIO',
)

# Output sequence options: -o/--outfile and -of/--outseqformat.
p_outfile = argparse.ArgumentParser(add_help=False)
p_outfile.add_argument(
    '-o', '--outfile', metavar='PATH', default='-', type=str,
    help='default=%(default)s: Output sequence file. Use "-" for STDOUT.',
)
p_outfile.add_argument(
    '-of', '--outseqformat', metavar='STR', default='fasta', type=str,
    help='default=%(default)s: Output sequence format. See Biopython documentation for available options. https://biopython.org/wiki/SeqIO',
)

# Codon table option: -d/--codontable.
p_codon = argparse.ArgumentParser(add_help=False)
p_codon.add_argument(
    '-d', '--codontable', metavar='INT', default=1, type=int,
    help='default=%(default)s: Codon table ID. The standard code is "1". '
         'See here for details: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi',
)

# GFF input/output options: --ingff and --outgff.
p_gffio = argparse.ArgumentParser(add_help=False)
p_gffio.add_argument(
    '--ingff', metavar='PATH', default=None, type=str,
    help='default=%(default)s: Input gff file.',
)
p_gffio.add_argument(
    '--outgff', metavar='PATH', default='out.gff', type=str,
    help='default=%(default)s: Output gff file.',
)

# Worker thread count option: --threads.
p_threads = argparse.ArgumentParser(add_help=False)
p_threads.add_argument(
    '--threads', metavar='INT', default=1, type=int,
    help='default=%(default)s: Number of worker threads for supported commands. 0 uses all detected CPUs.',
)

# Sequence type option: --seqtype, restricted to dna, protein or auto.
p_seqtype = argparse.ArgumentParser(add_help=False)
p_seqtype.add_argument(
    '--seqtype', metavar='dna|protein|auto', default='auto', type=str,
    choices=['dna', 'protein', 'auto'],
    help='default=%(default)s: Expected sequence type for input validation.',
)



def strtobool(val):
    """Convert a yes/no style string into a bool.

    The comparison is case-insensitive. Used as an argparse ``type=``
    converter for the ``yes|no`` options in this script.

    Args:
        val: String such as "yes", "no", "true", "0", "on" or "off".

    Returns:
        True for "y", "yes", "t", "true", "on", "1";
        False for "n", "no", "f", "false", "off", "0".

    Raises:
        ValueError: If the lowercased value is not one of the accepted
            spellings. The message contains the lowercased value.
    """
    truthy = ("y", "yes", "t", "true", "on", "1")
    falsy = ("n", "no", "f", "false", "off", "0")

    lowered = val.lower()
    if lowered in truthy:
        return True
    if lowered in falsy:
        return False
    raise ValueError(f"invalid truth value {lowered!r}")

def command_accession2fasta(args):
    """Run the ``accession2fasta`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``accession2fasta_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported here rather than at module top, so the module is only loaded
    # when this subcommand is actually invoked.
    from cdskit.accession2fasta import accession2fasta_main
    sys.stderr.write(f'cdskit accession2fasta: started at {datetime.now()}\n')
    accession2fasta_main(args)
    sys.stderr.write(f'cdskit accession2fasta: ended at {datetime.now()}\n')

# Subcommand: accession2fasta
help_accession2fasta = 'Retrieving fasta sequences from a list of GenBank accessions. See `cdskit accession2fasta -h`'
p_accession2fasta = subparsers.add_parser(
    'accession2fasta',
    help=help_accession2fasta,
    parents=[p_version, p_outfile, p_threads],
)
# The only mandatory option of this subcommand.
p_accession2fasta.add_argument(
    '--accession_file', metavar='PATH', default='', type=str, required=True,
    help='default=%(default)s: PATH to the accession-per-line text file.',
)
p_accession2fasta.add_argument(
    '--email', metavar='aaa@bbb.com', default='', type=str,
    help='default=%(default)s: Your email address. This is passed to the NCBI\'s E-utilities. '
         'For details, see here: https://biopython.org/docs/1.75/api/Bio.Entrez.html',
)
p_accession2fasta.add_argument(
    '--extract_cds', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to extract the CDS feature.',
)
p_accession2fasta.add_argument(
    '--ncbi_database', metavar='STR', default='nucleotide', type=str,
    choices=['nucleotide'],
    help='default=%(default)s: NCBI database to search.',
)
p_accession2fasta.add_argument(
    '--seqnamefmt', metavar='STR', default='organism_accessions', type=str,
    help='default=%(default)s: Underline-separated list of output sequence name elements. '
         'Try --list_seqname_keys to check available values.',
)
p_accession2fasta.add_argument(
    '--list_seqname_keys', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Listing the keys (and values) available for --seqnamefmt.',
)
p_accession2fasta.set_defaults(handler=command_accession2fasta)


def command_aggregate(args):
    """Run the ``aggregate`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``aggregate_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.aggregate import aggregate_main
    sys.stderr.write(f'cdskit aggregate: started at {datetime.now()}\n')
    aggregate_main(args)
    sys.stderr.write(f'cdskit aggregate: ended at {datetime.now()}\n')

# Subcommand: aggregate
help_aggregate = 'Extracting the longest sequences combined with a sequence name regex. See `cdskit aggregate -h`'
p_aggregate = subparsers.add_parser(
    'aggregate',
    help=help_aggregate,
    parents=[p_version, p_infile, p_outfile, p_seqtype, p_threads],
)
p_aggregate.add_argument(
    '-m', '--mode', metavar='STR', default='longest', type=str,
    choices=['longest'],
    help='default=%(default)s: Criterion to keep a sequence during aggregation.',
)
# nargs='+' collects one or more regexes into a list; the default is an empty list.
p_aggregate.add_argument(
    '-x', '--expression', metavar='REGEX', default=[], type=str, nargs='+',
    help='default=%(default)s: Regular expression(s) to aggregate sequences. If omitted, no aggregation by regex is applied.',
)
p_aggregate.set_defaults(handler=command_aggregate)


def command_backtrim(args):
    """Run the ``backtrim`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``backtrim_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.backtrim import backtrim_main
    sys.stderr.write(f'cdskit backtrim: started at {datetime.now()}\n')
    backtrim_main(args)
    sys.stderr.write(f'cdskit backtrim: ended at {datetime.now()}\n')

# Subcommand: backtrim
help_backtrim = 'Back-translating a trimmed protein alignment. See `cdskit backtrim -h`'
p_backtrim = subparsers.add_parser(
    'backtrim',
    help=help_backtrim,
    parents=[p_version, p_infile, p_outfile, p_codon, p_threads],
)
# Mandatory: the trimmed amino acid alignment.
p_backtrim.add_argument(
    '-a', '--trimmed_aa_aln', metavar='PATH', default='', type=str, required=True,
    help='default=%(default)s: PATH to the trimmed amino acid alignment. '
         'In addition to this, please specify the untrimmed CDS alignment by --seqfile.',
)
p_backtrim.set_defaults(handler=command_backtrim)


def command_backalign(args):
    """Run the ``backalign`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``backalign_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.backalign import backalign_main
    sys.stderr.write(f'cdskit backalign: started at {datetime.now()}\n')
    backalign_main(args)
    sys.stderr.write(f'cdskit backalign: ended at {datetime.now()}\n')

# Subcommand: backalign
help_backalign = 'Back-aligning CDS based on an amino acid alignment. See `cdskit backalign -h`'
p_backalign = subparsers.add_parser(
    'backalign',
    help=help_backalign,
    parents=[p_version, p_infile, p_outfile, p_codon, p_threads],
)
# Mandatory: the aligned amino acid sequences.
p_backalign.add_argument(
    '-a', '--aa_aln', metavar='PATH', default='', type=str, required=True,
    help='default=%(default)s: PATH to aligned amino acid sequences. '
         'In addition to this, please specify unaligned CDS by --seqfile.',
)
p_backalign.set_defaults(handler=command_backalign)


def command_gapjust(args):
    """Run the ``gapjust`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``gapjust_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.gapjust import gapjust_main
    sys.stderr.write(f'cdskit gapjust: started at {datetime.now()}\n')
    gapjust_main(args)
    sys.stderr.write(f'cdskit gapjust: ended at {datetime.now()}\n')

# Subcommand: gapjust
help_gapjust = 'Adjusting consecutive Ns to the fixed length. See `cdskit gapjust -h`'
p_gapjust = subparsers.add_parser(
    'gapjust',
    help=help_gapjust,
    parents=[p_version, p_infile, p_outfile, p_gffio, p_threads],
)
p_gapjust.add_argument(
    '--gap_len', metavar='INT', default=100, type=int,
    help='default=%(default)s: Gap length. Ns will be added or removed to make the gap length fixed.',
)
# Both bounds default to None, i.e. they are unset unless given on the command line.
p_gapjust.add_argument(
    '--gap_just_min', metavar='INT', default=None, type=int,
    help='default=%(default)s: Minimum gap length to be adjusted. Ns will be extended if the gap length is equal to or greater than this value.',
)
p_gapjust.add_argument(
    '--gap_just_max', metavar='INT', default=None, type=int,
    help='default=%(default)s: Maximum gap length to be adjusted. Ns will be shortened if the gap length is equal to or smaller than this value.',
)
p_gapjust.set_defaults(handler=command_gapjust)


def command_hammer(args):
    """Run the ``hammer`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``hammer_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.hammer import hammer_main
    sys.stderr.write(f'cdskit hammer: started at {datetime.now()}\n')
    hammer_main(args)
    sys.stderr.write(f'cdskit hammer: ended at {datetime.now()}\n')

# Subcommand: hammer
help_hammer = 'Removing less-occupied codon columns from a gappy alignment. See `cdskit hammer -h`'
p_hammer = subparsers.add_parser(
    'hammer',
    help=help_hammer,
    parents=[p_version, p_infile, p_outfile, p_codon, p_threads],
)
# --nail is parsed as a string because it accepts either an integer or "all".
p_hammer.add_argument(
    '--nail', metavar='INT/all', default='4', type=str,
    help='default=%(default)s: Threshold number of "nail sequences" to hammer down. '
         'Codon columns are removed if there are no more than this number of non-missing sequences. '
         '"all" generates a completely no-gap output.',
)
p_hammer.add_argument(
    '--prevent_gap_only', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to relax (decrease) --nail when a gap-only sequence is generated.',
)
p_hammer.set_defaults(handler=command_hammer)

def command_intersection(args):
    """Run the ``intersection`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``intersection_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.intersection import intersection_main
    sys.stderr.write(f'cdskit intersection: started at {datetime.now()}\n')
    intersection_main(args)
    sys.stderr.write(f'cdskit intersection: ended at {datetime.now()}\n')

# Subcommand: intersection
help_intersection = 'Dropping non-overlapping sequence labels between two sequences files or between a sequence file and a gff file. See `cdskit intersection -h`'
p_intersection = subparsers.add_parser(
    'intersection',
    help=help_intersection,
    parents=[p_version, p_infile, p_outfile, p_gffio, p_seqtype, p_threads],
)
# Second input/output pair, in addition to the inherited --seqfile/--outfile options.
p_intersection.add_argument(
    '--seqfile2', metavar='PATH', default=None, type=str,
    help='default=%(default)s: Input sequence file 2.',
)
p_intersection.add_argument(
    '--inseqformat2', metavar='STR', default='fasta', type=str,
    help='default=%(default)s: Input sequence format for --seqfile2.',
)
p_intersection.add_argument(
    '--outfile2', metavar='PATH', default='seqfile2.out', type=str,
    help='default=%(default)s: Output sequence file 2.',
)
p_intersection.add_argument(
    '--outseqformat2', metavar='STR', default='fasta', type=str,
    help='default=%(default)s: Output sequence format for --outfile2.',
)
p_intersection.add_argument(
    '--fix_outrange_gff_records', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Fix gff records that have coordinates out of the sequence range.',
)
p_intersection.set_defaults(handler=command_intersection)


def command_label(args):
    """Run the ``label`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``label_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.label import label_main
    sys.stderr.write(f'cdskit label: started at {datetime.now()}\n')
    label_main(args)
    sys.stderr.write(f'cdskit label: ended at {datetime.now()}\n')

# Subcommand: label
help_label = 'Modifying sequence labels. See `cdskit label -h`'
p_label = subparsers.add_parser(
    'label',
    help=help_label,
    parents=[p_version, p_infile, p_outfile, p_seqtype, p_threads],
)
# "%%" in the help text is an escaped literal percent sign, because argparse
# applies %-formatting to help strings.
p_label.add_argument(
    '--replace_chars', metavar='FROM1FROM2...--TO', default='', type=str,
    help='default=%(default)s: Replace sequence label characters. For example, "!@#$%%^&*+=/?<>|--_" replaces various characters with underbar ("_").',
)
p_label.add_argument(
    '--clip_len', metavar='INT', default=0, type=int,
    help='default=%(default)s: Maximum length of sequence labels. Longer labels are truncated.',
)
p_label.add_argument(
    '--unique', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Make sequence labels unique by adding suffix (_1, _2, ...).',
)
p_label.set_defaults(handler=command_label)


def command_longestcds(args):
    """Run the ``longestcds`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``longestcds_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.longestcds import longestcds_main
    sys.stderr.write(f'cdskit longestcds: started at {datetime.now()}\n')
    longestcds_main(args)
    sys.stderr.write(f'cdskit longestcds: ended at {datetime.now()}\n')

# Subcommand: longestcds
help_longestcds = 'Finding the longest CDS from six-frame translation. See `cdskit longestcds -h`'
p_longestcds = subparsers.add_parser(
    'longestcds',
    help=help_longestcds,
    parents=[p_version, p_infile, p_outfile, p_codon, p_threads],
)
p_longestcds.add_argument(
    '--annotate_seqname', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Whether to append strand/frame/coordinate metadata to FASTA headers.',
)
p_longestcds.set_defaults(handler=command_longestcds)


def command_mask(args):
    """Run the ``mask`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``mask_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.mask import mask_main
    sys.stderr.write(f'cdskit mask: started at {datetime.now()}\n')
    mask_main(args)
    sys.stderr.write(f'cdskit mask: ended at {datetime.now()}\n')

# Subcommand: mask
help_mask = 'Masking ambiguous and/or stop codons. See `cdskit mask -h`'
p_mask = subparsers.add_parser(
    'mask',
    help=help_mask,
    parents=[p_version, p_infile, p_outfile, p_codon, p_threads],
)
p_mask.add_argument(
    '-c', '--maskchar', metavar='CHAR', default='N', type=str,
    choices=['N', '-'],
    help='default=%(default)s: A character to be used to mask codons.',
)
# Unlike the strtobool-typed yes|no options elsewhere in this script, these
# two stay as the literal strings 'yes' / 'no' (type=str with choices).
p_mask.add_argument(
    '-a', '--ambiguouscodon', metavar='yes|no', default='yes', type=str,
    choices=['yes', 'no'],
    help='default=%(default)s: Mask ambiguous codons. '
         'e.g., "AAN", which may code Asn or Lys in the standard genetic code.',
)
p_mask.add_argument(
    '-t', '--stopcodon', metavar='yes|no', default='yes', type=str,
    choices=['yes', 'no'],
    help='default=%(default)s: Mask stop codons.',
)
p_mask.set_defaults(handler=command_mask)


def command_maxalign(args):
    """Run the ``maxalign`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``maxalign_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.maxalign import maxalign_main
    sys.stderr.write(f'cdskit maxalign: started at {datetime.now()}\n')
    maxalign_main(args)
    sys.stderr.write(f'cdskit maxalign: ended at {datetime.now()}\n')

# Subcommand: maxalign
help_maxalign = 'Removing sequences to maximize codon-based alignment area. See `cdskit maxalign -h`'
p_maxalign = subparsers.add_parser(
    'maxalign',
    help=help_maxalign,
    parents=[p_version, p_infile, p_outfile, p_threads],
)
p_maxalign.add_argument(
    '--mode', metavar='auto|exact|greedy', default='auto', type=str,
    choices=['auto', 'exact', 'greedy'],
    help='default=%(default)s: Solver mode for maximizing alignment area.',
)
p_maxalign.add_argument(
    '--max_exact_sequences', metavar='INT', default=16, type=int,
    help='default=%(default)s: Maximum number of input sequences for exact search.',
)
p_maxalign.add_argument(
    '--missing_char', metavar='STR', default='-?.', type=str,
    help='default=%(default)s: Characters treated as missing within a codon.',
)
p_maxalign.add_argument(
    '--keep', metavar='REGEX1,REGEX2,...', default='', type=str,
    help='default=%(default)s: Comma-separated regex patterns for sequence names that should not be dropped.',
)
p_maxalign.add_argument(
    '--max_removed', metavar='INT', default=None, type=int,
    help='default=%(default)s: Maximum number of input sequences that can be removed in total.',
)
p_maxalign.add_argument(
    '--report', metavar='PATH', default='', type=str,
    help='default=%(default)s: Optional report output path. If PATH ends with ".json", JSON is written; otherwise tab-separated TSV is written.',
)
p_maxalign.set_defaults(handler=command_maxalign)


def command_pad(args):
    """Run the ``pad`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``pad_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.pad import pad_main
    sys.stderr.write(f'cdskit pad: started at {datetime.now()}\n')
    pad_main(args)
    sys.stderr.write(f'cdskit pad: ended at {datetime.now()}\n')

# Subcommand: pad
help_pad = 'Making nucleotide sequences in-frame by head and tail paddings. See `cdskit pad -h`'
p_pad = subparsers.add_parser(
    'pad',
    help=help_pad,
    parents=[p_version, p_infile, p_outfile, p_codon, p_threads],
)
p_pad.add_argument(
    '-c', '--padchar', metavar='CHAR', default='N', type=str,
    choices=['N', '-'],
    help='default=%(default)s: A character to be used to pad when the sequence length is not multiple of three.',
)
# A plain flag (store_true), unlike the yes|no style options of other subcommands.
p_pad.add_argument(
    '-n', '--nopseudo', default=False, action='store_true',
    help='default=%(default)s: Drop sequences that contain stop codon(s) even after padding to 5\'- or 3\'- terminal.',
)
p_pad.set_defaults(handler=command_pad)


def command_parsegb(args):
    """Run the ``parsegb`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``parsegb_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.parsegb import parsegb_main
    sys.stderr.write(f'cdskit parsegb: started at {datetime.now()}\n')
    parsegb_main(args)
    sys.stderr.write(f'cdskit parsegb: ended at {datetime.now()}\n')

# Subcommand: parsegb
help_parsegb = 'Converting the GenBank format. See `cdskit parsegb -h`'
p_parsegb = subparsers.add_parser(
    'parsegb',
    help=help_parsegb,
    parents=[p_version, p_outfile, p_threads],
)
# The input options are declared here instead of inheriting p_infile, because
# the default input format for this subcommand is "genbank" rather than "fasta".
p_parsegb.add_argument(
    '-s', '--seqfile', metavar='PATH', default='-', type=str,
    help='default=%(default)s: Input sequence file. Use "-" for STDIN.',
)
p_parsegb.add_argument(
    '-if', '--inseqformat', metavar='STR', default='genbank', type=str,
    help='default=%(default)s: Input sequence format for parsegb. Use "genbank" (or "gb").',
)
p_parsegb.add_argument(
    '--seqnamefmt', metavar='STR', default='organism_accessions', type=str,
    help='default=%(default)s: Underline-separated list of sequence name elements. '
         'Use --list_seqname_keys to browse available values.',
)
p_parsegb.add_argument(
    '--list_seqname_keys', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Listing the keys (and values) available for --seqnamefmt.',
)
p_parsegb.add_argument(
    '--extract_cds', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to extract the CDS feature.',
)
p_parsegb.set_defaults(handler=command_parsegb)


def command_printseq(args):
    """Run the ``printseq`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``printseq_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.printseq import printseq_main
    sys.stderr.write(f'cdskit printseq: started at {datetime.now()}\n')
    printseq_main(args)
    sys.stderr.write(f'cdskit printseq: ended at {datetime.now()}\n')

# Subcommand: printseq (inherits no output-file options)
help_printseq = 'Print a subset of sequences with a regex. See `cdskit printseq -h`'
p_printseq = subparsers.add_parser(
    'printseq',
    help=help_printseq,
    parents=[p_version, p_infile, p_seqtype, p_threads],
)
p_printseq.add_argument(
    '-n', '--seqname', default='', type=str,
    help='default=%(default)s: Name of the sequence to print. Regex is supported.',
)
p_printseq.add_argument(
    '--show_seqname', metavar='yes|no', default='yes', type=strtobool,
    help='default=%(default)s: Whether to show sequence name starting with ">". '
         '"no" prints sequences only.',
)
p_printseq.set_defaults(handler=command_printseq)

def command_rmseq(args):
    """Run the ``rmseq`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``rmseq_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.rmseq import rmseq_main
    sys.stderr.write(f'cdskit rmseq: started at {datetime.now()}\n')
    rmseq_main(args)
    sys.stderr.write(f'cdskit rmseq: ended at {datetime.now()}\n')

# Subcommand: rmseq
help_rmseq = 'Removing a subset of sequences by using a sequence name regex and by detecting problematic sequence characters. See `cdskit rmseq -h`'
p_rmseq = subparsers.add_parser(
    'rmseq',
    help=help_rmseq,
    parents=[p_version, p_infile, p_outfile, p_seqtype, p_threads],
)
p_rmseq.add_argument(
    '--seqname', default='', type=str,
    help='default=%(default)s: Names of sequences to remove. Regex is supported.',
)
p_rmseq.add_argument(
    '--problematic_char', default='NX-?', type=str,
    help='default=%(default)s: Problematic characters considered by --problematic_percent. Without separator.',
)
p_rmseq.add_argument(
    '--problematic_percent', default=0, type=float,
    help='default=%(default)s: Sequences containing >= this percentage of --problematic_char are removed.',
)
p_rmseq.set_defaults(handler=command_rmseq)

def command_split(args):
    """Run the ``split`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``split_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.split import split_main
    sys.stderr.write(f'cdskit split: started at {datetime.now()}\n')
    split_main(args)
    sys.stderr.write(f'cdskit split: ended at {datetime.now()}\n')

# Subcommand: split
help_split = 'Splitting 1st, 2nd, and 3rd codon positions. See `cdskit split -h`'
p_split = subparsers.add_parser(
    'split',
    help=help_split,
    parents=[p_version, p_infile, p_outfile, p_threads],
)
p_split.add_argument(
    '--prefix', default='INFILE', type=str,
    help='default=%(default)s: Output prefix PATH. '
         'If this is INFILE and --outfile is set, --outfile is used as the prefix.',
)
p_split.set_defaults(handler=command_split)

def command_stats(args):
    """Run the ``stats`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``stats_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.stats import stats_main
    sys.stderr.write(f'cdskit stats: started at {datetime.now()}\n')
    stats_main(args)
    sys.stderr.write(f'cdskit stats: ended at {datetime.now()}\n')

# Subcommand: stats (only the inherited options; nothing subcommand-specific)
help_stats = 'Printing sequence statistics. See `cdskit stats -h`'
p_stats = subparsers.add_parser(
    'stats',
    help=help_stats,
    parents=[p_version, p_infile, p_threads],
)
p_stats.set_defaults(handler=command_stats)


def command_translate(args):
    """Run the ``translate`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``translate_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.translate import translate_main
    sys.stderr.write(f'cdskit translate: started at {datetime.now()}\n')
    translate_main(args)
    sys.stderr.write(f'cdskit translate: ended at {datetime.now()}\n')

# Subcommand: translate
help_translate = 'Translating CDS nucleotide sequences to amino acids. See `cdskit translate -h`'
p_translate = subparsers.add_parser(
    'translate',
    help=help_translate,
    parents=[p_version, p_infile, p_outfile, p_codon, p_threads],
)
p_translate.add_argument(
    '--to_stop', metavar='yes|no', default='no', type=strtobool,
    help='default=%(default)s: Whether to stop translation at the first in-frame stop codon.',
)
p_translate.set_defaults(handler=command_translate)


def command_validate(args):
    """Run the ``validate`` subcommand.

    Start and end timestamps are written to stderr around the call; the end
    line is not written if ``validate_main`` raises.

    Args:
        args: Parsed argparse namespace, passed through unchanged.
    """
    # Imported on demand so the module is only loaded for this subcommand.
    from cdskit.validate import validate_main
    sys.stderr.write(f'cdskit validate: started at {datetime.now()}\n')
    validate_main(args)
    sys.stderr.write(f'cdskit validate: ended at {datetime.now()}\n')

# Subcommand: validate (inherits no output-file options; results go to --report)
help_validate = 'Validating aligned CDS quality and reporting issues. See `cdskit validate -h`'
p_validate = subparsers.add_parser(
    'validate',
    help=help_validate,
    parents=[p_version, p_infile, p_codon, p_threads],
)
p_validate.add_argument(
    '--report', metavar='PATH', default='', type=str,
    help='default=%(default)s: Optional report output path. If PATH ends with ".json", JSON is written; otherwise tab-separated TSV is written.',
)
p_validate.set_defaults(handler=command_validate)


# Dispatch: parse the command line and run the selected subcommand.
args = psr.parse_args()
# `handler` is attached by each subparser's set_defaults(); it is absent when
# no subcommand was given on the command line.
handler = getattr(args, 'handler', None)
if handler is None:
    psr.print_help()
else:
    try:
        handler(args)
    except Exception as exc:
        # Print a one-line message instead of a traceback and exit with
        # status 1. Fall back to the exception class name when the exception
        # carries no message.
        message = str(exc) or exc.__class__.__name__
        sys.stderr.write(message.rstrip('\n') + '\n')
        sys.exit(1)
