#!/opt/conda/conda-bld/crabs_1763648678583/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placeho/bin/python

##################
# IMPORT MODULES #
##################
import os, rich, rich.progress, collections, requests
import rich_click as click
from function import __version__
from function.crabs_functions import (check_params,
                                      check_midori_values, 
                                      embl_url,
                                      midori_url,
                                      parse_exclude, 
                                      set_output_dir, 
                                      download_file, 
                                      remove_tar_intermediary, 
                                      unzip_with_progress, 
                                      download_chunked_file, 
                                      gunzip_with_progress, 
                                      download_ncbi_seqs, 
                                      retrieve_species, 
                                      build_query, 
                                      ncbi_download_info, 
                                      select_function,
                                      names_to_memory,
                                      nodes_to_memory,
                                      accession_to_memory,
                                      generate_lineages,
                                      fill_missing_lineages,
                                      dict_to_output,
                                      merge_uniq_databases,
                                      merge_databases,
                                      check_files,
                                      write_list_to_output,
                                      filter_function,
                                      select_subset,
                                      subset_function,
                                      classifier_format,
                                      idt_text,
                                      blast_no_tax,
                                      blast_tax,
                                      unknown_base_conversion,
                                      rev_comp,
                                      cutadapt,
                                      cutadapt_relaxed,
                                      crabs_to_fasta,
                                      list_to_fasta,
                                      multiple_crabs_to_fasta,
                                      multiple_list_to_temp,
                                      usearch_global,
                                      extract_alignment_results,
                                      write_dict_to_output,
                                      parse_diversity,
                                      horizontal_bar_chart,
                                      parse_length,
                                      line_graph,
                                      calculate_ncbi_species_genera,
                                      calculate_database_species_genera,
                                      completeness_table_output,
                                      parse_phylo_input,
                                      subset_phylo_input,
                                      dict_to_fasta,
                                      align_sequences,
                                      generate_phylo_tree,
                                      amplicon_import,
                                      raw_import,
                                      extract_primer_regions,
                                      deconstruct_primer_regions,
                                      dict_to_array,
                                      efficiency_barplot,
                                      parse_primer,
                                      )

#####################
# CLI CONFIGURATION #
#####################
# formatting: rich_click module-level settings for the help and error output
click.rich_click.USE_RICH_MARKUP = True
click.rich_click.SHOW_METAVARS_COLUMN = False
click.rich_click.APPEND_METAVARS_HELP = True
click.rich_click.STYLE_ERRORS_SUGGESTION = ""

# header: tri-coloured slashes, linked program name, and the running version
click.rich_click.HEADER_TEXT = (
    "[yellow]/[/][cyan]/[/][yellow]/[/] "
    "[bold][link=https://github.com/gjeunen/reference_database_creator]CRABS[/link][/]"
    f" | v{__version__}"
)

# footer: pointer to the project repository
click.rich_click.FOOTER_TEXT = (
    "See [link=https://github.com/gjeunen/reference_database_creator]"
    "https://github.com/gjeunen/reference_database_creator[/]"
    " for more details."
)

# suggestion text: version, the help command, and the repository link
click.rich_click.ERRORS_SUGGESTION = (
    f"This is CRABS [cyan]v{__version__}[/]\n"
    "For more help, run '[yellow]crabs --help[/]' or visit "
    "[link=https://github.com/gjeunen/reference_database_creator]"
    "https://github.com/gjeunen/reference_database_creator[/]"
)

# grouping of options
# One help panel per CRABS function for the "crabs" command. Each row below is
# (panel title, options shown in that panel); the function flag comes first,
# followed by the parameters it uses. Every panel is emitted with
# "deduplicate": False, and options such as "--output" or "--input" are listed
# in several panels.
click.rich_click.OPTION_GROUPS = {
    "crabs": [
        {"name": panel_title, "options": panel_options, "deduplicate": False}
        for panel_title, panel_options in (
            # download functions
            ("Download NCBI Taxonomy",
             ["--download-taxonomy", "--exclude", "--output"]),
            ("Download BOLD Database",
             ["--download-bold", "--taxon", "--marker", "--output", "--version-v3"]),
            ("Download EMBL Database",
             ["--download-embl", "--taxon", "--output"]),
            ("Download GreenGenes Database",
             ["--download-greengenes", "--output"]),
            ("Download GreenGenes2 Database",
             ["--download-greengenes2", "--output"]),
            ("Download Meta-Fish-Lib",
             ["--download-meta-fish-lib", "--output"]),
            ("Download MIDORI2 Database",
             ["--download-midori", "--gene", "--gb-number", "--gb-type", "--output"]),
            ("Download MitoFish Database",
             ["--download-mitofish", "--output"]),
            ("Download NCBI Database",
             ["--download-ncbi", "--email", "--query", "--database", "--batchsize",
              "--species", "--output"]),
            ("Download SILVA Database",
             ["--download-silva", "--gene", "--db-type", "--db-version", "--output"]),
            # import and merge
            ("Import sequences into CRABS format",
             ["--import", "--import-format", "--names", "--nodes", "--acc2tax",
              "--input", "--output", "--ranks"]),
            ("Merge CRABS databases into one file",
             ["--merge", "--input", "--output", "--uniq"]),
            # amplicon extraction
            ("Extract amplicons through in silico PCR",
             ["--in-silico-pcr", "--input", "--output", "--forward", "--reverse",
              "--mismatch", "--threads", "--untrimmed", "--relaxed", "--buffer-size"]),
            ("Retrieve amplicons without primer-binding regions",
             ["--pairwise-global-alignment", "--input", "--output", "--amplicons",
              "--forward", "--reverse", "--size-select", "--threads",
              "--percent-identity", "--coverage", "--all-start-positions"]),
            # database curation
            ("Dereplicate CRABS database",
             ["--dereplicate", "--input", "--output", "--dereplication-method"]),
            ("Filter CRABS database",
             ["--filter", "--input", "--output", "--minimum-length",
              "--maximum-length", "--maximum-n", "--environmental",
              "--no-species-id", "--rank-na"]),
            ("Subset CRABS database on taxonomic ID",
             ["--subset", "--input", "--output", "--include", "--exclude"]),
            # figures and tables
            ("Figure: diversity contained within database",
             ["--diversity-figure", "--input", "--output", "--tax-level"]),
            ("Figure: amplicon length distribution",
             ["--amplicon-length-figure", "--input", "--output", "--tax-level"]),
            ("Figure: phylogenetic tree",
             ["--phylogenetic-tree", "--input", "--output", "--tax-level", "--species"]),
            ("Figure: amplification efficiency",
             ["--amplification-efficiency-figure", "--input", "--amplicons",
              "--forward", "--reverse", "--output", "--tax-group"]),
            ("Table: database completeness for target taxonomic group",
             ["--completeness-table", "--input", "--output", "--names", "--nodes",
              "--species"]),
            # export
            ("Export CRABS database to taxonomic classifier format",
             ["--export", "--input", "--output", "--export-format"]),
        )
    ],
}

# link user-input to options
@click.command(context_settings=dict(help_option_names=["-h", "--help"]))

# CRABS functions
@click.option("--download-taxonomy", "download_taxonomy_", is_flag = True, help = "Function to download NCBI taxonomy")
@click.option("--download-bold", "download_bold_", is_flag = True, help = "Function to download BOLD database")
@click.option("--download-embl", "download_embl_", is_flag = True, help = "Function to download EMBL database")
@click.option("--download-greengenes", "download_greengenes_", is_flag = True, help = "Function to download GreenGenes database")
@click.option("--download-greengenes2", "download_greengenes2_", is_flag = True, help = "Function to downlaod GreenGenes2 database")
@click.option("--download-meta-fish-lib", "download_meta_fish_lib_", is_flag = True, help = "Function to download the Meta-Fish-Lib database")
@click.option("--download-midori", "download_midori_", is_flag = True, help = "Function to download MIDORI2 database")
@click.option("--download-mitofish", "download_mitofish_", is_flag = True, help = "Function to download MitoFish database")
@click.option("--download-ncbi", "download_ncbi_", is_flag = True, help = "Function to download NCBI database")
@click.option("--download-silva", "download_silva_", is_flag = True, help = "Function to download SILVA database")
@click.option("--import", "import_", is_flag = True, help = "Function to import sequences into CRABS format")
@click.option("--merge", "merge_", is_flag = True, help = "Function to merge CRABS databases into a single file")
@click.option("--in-silico-pcr", "in_silico_pcr_", is_flag = True, help = "Function to extract amplicons through in silico PCR")
@click.option("--pairwise-global-alignment", "pairwise_global_alignment_", is_flag = True, help = "Function to retrieve amplicons without primer-bidning regions")
@click.option("--dereplicate", "dereplicate_", is_flag = True, help = "Function to dereplicate a CRABS database")
@click.option("--filter", "filter_", is_flag = True, help = "Function to filter a CRABS database")
@click.option("--subset", "subset_", is_flag = True, help = "Function to subset a CRABS database")
@click.option("--diversity-figure", "diversity_figure_", is_flag = True, help = "Function to create a horizontal bar chart with included diversity")
@click.option("--amplicon-length-figure", "amplicon_length_figure_", is_flag = True, help = "Function to create a line chart depicting amplicon distributions")
@click.option("--phylogenetic-tree", "phylogenetic_tree_", is_flag = True, help = "Function to create a phylogenetic tree with barcodes for target species list")
@click.option("--amplification-efficiency-figure", "amplification_efficiency_figure_", is_flag = True, help = "Function to create a bar graph displaying mismatches in the primer-binding region")
@click.option("--completeness-table", "completeness_table_", is_flag = True, help = "Function creating a spreadsheet containing barcode availability for taxonomic groups")
@click.option("--export", "export_", is_flag = True, help = "Function to export a CRABS database")

# CRABS parameters
@click.option("--output", "output_", help = "output directory or filename")
@click.option("--exclude", "exclude_", help = "stop the download of 'acc2taxid' or 'taxdump'")
@click.option("--taxon", "taxon_", help = "taxonomic group to download")
@click.option("--gene", "gene_", help = "gene to download")
@click.option("--gb-number", "gb_number_", help = "database version to download")
@click.option("--gb-type", "gb_type_", type = str, help = "database type to download")
@click.option("--marker", "marker_", help = "genetic marker to download")
@click.option("--version-v3", "version_v3_", is_flag = True, help = "download data from BOLD v3 (legacy)")
@click.option("--email", "email_", help = "email address to connect to NCBI server")
@click.option("--query", "query_", help = "query identifying what to download from NCBI")
@click.option("--database", "database_", help = "the database from which NCBI sequences are downloaded")
@click.option("--batchsize", "batchsize_", default = 5000, type = int, help = "sequences to download from NCBI per chunk (default = 5,000)")
@click.option("--species", "species_", help = "species of interest list")
@click.option("--db-type", "db_type_", help = "database version to download")
@click.option("--db-version", "db_version_", help = "database version to download")
@click.option("--import-format", "import_format_", help = "format of the sequences to import")
@click.option("--names", "names_", help = "NCBI taxonomy 'names.dmp' file")
@click.option("--nodes", "nodes_", help = "NCBI taxonomy 'nodes.dmp' file")
@click.option("--acc2tax", "acc2tax_", help = "NCBI taxonomy 'nucl_gb.accession2taxid' file")
@click.option("--input", "input_", help = "input filename")
@click.option("--ranks", "ranks_", default = 'kingdom;phylum;class;order;family;genus;species', help = "taxonomic ranks to be included in the taxonomic lineage")
@click.option("--uniq", "uniq_", is_flag = True, help = "keep only unique accession numbers")
@click.option("--dereplication-method", "dereplication_method_", default = 'unique_species', help = 'dereplication method: "strict", "single_species", and "unique_species" (default)')
@click.option("--minimum-length", "minimum_length_", help = "minimum sequence length for amplicon to be retained in the database", type = int)
@click.option("--maximum-length", "maximum_length_", help = "maximum sequence length for amplicon to be retained in the database", type = int)
@click.option("--maximum-n", "maximum_n_", help = "discard amplicons with N or more ambiguous bases", type = int)
@click.option("--environmental", "environmental_", is_flag = True, help = "discard environmental sequences from the database")
@click.option("--no-species-id", "no_species_id_", is_flag = True, help = "discard sequences for which no species name is available")
@click.option("--rank-na", "rank_na_", help = "discard sequences with N or more unspecified taxonomic levels", type = int)
@click.option("--include", "include_", help = "string or file containing taxa to include")
@click.option("--exclude", "exclude_", help = "string or file containing taxa to exclude")
@click.option("--export-format", "export_format_", help = 'export format: "sintax", "rdp", "qiime-fasta", "qiime-text", "dada2-species", "dada2-taxonomy", "idt-fasta", "idt-text", "blast-notax", "blast-tax"')
@click.option("--forward", "forward_", help = "forward primer sequence in 5' -> 3' direction")
@click.option("--reverse", "reverse_", help = "reverse primer sequence in 5' -> 3' direction")
@click.option("--mismatch", "mismatch_", type = float, default = 4.5, help = "number of mismatches allowed in the primer-binding site (default: 4)")
@click.option("--threads", "threads_", type = int, default = 0, help = "number of threads used to compute the in silico PCR (default: autodetection)")
@click.option("--untrimmed", "untrimmed_", help = "file name for untrimmed sequences")
@click.option("--relaxed", "relaxed_", is_flag = True, help = "recover amplicons where only the forward or reverse primer-binding region was found")
@click.option("--buffer-size", "buffer_size_", type = int, help = "value 2x the longest sequence in the data, only necessary when observing an 'OverflowError'")
@click.option("--amplicons", "amplicons_", help = "file name for the amplicons retrieved during in silico PCR")
@click.option("--size-select", "size_select_", help = "exclude reads longer than N from the analysis")
@click.option("--percent-identity", "percent_identity_", help = "minimum percent identity threshold for the alignment to pass (0.0 - 1.0)")
@click.option("--coverage", "coverage_", help = "minimum coverage threshold for the alignment to pass (0 - 100)")
@click.option("--all-start-positions", "all_start_positions_", is_flag = True, help = "do not restrict alignment start and end to be within the primer-binding region length")
@click.option("--tax-level", "tax_level_", type = int, help = "taxonomic level to be used as groups for horizontal bar chart")
@click.option("--tax-group", "tax_group_", help = "taxonomic group of interest to be included in the analysis")

#################
# MAIN FUNCTION #
#################
def crabs(**kwargs):
    """CRABS is an open-source software program that enables scientists to build custom local reference databases for improved taxonomy assignment of metabarcoding data.
    
    CRABS is split up into various functions and steps to accomplish this task, including:

    (1) download data from online repositories,

    (2) import downloaded data into CRABS format,
    
    (3) extract amplicons from imported data,

    (4) retrieve amplicons without primer-binding regions,
    
    (5) curate and subset the local database,
    
    (6) export the local database in various taxonomic classifier formats, and
    
    (7) basic visualisations to explore the local reference database.
    


    A basic example to run CRABS (download NCBI taxonomy information): 
    
    [blue bold]crabs --download-taxonomy --exclude 'acc2taxid'[/]
    """

# access all functions from kwargs
    download_taxonomy_ = kwargs.get("download_taxonomy_")
    download_bold_ = kwargs.get("download_bold_")
    download_embl_ = kwargs.get("download_embl_")
    download_greengenes_ = kwargs.get("download_greengenes_")
    download_greengenes2_ = kwargs.get("download_greengenes2_")
    download_midori_ = kwargs.get("download_midori_")
    download_mitofish_ = kwargs.get("download_mitofish_")
    download_ncbi_ = kwargs.get("download_ncbi_")
    download_silva_ = kwargs.get("download_silva_")
    download_meta_fish_lib_ = kwargs.get("download_meta_fish_lib_")
    import_ = kwargs.get("import_")
    merge_ = kwargs.get("merge_")
    in_silico_pcr_ = kwargs.get("in_silico_pcr_")
    pairwise_global_alignment_ = kwargs.get("pairwise_global_alignment_")
    dereplicate_ = kwargs.get("dereplicate_")
    filter_ = kwargs.get("filter_")
    subset_ = kwargs.get("subset_")
    diversity_figure_ = kwargs.get("diversity_figure_")
    amplicon_length_figure_ = kwargs.get("amplicon_length_figure_")
    phylogenetic_tree_ = kwargs.get("phylogenetic_tree_")
    amplification_efficiency_figure_ = kwargs.get("amplification_efficiency_figure_")
    completeness_table_ = kwargs.get("completeness_table_")
    export_ = kwargs.get("export_")

# access all options from kwargs
    output_ = kwargs.get("output_")
    exclude_ = kwargs.get("exclude_")
    taxon_ = kwargs.get("taxon_")
    marker_ = kwargs.get("marker_")
    version_v3_ = kwargs.get("version_v3_")
    gene_ = kwargs.get("gene_")
    gb_number_ = kwargs.get("gb_number_")
    gb_type_ = kwargs.get("gb_type_")
    email_ = kwargs.get("email_")
    query_ = kwargs.get("query_")
    database_ = kwargs.get("database_")
    batchsize_ = kwargs.get("batchsize_")
    species_ = kwargs.get("species_")
    db_type_ = kwargs.get("db_type_")
    db_version_ = kwargs.get("db_version_")
    import_format_ = kwargs.get("import_format_")
    names_ = kwargs.get("names_")
    nodes_ = kwargs.get("nodes_")
    acc2tax_ = kwargs.get("acc2tax_")
    input_ = kwargs.get("input_")
    ranks_ = kwargs.get("ranks_")
    uniq_ = kwargs.get("uniq_")
    dereplication_method_ = kwargs.get("dereplication_method_")
    minimum_length_ = kwargs.get("minimum_length_")
    maximum_length_ = kwargs.get("maximum_length_")
    maximum_n_ = kwargs.get("maximum_n_")
    environmental_ = kwargs.get("environmental_")
    no_species_id_ = kwargs.get("no_species_id_")
    rank_na_ = kwargs.get("rank_na_")
    include_ = kwargs.get("include_")
    exclude_ = kwargs.get("exclude_")
    export_format_ = kwargs.get("export_format_")
    forward_ = kwargs.get("forward_")
    reverse_ = kwargs.get("reverse_")
    mismatch_ = kwargs.get("mismatch_")
    threads_ = kwargs.get("threads_")
    untrimmed_ = kwargs.get("untrimmed_")
    relaxed_ = kwargs.get("relaxed_")
    buffer_size_ = kwargs.get("buffer_size_")
    amplicons_ = kwargs.get("amplicons_")
    size_select_ = kwargs.get("size_select_")
    percent_identity_ = kwargs.get("percent_identity_")
    coverage_ = kwargs.get("coverage_")
    include_all_start_positions_ = kwargs.get("all_start_positions_")
    tax_level_ = kwargs.get("tax_level_")
    tax_group_ = kwargs.get("tax_group_")

# print starting info to console
    console = rich.console.Console(stderr=True, highlight=False)
    console.print(f"\n[yellow]/[/][cyan]/[/][yellow]/[/] [bold][link=https://github.com/gjeunen/reference_database_creator]CRABS[/link][/] | v{__version__}\n")
    columns = [*rich.progress.Progress.get_default_columns(), rich.progress.TimeElapsedColumn()]

# identify function and execute
#####################
# DOWNLOAD TAXONOMY #
#####################
    if download_taxonomy_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Download NCBI taxonomy files")
        # check what files to download based on exclude_
        download_dict = parse_exclude(exclude_)
        # set output directory
        output_directory = set_output_dir(output_)
        # iterate over download_dict for the different files
        for key, value in download_dict.items():
            # set filename
            filename = value.split('/')[-1]
            # download file
            download_file(console, columns, value, output_directory, filename)
            # unzip file
            unzip_method = select_function(key)
            unzip_method(console, columns, output_directory, filename)
            # remove zipped and intermediary files
            os.remove(f'{output_directory}{filename}')
            remove_tar_intermediary(key, output_directory)

#################
# DOWNLOAD BOLD #
#################
    if download_bold_:
        # print function to console
        console.print("[cyan]|            Function[/] | Download BOLD database")
        # check if all parameters have been provided
        check_params(console, {'"--output"': output_, '"--taxon"': taxon_})
        # determine if download from v3 specified
        if version_v3_:
            # set url, output directory, and filename
            url = 'http://v3.boldsystems.org/index.php/API_Public/sequence?taxon=' + taxon_
            if marker_:
                url = url + '&marker=' + marker_
            output_directory = f'{os.path.dirname(output_)}/'
            if output_directory == '/':
                output_directory = ''
            filename = output_.split('/')[-1]
            # download the file
            download_chunked_file(console, columns, url, output_directory, filename)
        else:
            # step 1: preprocess query
            preprocess_url = f"https://portal.boldsystems.org/api/query/preprocessor?query=tax:{taxon_}"
            response = requests.get(preprocess_url)
            if response.status_code != 200:
                console.print(f"[cyan]|               ERROR[/] | Preprocessor request failed ({response.status_code})")
                exit()
            preprocess_data = response.json()
            # step 2: extract matched terms
            try:
                matched_tax = preprocess_data["successful_terms"][0]["matched"]
            except (KeyError, IndexError):
                console.print(f"[cyan]|               ERROR[/] | Taxon '{taxon_}' not found in BOLD v5.")
                exit()
            if not matched_tax.startswith("tax:"):
                console.print(f"[cyan]|               ERROR[/] | Taxon '{taxon_}' not found in BOLD v5.")
                exit()
            # step 3: retrieve query id
            query_url = f"https://portal.boldsystems.org/api/query?query={matched_tax}&extent=full"
            query_response = requests.get(query_url)
            if query_response.status_code != 200:
                console.print(f"[cyan]|               ERROR[/] | Query request failed ({query_response.status_code})")
                exit()
            query_json = query_response.json()
            query_id = query_json.get("query_id", None)
            if not query_id:
                console.print("[cyan]|               ERROR[/] | No query_id returned from BOLD v5.")
                exit()
            # step 4: download results
            download_url = f"https://portal.boldsystems.org/api/documents/{query_id}/download?format=tsv"
            output_directory = f'{os.path.dirname(output_)}/'
            if output_directory == '/':
                output_directory = ''
            filename = output_.split('/')[-1]
            download_chunked_file(console, columns, download_url, output_directory, filename)

#################
# DOWNLOAD EMBL #
#################
    if download_embl_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Download EMBL database")
        # check if all parameters have been provided
        check_params(console, {'"--output"': output_, '"--taxon"': taxon_})
        # find all matching files to taxon_ and store as urls
        urls = embl_url(console, taxon_)
        # print number of files to download to console
        if len(urls) > 1:
            console.print(f"[cyan]|             Results[/] | Downloading {len(urls)} files from EMBL")
        else:
            console.print(f"[cyan]|             Results[/] | Downloading {len(urls)} file from EMBL")
        # set output_directory, and filename
        output_directory = f'{os.path.dirname(output_)}/'
        if output_directory == '/':
            output_directory = ''
        outputfilename = output_.split('/')[-1]
        # download the files
        matching_files = []
        for url in urls:
            zipfilename = url.split('/')[-1]
            matching_files.append(zipfilename)
            download_file(console, columns, url, output_directory, zipfilename)
        # unzip files and remove zipped intermediary files
        file_count = 0
        for file in matching_files:
            file_count += 1
            if file_count == 1:
                gunzip_with_progress(console, columns, output_directory, file, outputfilename, append = False)
            else:
                gunzip_with_progress(console, columns, output_directory, file, outputfilename, append = True)
            os.remove(f'{output_directory}{file}')

#######################
# DOWNLOAD GREENGENES #
#######################
    if download_greengenes_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Download GreenGenes database")
        # check if all parameters have been provided
        check_params(console, {'"--output"': output_})
        # GreenGenes is published as several gzipped fasta files that end up in one output file
        urls = ['https://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/current_GREENGENES_gg16S_unaligned.fasta.gz',
                'https://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/current_prokMSA_unaligned.fasta.gz',
                'https://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/current_HOMD_gg16S_unaligned.fasta.gz',
                'https://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/current_NCBI_gg16S_unaligned.fasta.gz',
                'https://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/current_RDP_gg16S_unaligned.fasta.gz',
                'https://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/current_SILVA_gg16S_unaligned.fasta.gz']
        # split "--output" into a directory prefix (empty when no directory was given) and a file name
        parent_directory = os.path.dirname(output_)
        output_directory = f'{parent_directory}/' if parent_directory else ''
        outputfilename = output_.rpartition('/')[2]
        # download every archive first; the archive name is the last component of its url
        matching_files = [url.rpartition('/')[2] for url in urls]
        for url, zipfilename in zip(urls, matching_files):
            download_file(console, columns, url, output_directory, zipfilename)
        # decompress the archives in download order: the first call uses append = False,
        # every later call uses append = True; each archive is deleted once decompressed
        for position, archive in enumerate(matching_files):
            gunzip_with_progress(console, columns, output_directory, archive, outputfilename, append = position > 0)
            os.remove(output_directory + archive)

########################
# DOWNLOAD GREENGENES2 #
########################
    if download_greengenes2_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Download GreenGenes2 database")
        # check if all parameters have been provided
        check_params(console, {'"--output"': output_})
        # the archive keeps the name it has on the server (last component of the url)
        url = 'https://ftp.microbio.me/greengenes_release/current/2024.09.seqs.fna.gz'
        zipfilename = url.rpartition('/')[2]
        # split "--output" into a directory prefix (empty when no directory was given) and a file name
        parent_directory = os.path.dirname(output_)
        output_directory = f'{parent_directory}/' if parent_directory else ''
        outputfilename = output_.rpartition('/')[2]
        # download, decompress, then delete the intermediary archive
        download_file(console, columns, url, output_directory, zipfilename)
        gunzip_with_progress(console, columns, output_directory, zipfilename, outputfilename, append = False)
        os.remove(output_directory + zipfilename)

##########################
# DOWNLOAD META-FISH-LIB #
##########################
    if download_meta_fish_lib_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Download Meta-Fish-Lib database")
        # check if all parameters have been provided
        check_params(console, {'"--output"': output_})
        # the archive keeps the name it has on the server (last component of the url)
        url = 'https://raw.githubusercontent.com/genner-lab/meta-fish-lib/main/assets/reference-library-master.csv.gz'
        zipfilename = url.rpartition('/')[2]
        # split "--output" into a directory prefix (empty when no directory was given) and a file name
        parent_directory = os.path.dirname(output_)
        output_directory = f'{parent_directory}/' if parent_directory else ''
        outputfilename = output_.rpartition('/')[2]
        # download, decompress, then delete the intermediary archive
        download_file(console, columns, url, output_directory, zipfilename)
        gunzip_with_progress(console, columns, output_directory, zipfilename, outputfilename, append = False)
        os.remove(output_directory + zipfilename)

####################
# DOWNLOAD MIDORI2 #
####################
    if download_midori_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Download MIDORI2 database")
        # check if all parameters have been provided and correctly formatted
        # (the '"--gb-type"' label previously lacked its closing double quote, unlike every other label)
        check_params(console, {'"--output"': output_, '"--gene"': gene_, '"--gb-number"': gb_number_, '"--gb-type"': gb_type_})
        check_midori_values(console, gene_, gb_type_, gb_number_)
        # midori_url() returns the archive type together with the download url
        zip_type, url = midori_url(gb_number_, gb_type_, gene_)
        # split "--output" into a directory prefix (empty when no directory was given) and a file name
        output_directory = f'{os.path.dirname(output_)}/'
        if output_directory == '/':
            output_directory = ''
        zipfilename = url.split('/')[-1]
        outputfilename = output_.split('/')[-1]
        # download the zip file
        download_file(console, columns, url, output_directory, zipfilename)
        # unzip the downloaded file with the tool matching the archive type
        # NOTE(review): any other zip_type value falls through without extraction - confirm midori_url() only returns these two
        if zip_type == 'unzip':
            unzip_with_progress(console, columns, output_directory, zipfilename, outputfilename)
        elif zip_type == 'gunzip':
            gunzip_with_progress(console, columns, output_directory, zipfilename, outputfilename, append = False)
        # remove intermediary files
        os.remove(f'{output_directory}{zipfilename}')
        
#####################
# DOWNLOAD MITOFISH #
#####################
    if download_mitofish_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Download MitoFish database")
        # check if all parameters have been provided
        check_params(console, {'"--output"': output_})
        # set url, output_directory, and filename
        # (only the gzipped fasta endpoint is downloaded; the previously assigned but unused zip url was dropped)
        url = 'https://mitofish.aori.u-tokyo.ac.jp/download/fullseq/latest/mitofishdb.fa.gz'
        # split "--output" into a directory prefix (empty when no directory was given) and a file name
        output_directory = f'{os.path.dirname(output_)}/'
        if output_directory == '/':
            output_directory = ''
        zipfilename = url.split('/')[-1]
        outputfilename = output_.split('/')[-1]
        # download the zip file
        download_file(console, columns, url, output_directory, zipfilename)
        # unzip the downloaded file
        gunzip_with_progress(console, columns, output_directory, zipfilename, outputfilename, append = False)
        # remove intermediary files
        os.remove(f'{output_directory}{zipfilename}')

#################
# DOWNLOAD NCBI #
#################
    if download_ncbi_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Download NCBI database")
        # check if all parameters have been provided
        required_parameters = {'"--output"': output_, '"--database"': database_, '"--query"': query_, '"--email"': email_}
        check_params(console, required_parameters)
        # the species list is optional; an empty list is used when "--species" was not given
        if species_:
            species_list = retrieve_species(console, columns, species_)
        else:
            species_list = []
        # build the queries, fetch the download handles, then download the sequences
        query_list = build_query(species_list, query_)
        total_read_count, ncbi_info_dict = ncbi_download_info(console, columns, query_list, database_, email_)
        total_downloaded_seqs = download_ncbi_seqs(console, columns, total_read_count, batchsize_, database_, email_, ncbi_info_dict, output_)
        # write log to Terminal window; a total of zero has no percentage to report
        try:
            downloaded_percentage = round(total_downloaded_seqs / total_read_count * 100, 2)
        except ZeroDivisionError:
            console.print("[cyan]|             Results[/] | Number of sequences downloaded: 0")
        else:
            console.print(f"[cyan]|             Results[/] | Number of sequences downloaded: {total_downloaded_seqs}/{total_read_count} ({downloaded_percentage}%)")

##################
# DOWNLOAD SILVA #
##################
    if download_silva_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Download SILVA database")
        # check if all parameters have been provided
        check_params(console, {'"--output"': output_, '"--gene"': gene_, '"--db-type"': db_type_, '"--db-version"': db_version_})
        # "--db-type" is matched case-insensitively; anything but FULL or SUBSET aborts the run
        requested_type = db_type_.upper()
        if requested_type not in ('FULL', 'SUBSET'):
            console.print("[cyan]|               ERROR[/] | [bold yellow]incorrect value provided for '--db-type', aborting analysis...[/]\n")
            exit()
        # the two database types differ only in the export file name
        if requested_type == 'FULL':
            url = f'https://ftp.arb-silva.de/release_{db_version_}/Exports/SILVA_{db_version_}_{gene_.upper()}Ref_tax_silva.fasta.gz'
        else:
            url = f'https://ftp.arb-silva.de/release_{db_version_}/Exports/SILVA_{db_version_}_{gene_.upper()}Ref_NR99_tax_silva.fasta.gz'
        zipfilename = url.rpartition('/')[2]
        # split "--output" into a directory prefix (empty when no directory was given) and a file name
        parent_directory = os.path.dirname(output_)
        output_directory = f'{parent_directory}/' if parent_directory else ''
        outputfilename = output_.rpartition('/')[2]
        # download, decompress, then delete the intermediary archive
        download_file(console, columns, url, output_directory, zipfilename)
        gunzip_with_progress(console, columns, output_directory, zipfilename, outputfilename, append = False)
        os.remove(output_directory + zipfilename)

##########
# IMPORT #
##########
    if import_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Import sequence data into CRABS format")
        # check if all parameters have been provided
        # TODO: make a distinction in necessary parameters between different formats
        check_params(console, {'"--input"': input_, '"--output"': output_, '"--format"': import_format_, '"--names"': names_, '"--nodes"': nodes_, '"--acc2taxid"': acc2tax_})
        # read documents into memory; the progress bar total is the combined size of all four input files
        input_files = [input_, names_, nodes_, acc2tax_]
        input_file_size = sum(os.path.getsize(input_file) for input_file in input_files)
        with rich.progress.Progress(*columns) as progress_bar:
            task = progress_bar.add_task(console = console, description = "[cyan]| Read data to memory[/] |", total=input_file_size)
            input_to_memory = select_function(import_format_)
            try:
                seq_input_dict, initial_seq_number = input_to_memory(task, progress_bar, input_)
            except TypeError:
                # NOTE(review): presumably select_function() returns a non-callable for unknown formats - confirm;
                # a TypeError raised inside the parser itself is reported the same way
                console.print(f"\n[cyan]|               ERROR[/] | [bold yellow]'--import-format {import_format_}' not recognised, aborting analysis...[/]\n")
                exit()
            names_key_tax_number_value_dict, tax_number_key_names_value_dict, synonym_key_dict = names_to_memory(task, progress_bar, names_)
            tax_number_key_rank_and_tax_number_up_values_dict = nodes_to_memory(task, progress_bar, nodes_)
            acc_key_tax_number_value_dict = accession_to_memory(task, progress_bar, acc2tax_, seq_input_dict)
        # generate taxonomic lineages
        seq_input_dict, unresolved_lineage = generate_lineages(console, columns, ranks_, seq_input_dict, acc_key_tax_number_value_dict, names_key_tax_number_value_dict, synonym_key_dict, tax_number_key_rank_and_tax_number_up_values_dict, tax_number_key_names_value_dict)
        # fill out missing info
        seq_input_dict = fill_missing_lineages(console, columns, ranks_, seq_input_dict)
        # write to output
        dict_to_output(seq_input_dict, ranks_, output_)
        # write log to Terminal window; percentages fall back to 0.0 when the denominator is zero
        # so an empty input (or an empty result) no longer raises ZeroDivisionError after the output was written
        imported_count = len(seq_input_dict)
        imported_percentage = round(imported_count / initial_seq_number * 100, 2) if initial_seq_number else 0.0
        console.print(f"[cyan]|             Results[/] | Imported {imported_count} out of {initial_seq_number} sequences into CRABS format ({imported_percentage}%)")
        if unresolved_lineage > 0:
            unresolved_percentage = round(unresolved_lineage / imported_count * 100, 2) if imported_count else 0.0
            console.print(f"[cyan]|                    [/] | Could not resolve a taxonomic lineage for {unresolved_lineage} imported sequences ({unresolved_percentage}%)")

#########
# MERGE #
#########
    if merge_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Merge CRABS databases into a single file")
        # check if all parameters have been provided
        check_params(console, {'"--input"': input_, '"--output"': output_})
        # check for multiple files and their existence
        file_list = check_files(console, input_)
        # merge databases based on "--uniq" parameter
        if uniq_:
            merged_seq_file, initial_read_count = merge_uniq_databases(console, columns, file_list)
        else:
            merged_seq_file, initial_read_count = merge_databases(console, columns, file_list)
        # write merged data to output
        write_list_to_output(console, columns, merged_seq_file, output_)
        # write log to Terminal window; the percentage falls back to 0.0 when the input files held no sequences,
        # instead of raising ZeroDivisionError after the output was written
        merged_count = len(merged_seq_file)
        merged_percentage = round(merged_count / initial_read_count * 100, 2) if initial_read_count else 0.0
        console.print(f"[cyan]|             Results[/] | Written {merged_count} sequences to {output_} by merging {len(file_list)} files containing {initial_read_count} sequences ({merged_percentage}%)")

#################
# IN SILICO PCR #
#################
    if in_silico_pcr_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Extract amplicons through in silico PCR")
        # check if all parameters have been provided
        check_params(console, {'"--input"': input_, '"--output"': output_, '"--forward"': forward_, '"--reverse"': reverse_})
        # check primers for unknown bases and reverse complement reverse primer
        forward_ = unknown_base_conversion(forward_)
        reverse_ = unknown_base_conversion(reverse_)
        reverse_ = rev_comp(reverse_)
        # set parameters for cutadapt: overlap is the length of the shorter primer,
        # the adapter joins both primers with '...'
        overlap = str(min(len(forward_), len(reverse_)))
        adapter = forward_ + '...' + reverse_
        # transform input_ to fasta format in a temp file
        temp_input_path, fasta_dict = crabs_to_fasta(console, columns, input_)
        # run cutadapt
        trimmed_seqs, untrimmed_seqs = cutadapt(console, columns, adapter, temp_input_path, fasta_dict, mismatch_, overlap, threads_, buffer_size_)
        # run cutadapt again on the untrimmed sequences if the relaxed_ parameter was provided
        if relaxed_:
            temp_input_path2, fasta_dict2 = list_to_fasta(console, columns, untrimmed_seqs)
            trimmed_seqs, untrimmed_seqs, relaxed_count = cutadapt_relaxed(console, columns, forward_, reverse_, temp_input_path2, fasta_dict2, mismatch_, overlap, threads_, trimmed_seqs, untrimmed_seqs, buffer_size_)
        # write data to output
        write_list_to_output(console, columns, trimmed_seqs, output_)
        if untrimmed_:
            write_list_to_output(console, columns, untrimmed_seqs, untrimmed_)
        # remove temporary files
        os.remove(temp_input_path)
        if relaxed_:
            os.remove(temp_input_path2)
        # write log to Terminal window; percentages fall back to 0.0 when the denominator is zero
        # (empty input, or no amplicon extracted) instead of raising ZeroDivisionError
        amplicon_count = len(trimmed_seqs)
        sequence_count = len(fasta_dict)
        amplicon_percentage = round(amplicon_count / sequence_count * 100, 2) if sequence_count else 0.0
        console.print(f"[cyan]|             Results[/] | Extracted {amplicon_count} amplicons from {sequence_count} sequences ({amplicon_percentage}%)")
        if relaxed_:
            relaxed_percentage = round(relaxed_count / amplicon_count * 100, 2) if amplicon_count else 0.0
            console.print(f"[cyan]|             Results[/] | {relaxed_count} amplicons were extracted by only the forward or reverse primer ({relaxed_percentage}%)")

#############################
# PAIRWISE GLOBAL ALIGNMENT #
#############################
    if pairwise_global_alignment_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Retrieve amplicons without primer-binding regions")
        # check if all parameters have been provided
        required_parameters = {'"--input"': input_, '"--output"': output_, '"--amplicons"': amplicons_, '"--forward"': forward_, '"--reverse"': reverse_, '"--percent-identity"': percent_identity_, '"--coverage"': coverage_}
        check_params(console, required_parameters)
        # read the amplicon file and the input file into memory
        file_list = [amplicons_, input_]
        raw_fasta_dict, raw_fasta_list, amplicon_fasta_dict, amplicon_fasta_list = multiple_crabs_to_fasta(console, columns, file_list, size_select_)
        # write both sequence sets to temp files in fasta format
        raw_temp_path, amplicon_temp_path = multiple_list_to_temp(console, columns, raw_fasta_list, amplicon_fasta_list)
        # run pairwise global alignment between the two temp files
        align_temp_path = usearch_global(console, columns, raw_temp_path, amplicon_temp_path, percent_identity_, threads_, raw_fasta_dict)
        # extract the sequence regions that conform to parameter settings
        amplicon_fasta_dict = extract_alignment_results(console, columns, align_temp_path, amplicon_fasta_dict, include_all_start_positions_, coverage_, forward_, reverse_, raw_fasta_dict)
        # write data to output
        write_dict_to_output(console, columns, amplicon_fasta_dict, output_)
        # remove intermediary files
        for temporary_file in (raw_temp_path, amplicon_temp_path, align_temp_path):
            os.remove(temporary_file)
        # write log to Terminal window: the reported count is the size of the updated amplicon dict
        # minus the number of amplicons read at the start
        retrieved_count = len(amplicon_fasta_dict) - len(amplicon_fasta_list)
        console.print(f"[cyan]|             Results[/] | Retrieved {retrieved_count} amplicons without primer-binding regions from {len(raw_fasta_dict)} sequences")

###############
# DEREPLICATE #
###############
    if dereplicate_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Dereplicate CRABS database")
        # check if all parameters have been provided
        check_params(console, {'"--input"': input_, '"--output"': output_})
        # select dereplication function
        dereplication_function = select_function(dereplication_method_)
        # dereplicate data
        initial_read_count, seq_file = dereplication_function(console, columns, input_)
        # write data to output
        write_list_to_output(console, columns, seq_file, output_)
        # write log to Terminal window; the percentage falls back to 0.0 for an empty input
        # instead of raising ZeroDivisionError after the output was written
        unique_count = len(seq_file)
        unique_percentage = round(unique_count / initial_read_count * 100, 2) if initial_read_count else 0.0
        console.print(f"[cyan]|             Results[/] | Written {unique_count} unique sequences to {output_} out of {initial_read_count} initial sequences ({unique_percentage}%)")

##########
# FILTER #
##########
    if filter_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Filter CRABS database")
        # check if all parameters have been provided
        check_params(console, {'"--input"': input_, '"--output"': output_})
        # print which filtering parameters are included
        # identity checks are used on purpose: `0 in [None, False]` is True because 0 == False,
        # which previously hid an explicit value of 0 (e.g. "--maximum-n 0") from this listing
        filter_parameters = {'"--minimum-length"': minimum_length_,
                             '"--maximum-length"': maximum_length_,
                             '"--maximum-n"': maximum_n_,
                             '"--environmental"': environmental_,
                             '"--no-species-id"': no_species_id_,
                             '"--rank-na"': rank_na_}
        included_parameters = [key for key, value in filter_parameters.items() if value is not None and value is not False]
        console.print(f"[cyan]| Included parameters[/] | {', '.join(included_parameters)}")
        # read input file and parse data
        initial_read_count, seq_file, min_len_count, max_len_count, max_n_count, env_count, no_spec_count, rank_count = filter_function(console, columns, input_, minimum_length_, maximum_length_, maximum_n_, environmental_, no_species_id_, rank_na_)
        # write data to output
        write_list_to_output(console, columns, seq_file, output_)
        # write log to Terminal window; percentages fall back to 0.0 for an empty input
        # instead of raising ZeroDivisionError after the output was written
        retained_count = len(seq_file)
        retained_percentage = round(retained_count / initial_read_count * 100, 2) if initial_read_count else 0.0
        console.print(f"[cyan]|             Results[/] | Written {retained_count} filtered sequences to {output_} out of {initial_read_count} initial sequences ({retained_percentage}%)")
        # report every filter that removed at least one sequence
        for item in [min_len_count, max_len_count, max_n_count, env_count, no_spec_count, rank_count]:
            for key, value in item.items():
                if value != 0:
                    removed_percentage = round(value / initial_read_count * 100, 2) if initial_read_count else 0.0
                    console.print(f"[cyan]|                    [/] | {key}: {value} sequences not passing filter ({removed_percentage}%)")

##########
# SUBSET #
##########
    if subset_:
        # print function to console
        console.print(f"[cyan]|            Function[/] | Subset CRABS database")
        # check if all parameters have been provided
        check_params(console, {'"--input"': input_, '"--output"': output_})
        # check inclusion or exclusion parameter
        subset_dict = select_subset(console, include_, exclude_)
        # read input file and parse data
        initial_read_count, seq_file = subset_function(console, columns, input_, subset_dict)
        # write data to output
        write_list_to_output(console, columns, seq_file, output_)
        # write log to Terminal window; the percentage falls back to 0.0 for an empty input
        # instead of raising ZeroDivisionError after the output was written
        subset_count = len(seq_file)
        subset_percentage = round(subset_count / initial_read_count * 100, 2) if initial_read_count else 0.0
        console.print(f"[cyan]|             Results[/] | Written {subset_count} subsetted sequences to {output_} out of {initial_read_count} initial sequences ({subset_percentage}%)")

####################
# DIVERSITY FIGURE #
####################
    if diversity_figure_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Generate horizontal bar chart displaying diversity within database")
        # check if all parameters have been provided
        required_parameters = {'"--input"': input_, '"--output"': output_, '"--tax-level"': tax_level_}
        check_params(console, required_parameters)
        # parse the input file into the two dictionaries the chart is drawn from
        diversity_data = parse_diversity(console, columns, input_, tax_level_)
        diversity_seq_dict, diversity_species_dict = diversity_data
        # generate horizontal bar chart and write it to "--output"
        horizontal_bar_chart(diversity_seq_dict, diversity_species_dict, output_)

##########################
# AMPLICON LENGTH FIGURE #
##########################
    if amplicon_length_figure_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Generate line graph displaying amplicon length distributions")
        # check if all parameters have been provided
        required_parameters = {'"--input"': input_, '"--output"': output_, '"--tax-level"': tax_level_}
        check_params(console, required_parameters)
        # parse the input file, then draw the line graph from the parsed lengths
        amplicon_length_dict = parse_length(console, columns, input_, tax_level_)
        line_graph(amplicon_length_dict, output_)

###############################
# DATABASE COMPLETENESS TABLE #
###############################
    if completeness_table_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Generate table containing barcode availability for taxonomic group")
        # check if all parameters have been provided
        required_parameters = {'"--input"': input_, '"--output"': output_, '"--names"': names_, '"--nodes"': nodes_, '"--species"': species_}
        check_params(console, required_parameters)
        # retrieve species of interest; every entry is stored under its own name with itself as 'taxid'
        species_list = retrieve_species(console, columns, species_)
        species_dict = collections.defaultdict(dict)
        for species_entry in species_list:
            species_dict[species_entry]['taxid'] = species_entry
        # read the names and nodes files into memory; the progress bar total is their combined size
        input_file_size = os.path.getsize(names_) + os.path.getsize(nodes_)
        with rich.progress.Progress(*columns) as progress_bar:
            task = progress_bar.add_task(console = console, description = "[cyan]|  NCBI tax to memory[/] |", total=input_file_size)
            names_key_tax_number_value_dict, tax_number_key_names_value_dict, synonym_key_dict = names_to_memory(task, progress_bar, names_)
            tax_number_key_rank_and_tax_number_up_values_dict = nodes_to_memory(task, progress_bar, nodes_)
        # retrieve taxonomic lineages for the species of interest (no accession mapping is needed, hence the empty dict)
        seq_input_dict, unresolved_lineage = generate_lineages(console, columns, ranks_, species_dict, {}, names_key_tax_number_value_dict, synonym_key_dict, tax_number_key_rank_and_tax_number_up_values_dict, tax_number_key_names_value_dict)
        # retrieve information about potential number of taxa shared with species of interest on genus and family level
        table_info_dict = calculate_ncbi_species_genera(console, columns, seq_input_dict, tax_number_key_rank_and_tax_number_up_values_dict)
        # retrieve the same information for the reference database
        table_info_dict = calculate_database_species_genera(console, columns, input_, table_info_dict, seq_input_dict)
        # write data to output
        completeness_table_output(table_info_dict, output_)

#####################
# PHYLOGENETIC TREE #
#####################
    if phylogenetic_tree_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Generate a phylogenetic tree based on barcodes for target species")
        # check if all parameters have been provided
        required_parameters = {'"--input"': input_, '"--output"': output_, '"--tax-level"': tax_level_, '"--species"': species_}
        check_params(console, required_parameters)
        # retrieve species information
        species_list = retrieve_species(console, columns, species_)
        # parse input_ and keep only the sequences relevant for the target species
        input_dict = parse_phylo_input(console, columns, input_, tax_level_)
        subset_dict = subset_phylo_input(console, columns, input_dict, species_list)
        # build one tree per target species, advancing the progress bar once per species
        with rich.progress.Progress(*columns) as progress_bar:
            task = progress_bar.add_task(console = console, description = "[cyan]|      Generate trees[/] |", total=len(subset_dict))
            for target_species in subset_dict:
                progress_bar.update(task, advance = 1)
                # generate intermediary fasta file
                align_input = dict_to_fasta(subset_dict[target_species])
                # align sequences
                align_output = align_sequences(align_input)
                # generate phylogenetic tree
                generate_phylo_tree(align_output, output_, target_species)
                # remove intermediary files, including the '.dnd' file next to the alignment input
                for intermediary_file in (align_input, align_output, f'{align_input}.dnd'):
                    os.remove(intermediary_file)

###################################
# AMPLIFICATION EFFICIENCY FIGURE #
###################################
    if amplification_efficiency_figure_:
        # announce the selected function on the console
        console.print("[cyan]|            Function[/] | Generate a bar plot displaying mismatches in the primer-binding regions")
        # check if all parameters have been provided
        required_parameters = {'"--input"': input_, '"--output"': output_, '"--amplicons"': amplicons_, '"--forward"': forward_, '"--reverse"': reverse_}
        check_params(console, required_parameters)
        # import data; the progress bar total is the combined size of the amplicon and input files
        total_file_size = os.path.getsize(amplicons_) + os.path.getsize(input_)
        with rich.progress.Progress(*columns) as progress_bar:
            task = progress_bar.add_task(console = console, description = "[cyan]|         Import data[/] |", total=total_file_size)
            amplicons_dict = amplicon_import(task, progress_bar, amplicons_, tax_group_)
            raw_dict = raw_import(task, progress_bar, input_, amplicons_dict)
        # extract the primer-binding regions
        primer_binding_region_dict = extract_primer_regions(console, columns, amplicons_dict, raw_dict, forward_, reverse_)
        # calculate base proportions at each location within the primer-binding regions (forward first, then reverse)
        forward_position_dict, reverse_position_dict = [
            deconstruct_primer_regions(primer_binding_region_dict, direction)
            for direction in ('forward', 'reverse')
        ]
        # transform dict to np.array
        forward_positions, forward_ordered_counts, forward_bottoms = dict_to_array(forward_position_dict)
        reverse_positions, reverse_ordered_counts, reverse_bottoms = dict_to_array(reverse_position_dict)
        # parse primer data for plotting
        forward_primer_info, reverse_primer_info = [parse_primer(primer) for primer in (forward_, reverse_)]
        # generate figure
        efficiency_barplot(forward_positions, forward_ordered_counts, forward_bottoms,
                           reverse_positions, reverse_ordered_counts, reverse_bottoms,
                           forward_primer_info, reverse_primer_info,
                           forward_, reverse_, output_)

##########
# EXPORT #
##########
    if export_:
        # print function to console; the label is guarded because "--export-format" is only validated
        # by check_params() below, so calling .upper() on a missing value here would raise
        # AttributeError before the parameter check could report it
        format_label = export_format_.upper() if export_format_ else ''
        console.print(f"[cyan]|            Function[/] | Export CRABS database to {format_label} format")
        # check if all parameters have been provided
        # NOTE(review): assumes check_params() aborts the run when a value is missing - confirm
        check_params(console, {'"--input"': input_, '"--output"': output_, '"--export-format"': export_format_})
        # select format function (the format name is matched case-insensitively)
        if format_label == 'IDT-TEXT':
            initial_read_count, seq_file = idt_text(console, columns, input_)
            write_list_to_output(console, columns, seq_file, output_)
        elif format_label == 'BLAST-NOTAX':
            blast_no_tax(console, columns, input_, output_)
        elif format_label == 'BLAST-TAX':
            blast_tax(console, columns, input_, output_)
        else:
            output_to_format = select_function(export_format_)
            # read input file and parse data
            initial_read_count, seq_file = classifier_format(console, columns, input_, output_to_format)
            # write data to output
            write_list_to_output(console, columns, seq_file, output_)
            # write log to Terminal window; the percentage falls back to 0.0 for an empty input
            # instead of raising ZeroDivisionError after the output was written
            exported_count = len(seq_file)
            exported_percentage = round(exported_count / initial_read_count * 100, 2) if initial_read_count else 0.0
            console.print(f"[cyan]|             Results[/] | Written {exported_count} sequences to {output_} out of {initial_read_count} initial sequences ({exported_percentage}%)")
            
################
# EXECUTE CODE #
################
# call the crabs() entry point only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    crabs()