#!/usr/bin/env python3

"""
A script file to run DFAST_QC against multiple genomes in a given directory.

arguments:
    input_dir: str - The directory containing the FASTA files
    fasta: str - acceptable file extension for the fasta files, default: fa,fasta,fna,fa.gz,fna.gz,fasta.gz
    out_dir: str - output dir name, default: dqc_out
    output: str - output file name, default: dqc_report.tsv
    taxid: int - taxid of the genomes (-1: auto, 0:prokaryote), default: -1
    thread: int - number of threads per process, default: 1 (recommended: 1)
    disable_tc, disable_cc, enable_gtdb: Same as the options in DFAST_QC

"""

import os
import glob
import argparse
# import log module
import logging

from mss_validate.batch_dqc import run_dqc_parallel, get_fasta_files
from mss_validate.read_dqc_result import collect_dqc_results, save_report


# Command-line template for a single DFAST_QC invocation; `{input_fasta}` and
# `{out_dir}` are brace placeholders to be filled in per genome.
# NOTE(review): not referenced anywhere in the visible part of this script;
# presumably kept for reference -- confirm before removing.
DQC_BASE_COMMAND = "dfast_qc --input_fasta {input_fasta} --out_dir {out_dir} --force"
# NOTE(review): unused in the visible code (the entry point passes
# ref_dir=None to run_dqc_parallel); presumably a placeholder for a
# reference-data directory -- confirm.
REF_DIR = ""

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Function to parse the command line arguments
def parse_args(argv=None):
    """Parse the command-line arguments of the batch DFAST_QC runner.

    Args:
        argv: Optional sequence of argument strings to parse. When None
            (the default), argparse falls back to ``sys.argv[1:]``, so
            calling ``parse_args()`` behaves exactly as before. Passing an
            explicit list allows programmatic use and testing.

    Returns:
        argparse.Namespace with the attributes ``input_dir``, ``fasta``,
        ``out_dir``, ``output``, ``taxid``, ``disable_tc``, ``disable_cc``,
        ``enable_gtdb`` and ``thread``.
    """
    parser = argparse.ArgumentParser(description="Run DFAST_QC in parallel for batch execution of multiple genomes")
    parser.add_argument("input_dir", type=str, help="The directory containing the FASTA files")
    parser.add_argument("--fasta", type=str, default="fa,fasta,fna,fa.gz,fasta.gz,fna.gz", help="Acceptable file extension for the fasta files. Default: fa,fasta,fna,fa.gz,fasta.gz,fna.gz")
    parser.add_argument("--out_dir", "-O", type=str, default="dqc_out", help="Name of output directory. Intermediate files will be saved here.")
    parser.add_argument("--output", "-o", type=str, default="dqc_report.tsv", help="Output file name")
    parser.add_argument("--taxid", type=int, default=-1, help="taxid for taxonomy check (-1: auto, 0:prokaryote)")
    parser.add_argument("--disable_tc", action="store_true", help="Disable taxonomy check using ANI")
    parser.add_argument("--disable_cc", action="store_true", help="Disable completeness check using CheckM")
    parser.add_argument("--enable_gtdb", action="store_true", help="Enable GTDB search")
    parser.add_argument("--thread", "-t", type=int, default=1, help="Number of threads to use")
    return parser.parse_args(argv)




def main():
    """Run DFAST_QC on the FASTA files of the input directory and report.

    Steps, in order:
      1. Parse the command-line arguments.
      2. Collect the FASTA files from ``input_dir`` via ``get_fasta_files``.
      3. Run DFAST_QC on them with ``run_dqc_parallel``.
      4. Gather the results with ``collect_dqc_results`` and write them with
         ``save_report`` to the path given by ``--output``.
      5. Print the saved report to stdout.
    """
    args = parse_args()
    fasta_files = get_fasta_files(args.input_dir, args.fasta)
    out_dir = args.out_dir

    # Execute DFAST_QC. The return value is not needed here: the report
    # below is built by collect_dqc_results from fasta_files and out_dir.
    run_dqc_parallel(
        fasta_files,
        out_dir,
        taxid=args.taxid,
        ref_dir=None,
        threads=args.thread,
        disable_tc=args.disable_tc,
        disable_cc=args.disable_cc,
        enable_gtdb=args.enable_gtdb,
    )

    # Collect and save the results.
    dqc_results = collect_dqc_results(fasta_files, out_dir)
    save_report(
        dqc_results,
        args.output,
        disable_tc=args.disable_tc,
        disable_cc=args.disable_cc,
        enable_gtdb=args.enable_gtdb,
    )
    print("Done!")
    print("Output file is saved to: ", args.output)
    print("=" * 100)
    # Echo the report that was just written so it is visible on the console.
    with open(args.output) as f:
        print(f.read())


if __name__ == "__main__":
    main()

