import os
import pandas as pd
import math
from pling.utils import get_pling_root_dir, get_fasta_file_info

configfile: "../config.yaml"

# Workflow-wide settings taken straight from the loaded configuration.
OUTPUTPATH = config["output_dir"]
PREFIX = config["prefix"]
batch_size = config["batch_size"]

# First column of the header-less genomes list (presumably one FASTA path per row).
FASTAFILES = [
    row[0]
    for row in pd.read_csv(config["genomes_list"], header=None).values
]

# Map each file's base name (directory and final extension stripped) to that
# final extension; splitext() already yields the (name, extension) pair.
FASTAEXT = dict(
    os.path.splitext(os.path.basename(fasta)) for fasta in FASTAFILES
)
GENOMES = list(FASTAEXT)

def get_regions():
    """Return the ``--regions`` flag if the ``regions`` config entry is truthy.

    Returns:
        ``"--regions"`` when ``config["regions"]`` is present and truthy,
        otherwise an empty string (the entry defaults to ``False``).
    """
    return "--regions" if config.get("regions", False) else ""

def get_topology(topology):
    """Build the ``--topology`` command-line fragment for a topology value.

    Args:
        topology: The configured topology value. ``None`` or the literal
            string ``"None"`` both mean that no topology was supplied.

    Returns:
        An empty string when no topology was supplied, otherwise
        ``"--topology <topology>"``.
    """
    # Decide from the argument itself instead of re-reading the global config,
    # so the check and the emitted value can never disagree. A real None
    # (e.g. a YAML null) is treated like "None" rather than being rendered
    # as the bogus option "--topology None".
    if topology is None or topology == "None":
        return ""
    return f"--topology {topology}"

include: "../common_rules/common_rules.smk"

# Target rule: requests the final containment outputs under
# {OUTPUTPATH}/containment. Neither file is produced by a rule visible in this
# file -- presumably they come from the included common rules (TODO confirm).
rule all:
    input:
        # Containment distances for all pairs.
        seq_containment = f"{OUTPUTPATH}/containment/all_pairs_containment_distance.tsv",
        # Containment communities output.
        communities = f"{OUTPUTPATH}/containment/containment_communities"

# Runs pling's unimog.py on one batch (wildcard {batch}), producing that
# batch's containment table, unimog file and map file.
rule make_unimogs:
    input:
        # Input function: resolves the batch list file from the {batch} wildcard.
        batch_list=lambda wildcards: f"{OUTPUTPATH}/batches/batch_{wildcards.batch}.txt"
    output:
        # Doubled braces keep {batch} as a Snakemake wildcard inside the f-strings.
        containment=f"{OUTPUTPATH}/tmp_files/containment_batchwise/batch_{{batch}}_containment.tsv",
        unimog = f"{OUTPUTPATH}/unimogs/batch_{{batch}}_align.unimog",
        map = f"{OUTPUTPATH}/unimogs/batch_{{batch}}_map.txt"
    threads: config["make_unimogs_threads"]
    resources:
        # Requested memory is the configured base multiplied by the attempt
        # number, so it grows on each retry of the job.
        mem_mb=lambda wildcards, attempt: config["make_unimogs_mem"]*attempt
    params:
        genomes_list = config["genomes_list"],
        outputpath = OUTPUTPATH,
        identity_threshold = config["identity_threshold"],
        containment_distance = config["seq_containment_distance"],
        pling_root_dir = get_pling_root_dir(),
        # Optional CLI fragments: each is either a ready-made flag string or "".
        regions = get_regions(),
        topology = get_topology(config["topology"])
    # Run the job in a shallow shadow directory (see Snakemake's `shadow` docs).
    shadow: "shallow"
    # PYTHONPATH is pointed at the pling root for the unimog.py invocation.
    shell:
        """
        PYTHONPATH={params.pling_root_dir} python {params.pling_root_dir}/pling/align_snakemake/unimog.py \
            --genomes_list {params.genomes_list} \
            --batch {input.batch_list} \
            --identity_threshold {params.identity_threshold} \
            --containment_distance {params.containment_distance} \
            --outputpath {params.outputpath} \
            --containment_output {output.containment} \
            --unimog_output {output.unimog} \
            --map_output {output.map} \
            {params.regions} \
            {params.topology}
        """
