import os
import pandas as pd
from pling.utils import get_pling_root_dir
import math

# Load the workflow configuration into the global `config` mapping.
configfile: "../config.yaml"

# Scalar settings read straight from the configuration.
OUTPUTPATH = config["output_dir"]
PREFIX = config["prefix"]
batch_size = config["batch_size"]

# Genome FASTA paths: the first column of the header-less genomes list.
FASTAFILES = list(pd.read_csv(config["genomes_list"], header=None).values[:, 0])

# Map each FASTA file's base name (without extension) to its extension.
# os.path.splitext already yields (stem, extension) pairs, which dict()
# consumes directly; a repeated stem keeps the extension seen last.
FASTAEXT = dict(os.path.splitext(os.path.basename(fasta)) for fasta in FASTAFILES)
GENOMES = list(FASTAEXT)

# Directory of the first listed FASTA file.
# NOTE(review): presumably all genomes live in this one directory -- confirm.
FASTAPATH = os.path.dirname(FASTAFILES[0])

# n genomes give n*(n-1)/2 unordered pairs; round up to whole batches of
# `batch_size` pairs each.
number_of_batches = math.ceil(len(GENOMES) * (len(GENOMES) - 1) / 2 / batch_size)

# Bring in the rules defined in common_rules.smk. Kept after the globals
# above, as in the original ordering.
include: "../common_rules/common_rules.smk"

# Target rule: it has no outputs or commands of its own and only requests
# the containment communities file under the configured output directory.
# The rule that produces that file is not defined in the visible part of
# this file (presumably it comes from an included rules file -- confirm).
rule all:
    input:
        communities = f"{OUTPUTPATH}/containment/containment_communities"

# Per-batch sequence containment. The rule is only defined when the
# "sourmash_only" configuration flag is absent or falsy.
if not config.get("sourmash_only", False):
    # Runs seq_containment.py for one batch (selected by the {batch} wildcard)
    # and writes that batch's containment table as a TSV file.
    rule pairwise_seq_containment:
        input:
            # Batch file for this wildcard value. It is not passed on the
            # command line; presumably the script locates it from
            # --outputpath and --batch (TODO confirm against seq_containment.py).
            batch_list=lambda wildcards: f"{OUTPUTPATH}/batches/batch_{wildcards.batch}.txt"
        output:
            containment=f"{OUTPUTPATH}/tmp_files/containment_batchwise/batch_{{batch}}_containment.tsv"
        threads: config["pairwise_seq_containment_threads"]
        resources:
            # Requested memory is the configured base amount multiplied by
            # the attempt number, so each retry asks for more.
            mem_mb=lambda wildcards, attempt: config["pairwise_seq_containment_mem"]*attempt
        params:
            genomes_list = config["genomes_list"],
            batch = lambda wildcards: wildcards.batch,
            outputpath = OUTPUTPATH,
            identity_threshold = config["identity_threshold"],
            pling_root_dir = get_pling_root_dir()
        shadow: "shallow"
        # Every interpolated value carries the ":q" format flag so that paths
        # containing spaces or shell metacharacters reach the script as single
        # arguments instead of being word-split by the shell. The final
        # argument line deliberately has no trailing backslash.
        shell: """
            PYTHONPATH={params.pling_root_dir:q} python {params.pling_root_dir:q}/pling/jac_network_snakemake/seq_containment.py \
                --genomes_list {params.genomes_list:q} \
                --batch {params.batch:q} \
                --identity_threshold {params.identity_threshold:q} \
                --outputpath {params.outputpath:q} \
                --containment_output {output.containment:q}
            """
