# Base path of the MAE module as returned by cfg.MAE.renameLocalDir(); the
# rules below build script paths relative to it.
# NOTE(review): presumably the module's local working directory - confirm.
MAE_WORKDIR = cfg.MAE.renameLocalDir()

# The three files cfg.MAE.getModuleIndexFiles() returns for the
# "mae-pipeline" module: index input, rule-graph file and index output.
(
    MAE_index_input,
    MAE_graph_file,
    MAE_index_output,
) = cfg.MAE.getModuleIndexFiles("mae-pipeline", MAE_WORKDIR)

# Module-level rule: depends on the index input and the rule-graph file and
# declares the module index output.
rule mae:
    input:
        MAE_index_input,
        MAE_graph_file
    output:
        MAE_index_output
    run:
        # ci() is only invoked when cfg.MAE.run is truthy; otherwise this
        # block does nothing and does not create the declared output itself.
        # NOTE(review): presumably cfg.MAE.run is the "module enabled" flag
        # and ci() builds the index page - confirm.
        if cfg.MAE.run:
            ci(str(MAE_WORKDIR), 'mae-pipeline')

# Renders the rule graph of the MAE workflow into MAE_graph_file as SVG.
#
# The shell pipeline:
#   1. asks snakemake (--nolock --rulegraph) for the rule graph of the
#      MAE_index_output target,
#   2. uses sed to keep only the lines from "digraph snakemake_dag" up to the
#      first line containing a closing brace, dropping any other text
#      snakemake printed on stdout,
#   3. pipes the graph to Graphviz `dot`, laid out top-to-bottom
#      (-Grankdir=TB), and writes the SVG to the rule's output.
#
# The "}}" in the sed expression is Snakemake's escape for a literal "}" inside
# a shell template.
rule mae_dependency:
    output: MAE_graph_file
    shell:
        """
        snakemake --nolock --rulegraph {MAE_index_output} | \
            sed -ne '/digraph snakemake_dag/,/}}/p' | \
            dot -Tsvg -Grankdir=TB > {output}
        """

# Target-only rule (no output/action of its own): requests whatever
# cfg.getHtmlFromScript() returns for the QC/Datasets.R script located under
# MAE_WORKDIR.
# NOTE(review): presumably the rendered HTML report of that script - confirm.
rule sampleQC:
    input:
        cfg.getHtmlFromScript(MAE_WORKDIR / "QC" / "Datasets.R")


## MAE
# Runs MAE/filterSNVs.sh for one (vcf, rna) wildcard pair.
# The script receives, in order: the two chromosome-name mapping tables, the
# VCF file, the vcf wildcard, the RNA BAM file, the output .vcf.gz path, the
# bcftools and samtools commands, and the thread count.
rule mae_createSNVs:
    input:
        # Chromosome-name mapping tables shipped with the pipeline scripts.
        ncbi2ucsc = cfg.workDir / "Scripts/Pipeline/chr_NCBI_UCSC.txt",
        ucsc2ncbi = cfg.workDir / "Scripts/Pipeline/chr_UCSC_NCBI.txt",
        # Per-sample files resolved from the wildcards at DAG-build time.
        vcf_file = lambda wildcards: cfg.MAE.getVcf(wildcards.vcf),
        bam_file = lambda wildcards: sa.getFilePath(wildcards.rna, 'RNA_BAM_FILE'),
        script = MAE_WORKDIR / "MAE" / "filterSNVs.sh",
        # Not passed on the command line below.
        # NOTE(review): presumably listed so the job re-runs when the
        # per-sample SNV parameters change - confirm.
        snv_params = lambda wildcards: cfg.processedDataDir / "mae" / "params/snvs" / (wildcards.rna + "_snvParams.csv")
    output:
        snvs_filename = cfg.processedDataDir / "mae" / "snvs" / "{vcf}--{rna}.vcf.gz",
        # The index path is not handed to the script.
        # NOTE(review): presumably the script writes it next to the .vcf.gz - confirm.
        snvs_index = cfg.processedDataDir / "mae" / "snvs" / "{vcf}--{rna}.vcf.gz.tbi"
    threads: 4
    resources:
        # Requested memory scales with the thread count (1500 MB per thread).
        mem_mb = lambda wildcards, threads: threads * 1500
    params:
        bcftools = cfg.getTool("bcftoolsCmd"),
        samtools = cfg.getTool("samtoolsCmd")
    shell:
        """
        {input.script} {input.ncbi2ucsc} {input.ucsc2ncbi} {input.vcf_file} \
        {wildcards.vcf} {input.bam_file} {output.snvs_filename} \
        {params.bcftools} {params.samtools} {threads}
        """

# Runs MAE/ASEReadCounter.sh for one (vcf, rna) wildcard pair, using the
# filtered SNV file produced by mae_createSNVs.
# The script receives, in order: the two chromosome-name mapping tables, the
# SNV VCF, the RNA BAM file, the "<vcf>--<rna>" identifier, the genome FASTA,
# the gatkIgnoreHeaderCheck setting, the output path, the bcftools, samtools
# and gatk commands, and the thread count.
rule mae_allelicCounts:
    input:
        # Chromosome-name mapping tables shipped with the pipeline scripts.
        ncbi2ucsc = cfg.workDir / "Scripts/Pipeline/chr_NCBI_UCSC.txt",
        ucsc2ncbi = cfg.workDir / "Scripts/Pipeline/chr_UCSC_NCBI.txt",
        vcf_file = rules.mae_createSNVs.output.snvs_filename,
        # Per-sample files resolved from the rna wildcard at DAG-build time.
        bam_file = lambda wildcards: sa.getFilePath(wildcards.rna, 'RNA_BAM_FILE'),
        fasta = lambda wildcards: cfg.MAE.getGenomePath(wildcards.rna),
        # Not passed on the command line below.
        # NOTE(review): presumably the FASTA sequence dictionary, required to
        # exist before the script runs - confirm.
        dict = lambda wildcards: cfg.genome.getFastaDict(cfg.MAE.getGenomePath(wildcards.rna)),
        script = MAE_WORKDIR / "MAE" / "ASEReadCounter.sh"
    output:
        counted = cfg.processedDataDir / "mae" / "allelic_counts" / "{vcf}--{rna}.csv.gz"
    threads: 4
    resources:
        # Requested memory scales with the thread count (2000 MB per thread).
        mem_mb = lambda wildcards, threads: threads * 2000
    params:
        bcftools = cfg.getTool("bcftoolsCmd"),
        samtools = cfg.getTool("samtoolsCmd"),
        gatk = cfg.getTool("gatkCmd"),
        # Value of the MAE config entry "gatkIgnoreHeaderCheck", forwarded
        # verbatim to the script.
        gatkIgnoreHeaderCheck = cfg.MAE.get("gatkIgnoreHeaderCheck")
    shell:
        """
        {input.script} {input.ncbi2ucsc} {input.ucsc2ncbi} \
        {input.vcf_file} {input.bam_file} {wildcards.vcf}--{wildcards.rna} \
        {input.fasta} {params.gatkIgnoreHeaderCheck} {output.counted} \
        {params.bcftools} {params.samtools} {params.gatk} {threads}
        """
