#! /usr/bin/env python

import argparse
import contextlib
import datetime
import os
import sys
import time

if (__package__ is None) or (__package__ == ''):
    repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    if repo_root not in sys.path:
        sys.path.insert(0, repo_root)

from csubst.__init__ import __version__
from csubst import param
from csubst import runtime


class _TeeTextStream(object):
    def __init__(self, primary, secondary):
        self.primary = primary
        self.secondary = secondary
        self.encoding = getattr(primary, 'encoding', None)
        self.errors = getattr(primary, 'errors', None)

    def write(self, text):
        self.primary.write(text)
        return self.secondary.write(text)

    def writelines(self, lines):
        for line in lines:
            self.write(line)

    def flush(self):
        self.primary.flush()
        self.secondary.flush()

    def isatty(self):
        return bool(getattr(self.primary, 'isatty', lambda: False)())

    def fileno(self):
        return self.primary.fileno()


def strtobool(value):
    if isinstance(value, bool):
        return value
    value = str(value).strip().lower()
    if value in {'y', 'yes', 't', 'true', 'on', '1'}:
        return True
    if value in {'n', 'no', 'f', 'false', 'off', '0'}:
        return False
    raise argparse.ArgumentTypeError('invalid truth value: {}'.format(value))


def _default_prostt5_cache_file():
    return "csubst_prostt5_cache.tsv"


def _default_sa_state_cache_file():
    return "csubst_3di_state_cache.npz"


_OUTPUT_NAMESPACE_DEFAULTS = {
    'benchmark': {
        'outdir': 'csubst_benchmark',
        'output_prefix': 'csubst',
    },
    'benchmark-plot': {
        'outdir': 'csubst_benchmark_plot',
        'output_prefix': 'csubst',
    },
    'doctor': {
        'outdir': 'csubst_doctor',
        'output_prefix': 'csubst',
    },
    'simulate': {
        'outdir': 'csubst_simulate',
        'output_prefix': 'csubst',
    },
    'search': {
        'outdir': 'csubst_search',
        'output_prefix': 'csubst',
    },
    'analyze': {
        'outdir': 'csubst_search',
        'output_prefix': 'csubst',
    },
    'inspect': {
        'outdir': 'csubst_inspect',
        'output_prefix': 'csubst',
    },
    'sites': {
        'outdir': 'csubst_sites',
        'output_prefix': 'csubst',
    },
    'site': {
        'outdir': 'csubst_sites',
        'output_prefix': 'csubst',
    },
}


def _get_subcommand_label(args, default_name):
    subcommand = str(getattr(args, 'subcommand', '')).strip()
    if subcommand == '':
        return default_name
    return subcommand


def _resolve_output_namespace_defaults(argv):
    tokens_with_values = {'--outdir', '--output_prefix', '--log_file'}
    i = 0
    while i < len(argv):
        token = str(argv[i])
        if token in tokens_with_values:
            i += 2
            continue
        if any([token.startswith(opt + '=') for opt in tokens_with_values]):
            i += 1
            continue
        if token.startswith('-'):
            i += 1
            continue
        defaults = _OUTPUT_NAMESPACE_DEFAULTS.get(token, None)
        if defaults is not None:
            return dict(defaults)
        break
    return {
        'outdir': '.',
        'output_prefix': 'csubst',
    }


def _add_output_namespace_args(parser, default_outdir='.', default_prefix='csubst', include_outdir=True, include_output_prefix=True, include_log_file=True):
    if include_outdir:
        parser.add_argument('--outdir', metavar='PATH', default=default_outdir, type=str,
                            help='default=%(default)s: Output directory for generated files.')
    if include_output_prefix:
        parser.add_argument('--output_prefix', metavar='STEM', default=default_prefix, type=str,
                            help='default=%(default)s: Output filename stem used inside --outdir.')
    if include_log_file:
        parser.add_argument('--log_file', metavar='PATH', default='', type=str,
                            help='default=infer from --outdir/--output_prefix: PATH to log file. '
                                 'Relative paths are resolved under --outdir.')


def _make_output_parent_parser(default_outdir='.', default_prefix='csubst'):
    parser = argparse.ArgumentParser(add_help=False)
    _add_output_namespace_args(parser, default_outdir=default_outdir, default_prefix=default_prefix)
    return parser


def _resolve_log_file_from_argv(argv):
    parser = argparse.ArgumentParser(add_help=False)
    defaults = _resolve_output_namespace_defaults(argv)
    _add_output_namespace_args(
        parser,
        default_outdir=defaults['outdir'],
        default_prefix=defaults['output_prefix'],
        include_outdir=True,
        include_output_prefix=True,
        include_log_file=True,
    )
    try:
        args, _ = parser.parse_known_args(argv)
        layout = {
            'outdir': args.outdir,
            'output_prefix': args.output_prefix,
            'log_file': args.log_file,
        }
        runtime.ensure_output_layout(layout, create_dir=True)
        return layout['log_file']
    except Exception:
        return os.path.abspath('csubst.log')


def get_global_parameters_or_exit(args):
    try:
        return param.get_global_parameters(args)
    except (ValueError, AssertionError) as e:
        sys.stderr.write(str(e).strip() + '\n')
        sys.exit(2)

def command_dataset(args):
    print('csubst dataset start:', datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    g = get_global_parameters_or_exit(args)
    from csubst.main_dataset import main_dataset
    main_dataset(g)
    print('csubst dataset: Time elapsed: {:,} sec'.format(int(time.time() - start)))
    print('csubst dataset end:', datetime.datetime.now(datetime.timezone.utc), flush=True)

def command_simulate(args):
    print('csubst simulate start:', datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    g = get_global_parameters_or_exit(args)
    from csubst.main_simulate import main_simulate
    main_simulate(g)
    print('csubst simulate: Time elapsed: {:,} sec'.format(int(time.time() - start)))
    print('csubst simulate end:', datetime.datetime.now(datetime.timezone.utc), flush=True)

def command_sites(args):
    command_label = _get_subcommand_label(args, 'sites')
    print('csubst {} start:'.format(command_label), datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    if str(getattr(args, 'log_file', '')).strip() == '':
        args.log_file = runtime.default_site_log_path(base_dir=os.getcwd(), create_dir=False)
    g = get_global_parameters_or_exit(args)
    from csubst.main_sites import main_sites
    main_sites(g)
    print('csubst {}: Time elapsed: {:,} sec'.format(command_label, int(time.time() - start)))
    print('csubst {} end:'.format(command_label), datetime.datetime.now(datetime.timezone.utc), flush=True)
    if (g['pdb'] is not None):
        # This should be executed at the very end, otherwise CSUBST's main process is killed.
        from csubst.parser_pymol import quit_pymol
        quit_pymol()

def command_search(args):
    command_label = _get_subcommand_label(args, 'search')
    print('csubst {} start:'.format(command_label), datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    g = get_global_parameters_or_exit(args)
    from csubst.main_analyze import main_analyze
    main_analyze(g)
    print('csubst {}: Time elapsed: {:,} sec'.format(command_label, int(time.time() - start)))
    print('csubst {} end:'.format(command_label), datetime.datetime.now(datetime.timezone.utc), flush=True)


def command_benchmark(args):
    print('csubst benchmark start:', datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    g = get_global_parameters_or_exit(args)
    from csubst.main_benchmark import main_benchmark
    main_benchmark(g)
    print('csubst benchmark: Time elapsed: {:,} sec'.format(int(time.time() - start)))
    print('csubst benchmark end:', datetime.datetime.now(datetime.timezone.utc), flush=True)


def command_benchmark_plot(args):
    print('csubst benchmark-plot start:', datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    g = get_global_parameters_or_exit(args)
    from csubst.main_benchmark_plot import main_benchmark_plot
    main_benchmark_plot(g)
    print('csubst benchmark-plot: Time elapsed: {:,} sec'.format(int(time.time() - start)))
    print('csubst benchmark-plot end:', datetime.datetime.now(datetime.timezone.utc), flush=True)


def command_doctor(args):
    print('csubst doctor start:', datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    g = get_global_parameters_or_exit(args)
    from csubst.main_doctor import main_doctor
    main_doctor(g)
    print('csubst doctor: Time elapsed: {:,} sec'.format(int(time.time() - start)))
    print('csubst doctor end:', datetime.datetime.now(datetime.timezone.utc), flush=True)


command_site = command_sites
command_analyze = command_search

def command_inspect(args):
    print('csubst inspect start:', datetime.datetime.now(datetime.timezone.utc), flush=True)
    start = time.time()
    g = get_global_parameters_or_exit(args)
    from csubst.main_inspect import main_inspect
    main_inspect(g)
    print('csubst inspect: Time elapsed: {:,} sec'.format(int(time.time() - start)))
    print('csubst inspect end:', datetime.datetime.now(datetime.timezone.utc), flush=True)


def _add_search_subcommand_args(parser):
    # branch combinations
    parser.add_argument('--max_arity', metavar='INTEGER', default=2, type=int,
                        help='default=%(default)s: The maximum combinatorial number of branches (K). '
                             'Set 2 for branch pairs. 3 or larger for higher-order combinations.')
    parser.add_argument('--exhaustive_until', metavar='INTEGER', default=2, type=int,
                        help='default=%(default)s: Analyze all independent branch combinations until specified arity (K). '
                             'Be careful of combinatorial explosion if set to 3 or higher. '
                             'Set to 1 for foreground-only analysis.')
    parser.add_argument('--max_combination', metavar='INTEGER', default=10000, type=int,
                        help='default=%(default)s: Maximum number of branch combinations to generate at K+1. '
                             'If possible branch combinations at K+1 are more than this number, '
                             'top N convergent combinatinons are selected with the thresholds specified by --cutoff_stat. '
                             'The first stat in --cutoff_stat is most prioritized.')
    parser.add_argument('--exclude_sister_pair', metavar='yes|no', default='yes', type=strtobool,
                        help='default=%(default)s: Set to "yes" for excluding sister branches in branch combination analysis.')
    parser.add_argument('--cutoff_stat', metavar='[STAT1,VALUE1|STAT2,VALUE2|...]',
                        default='OCNany2spe,2.0|omegaCany2spe,5.0', type=str,
                        help='default=%(default)s: Cutoff statistics for searching higher-order branch combinations. '
                             'STAT is a column in csubst_cb_N.tsv. Regular expressions are supported. '
                             'e.g., "N_sub_[0-9]+" to specify the number of branch-wise nonsynonymous substitutions '
                             'in all branches. VALUE is the minimum value, '
                             'and branch combinations with smaller values will be excluded.')
    parser.add_argument('--output_stat', metavar='STAT1,STAT2,...',
                        default='any2any,any2dif,any2spe', type=str,
                        help='default=%(default)s: Comma-delimited combinatorial-substitution statistics '
                             'to compute and report. Choose from '
                             'any2any, any2spe, any2dif, dif2any, dif2spe, dif2dif, '
                             'spe2any, spe2spe, spe2dif.')
    parser.add_argument('--branch_dist', metavar='yes|no', default='yes', type=strtobool,
                        help='default=%(default)s: Set "yes" to calculate inter-branch distance.')
    # Substitution outputs
    parser.add_argument('--b', metavar='yes|no', default='yes', type=strtobool,
                        help='default=%(default)s: Branch output. Set "yes" to generate the output tsv.')
    parser.add_argument('--s', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Site output. Set "yes" to generate the output tsv.')
    parser.add_argument('--cs', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Combinatorial-site output. Set "yes" to generate the output tsv.')
    parser.add_argument('--cb', metavar='yes|no', default='yes', type=strtobool,
                        help='default=%(default)s: Combinatorial-branch output. Set "yes" to generate the output tsv.')
    parser.add_argument('--bs', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Branch-site output. Set "yes" to generate the output tsv.')
    parser.add_argument('--cbs', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Combinatorial-branch-site output. Set "yes" to generate the output tsv.')
    # Omega_C calculation
    parser.add_argument('--expectation_method', metavar='codon_model|urn', default=None, type=str,
                        choices=['codon_model', 'urn'],
                        help='default=codon_model: Method to calculate omega_C expected values. '
                             '"codon_model" utilizes a codon substitution model in ancestral-state reconstruction. '
                             'In addition to base substitution models, codon frequencies and among-site rate '
                             'heterogeneity are taken into account. '
                             'Described in Fukushima and Pollock (2023, https://doi.org/10.1038/s41559-022-01932-7). '
                             '"urn" uses among-site randomization (weighted urn sampling).')
    parser.add_argument('--urn_model', metavar='wallenius|fisher|factorized_approx', default=None, type=str,
                        choices=['wallenius', 'fisher', 'factorized_approx'],
                        help='default=wallenius: Urn expectation model used when --expectation_method urn. '
                             '"wallenius" uses Wallenius-type inclusion probabilities; '
                             '"fisher" uses Fisher-type inclusion probabilities; '
                             '"factorized_approx" uses the legacy factorized approximation.')
    parser.add_argument('--omegaC_method', metavar='[submodel|modelfree]', default=None, type=str,
                        choices=['modelfree','submodel'],
                        help='Deprecated alias for --expectation_method. '
                             '"submodel" maps to --expectation_method codon_model. '
                             '"modelfree" maps to --expectation_method urn.')
    parser.add_argument('--calc_omega_pvalue', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Experimental feature. Estimate branch-combination-wise one-sided empirical '
                             'P values of omega_C by substitution randomization (--expectation_method urn only). '
                             'When --calibrate_longtail is active, p-value columns are suffixed with "_nocalib".')
    parser.add_argument('--omega_pvalue_null_model', metavar='hypergeom|poisson|poisson_full|nbinom', default='hypergeom', type=str,
                        choices=['hypergeom', 'poisson', 'poisson_full', 'nbinom'],
                        help='default=%(default)s: Experimental feature. Null count generator for --calc_omega_pvalue. '
                             '"hypergeom" uses legacy site-subset randomization with integer draw sizes. '
                             '"poisson" uses factorized branch/site continuous-rate nulls (Poisson sampling from expected counts). '
                             '"poisson_full" uses branch-specific site masses without branch-site factorization; '
                             '"nbinom" adds Gamma-Poisson overdispersion on factorized mean counts. '
                             'Both Poisson-based models reduce sensitivity to --min_sub_pp.')
    parser.add_argument('--omega_pvalue_nbinom_alpha', metavar='auto|FLOAT', default='auto', type=str,
                        help='default=%(default)s: Experimental feature. Overdispersion coefficient for '
                             '--omega_pvalue_null_model nbinom. Variance is mean + alpha*mean^2. '
                             '"auto" estimates alpha from observed-vs-expected count dispersion.')
    parser.add_argument('--omega_pvalue_niter_schedule', metavar='0|N1,N2,...', default='0', type=str,
                        help='default=%(default)s: Experimental feature. Iteration schedule for staged '
                             '--calc_omega_pvalue refinement under all --omega_pvalue_null_model modes '
                             '(upper-tail edge focused). '
                             'Set 0 (auto) to use adaptive stages (100,1000). '
                             'Custom values should be comma-delimited strictly increasing integers '
                             '(e.g., 200,2000,10000).')
    parser.add_argument('--omega_pvalue_refine_upper_edge_bins', metavar='INTEGER', default=2, type=int,
                        help='default=%(default)s: Experimental feature. Number of upper-tail p-value edge bins '
                             'used to select rows for the next refinement stage. '
                             'Rows with exceedance rank <= this value are refined; set 0 to disable.')
    parser.add_argument('--omega_pvalue_rounding', metavar='round|stochastic|floor|ceil', default='stochastic', type=str,
                        choices=['round', 'stochastic', 'floor', 'ceil'],
                        help='default=%(default)s: Experimental feature. How fractional branch-wise substitution masses are converted to '
                             'integer draw sizes in randomization for --calc_omega_pvalue. '
                             '"round" reproduces legacy behavior; "stochastic" applies per-iteration '
                             'stochastic rounding; "floor"/"ceil" provide deterministic lower/upper bounds.')
    parser.add_argument('--asrv', metavar='no|pool|sn|each|file|file_each', default='each', type=str,
                        choices=['no', 'pool', 'sn', 'each', 'file', 'file_each'],
                        help='default=%(default)s: Experimental. Correct among-site rate variation in omega calculation. '
                             'This option is used with --expectation_method urn but not with --expectation_method codon_model. '
                             '"no", No ASRV, meaning a uniform rate among sites. '
                             '"pool", All categories of substitutions are pooled to calculate a single set of ASRV. '
                             '"sn", Synonymous and nonsynonymous substitutions are processed individually '
                             'to calculate their respective ASRVs (2 sets). '
                             '"each", Each of 61x60 patterns of substitutions are processed individually '
                             'to calculate their respective ASRVs. '
                             '"file", ASRV is obtained from the IQ-TREE\'s .rate file (1 set). '
                             '"file_each", Hybrid of site-rate and category-specific empirical site mass. ')
    parser.add_argument('--asrv_dirichlet_alpha', metavar='FLOAT', default=1.0, type=float,
                        help='default=%(default)s: Dirichlet pseudo-count alpha for branch-wise ASRV normalization '
                             'in --asrv sn/each/file_each. Set >0 to smooth sparse site-mass profiles.')
    parser.add_argument('--epistasis_apply_to', metavar='N|S|NS', default='N', type=str,
                        help='default=%(default)s: Experimental feature. Expected-count channels to apply structure-aware epistasis correction. '
                             '"N" adjusts nonsynonymous expectations (ECN), '
                             '"S" adjusts synonymous expectations (ECS; useful as a negative-control check), '
                             '"NS" adjusts both.')
    parser.add_argument('--epistasis_site_metric', metavar='off|auto|degree|proximity|hybrid', default='off', type=str,
                        help='default=%(default)s: Experimental feature. Site-level structure feature for epistasis weighting. '
                             '"off" keeps epistasis-site-feature selection disabled by default '
                             '(auto-enabled when --epistasis_beta is active), '
                             '"degree" uses contact degree, '
                             '"proximity" uses distance-weighted local proximity, '
                             '"hybrid" averages degree/proximity z-scores, '
                             '"auto" selects hybrid when both are available (otherwise falls back).')
    parser.add_argument('--epistasis_beta', metavar='off|auto|FLOAT', default='off', type=str,
                        help='default=%(default)s: Experimental feature. Strength of structure-aware epistasis correction. '
                             'Set "off"/0 to disable, a non-negative float for fixed strength, '
                             'or "auto" for branch-level CV tuning.')
    parser.add_argument('--epistasis_beta_partition', metavar='global|branch_depth', default='global', type=str,
                        help='default=%(default)s: Experimental feature. Scope of epistasis beta estimation. '
                             '"global" fits one beta per channel; '
                             '"branch_depth" fits beta per branch-depth bin to reduce over-shrinkage from global averaging.')
    parser.add_argument('--epistasis_branch_depth_bins', metavar='INT', default=3, type=int,
                        help='default=%(default)s: Experimental feature. Number of branch-depth bins for --epistasis_beta_partition branch_depth.')
    parser.add_argument('--epistasis_feature_mode', metavar='single|paired', default='single', type=str,
                        help='default=%(default)s: Experimental feature. Site-feature dimensionality for epistasis weighting. '
                             '"single" uses resolved --epistasis_site_metric only; '
                             '"paired" uses both degree and proximity when both are available.')
    parser.add_argument('--epistasis_clip', metavar='auto|FLOAT', default='3.0', type=str,
                        help='default=%(default)s: Experimental feature. Absolute clipping bound for the internal epistasis score '
                             'before exponentiation. Set a positive float or "auto".')
    parser.add_argument('--epistasis_joint_auto', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Experimental feature. Jointly optimize epistasis alpha/clip with beta CV. '
                             'When enabled with --epistasis_beta auto, the best (alpha, clip, beta) tuple is selected by CV score.')
    parser.add_argument('--epistasis_joint_alpha_grid', metavar='FLOAT,FLOAT,...', default='0,0.5,1,2', type=str,
                        help='default=%(default)s: Experimental feature. Candidate ASRV Dirichlet alpha grid for --epistasis_joint_auto.')
    parser.add_argument('--epistasis_joint_clip_grid', metavar='FLOAT,FLOAT,...', default='1.5,2,2.5,3,4,5', type=str,
                        help='default=%(default)s: Experimental feature. Candidate clip grid for --epistasis_joint_auto.')
    parser.add_argument('--epistasis_degree_file', metavar='PATH', default='', type=str,
                        help='default=%(default)s: Experimental feature. Optional precomputed site-wise structure-degree table. '
                             'If omitted, structure degree is computed during search when --epistasis_pdb is set.')
    parser.add_argument('--epistasis_pdb', metavar='PATH/PDB_CODE/besthit', default='', type=str,
                        help='default=%(default)s: Experimental feature. Structure source for search-time epistasis degree generation. '
                             'PATH: local structure file; PDB code: fetched from PDB; '
                             '"besthit": online sequence search + best-hit retrieval.')
    parser.add_argument('--epistasis_database', metavar='DATABASE1,DATABASE2,...', default='swissmodel,pdb,alphafold,alphafill', type=str,
                        help='default=%(default)s: Experimental feature. Comma-delimited database priority for --epistasis_pdb besthit. '
                             'Supported: swissmodel, pdb, alphafold, alphafill.')
    parser.add_argument('--epistasis_database_evalue_cutoff', metavar='FLOAT', default=1.0, type=float,
                        help='default=%(default)s: Experimental feature. E-value cutoff for --epistasis_pdb besthit searches.')
    parser.add_argument('--epistasis_database_minimum_identity', metavar='FLOAT', default=0.25, type=float,
                        help='default=%(default)s: Experimental feature. Minimum sequence identity for MMseqs2 PDB search in --epistasis_pdb besthit.')
    parser.add_argument('--epistasis_database_timeout', metavar='FLOAT', default=30.0, type=float,
                        help='default=%(default)s: Experimental feature. Network timeout in seconds for --epistasis_pdb besthit retrieval.')
    parser.add_argument('--epistasis_user_alignment', metavar='PATH', default='', type=str,
                        help='default=%(default)s: Experimental feature. Optional user-provided protein alignment for mapping structure residues '
                             'to alignment sites in search-time epistasis degree generation.')
    parser.add_argument('--epistasis_contact_distance', metavar='FLOAT', default=8.0, type=float,
                        help='default=%(default)s: Experimental feature. CA-CA distance threshold (Angstrom) used to compute structure contact degree.')
    parser.add_argument('--epistasis_mafft_exe', metavar='PATH', default='mafft', type=str,
                        help='default=%(default)s: Experimental feature. PATH to mafft executable for search-time structure mapping.')
    parser.add_argument('--epistasis_mafft_op', metavar='FLOAT', default=-1, type=float,
                        help='default=%(default)s: Experimental feature. mafft --op value for search-time structure mapping. -1 uses default.')
    parser.add_argument('--epistasis_mafft_ep', metavar='FLOAT', default=-1, type=float,
                        help='default=%(default)s: Experimental feature. mafft --ep value for search-time structure mapping. -1 uses default.')
    parser.add_argument('--epistasis_pymol_max_num_chain', metavar='INT', default=20, type=int,
                        help='default=%(default)s: Experimental feature. Maximum chain count threshold for candidate structures in --epistasis_pdb besthit.')
    parser.add_argument('--epistasis_degree_outfile', metavar='PATH', default='csubst_epistasis_structure_degree.tsv', type=str,
                        help='default=%(default)s: Experimental feature. Output TSV path for search-time structure degree table.')
    parser.add_argument('--pseudocount_alpha', metavar='FLOAT|auto', default='0.0', type=str,
                        help='default=%(default)s: Dirichlet pseudo-count alpha for convergence-statistic '
                             'shrinkage. Set 0 to disable smoothing. '
                             'Use "auto" for Empirical-Bayes alpha estimation.')
    parser.add_argument('--pseudocount_mode', metavar='none|symmetric|empirical', default='none', type=str,
                        choices=['none', 'symmetric', 'empirical'],
                        help='default=%(default)s: Pseudocount mode. '
                             '"none" keeps legacy behavior; '
                             '"symmetric" applies equal pseudo-counts; '
                             '"empirical" shrinks toward global substitution-category frequencies.')
    parser.add_argument('--pseudocount_target', metavar='observed|expected|both', default='both', type=str,
                        choices=['observed', 'expected', 'both'],
                        help='default=%(default)s: Apply pseudo-counts to observed counts, expected counts, or both.')
    parser.add_argument('--pseudocount_report', action='store_true',
                        help='Write pseudocount configuration and pre-smoothing sparsity diagnostics to logs and outputs.')
    parser.add_argument('--calibrate_longtail', metavar='yes|no', default='yes', type=strtobool,
                        help='default=%(default)s: Calibrate dS_C to match the distribution range of dS_C with dN_C '
                             'by quantile-based transformation.')

def _build_parser():
    # Main parser
    parser = argparse.ArgumentParser(description='CSUBST - a toolkit for molecular convergence detection. For details, see https://github.com/kfuku52/csubst')
    parser.add_argument('--version', action='version', version='CSUBST version: {}'.format(__version__))
    _add_output_namespace_args(parser, include_outdir=False, include_output_prefix=False, include_log_file=True)
    subparsers = parser.add_subparsers(dest='subcommand')

    # shared: common
    psr_co = argparse.ArgumentParser(add_help=False)
    psr_co.add_argument('--alignment_file', metavar='PATH', default='', type=str,
                       help='default=%(default)s: PATH to in-frame codon alignment (FASTA format).')
    psr_co.add_argument('--full_cds_alignment_file', metavar='PATH', default='', type=str,
                       help='default=%(default)s: PATH to aligned full-length CDS FASTA. '
                            'Required when --nonsyn_recode is "3di20".')
    psr_co.add_argument('--rooted_tree_file', metavar='PATH', default='', type=str,
                       help='default=%(default)s: PATH to input rooted tree (Newick format). Tip labels should be consistent with --alignment_file.')
    psr_co.add_argument('--genetic_code', metavar='INTEGER', type=int, required=False, default=1,
                       help='default=%(default)s: NCBI codon table ID. 1 = "Standard". See here: '
                            'https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi')
    psr_co.add_argument('--infile_type', metavar='[iqtree]', default='iqtree', type=str, choices=['iqtree',],
                       help='default=%(default)s: The input file format.')
    psr_co.add_argument('--threads', metavar='INTEGER', default=1, type=int, required=False,
                       help='default=%(default)s: The number of CPUs for parallel computations.')
    psr_co.add_argument('--float_type', metavar='16|32|64', default=64, type=int, required=False,
                        help='default=%(default)s: Float data type for tensors. "16" is not recommended.')
    psr_co.add_argument('--sub_tensor_backend', metavar='auto|dense|sparse', default='auto', type=str,
                        choices=['auto', 'dense', 'sparse'],
                        help='default=%(default)s: Backend for substitution tensors. '
                             '"auto" selects sparse reducers for low-density tensors.')
    psr_co.add_argument('--sub_tensor_sparse_density_cutoff', metavar='FLOAT', default=0.15, type=float, required=False,
                        help='default=%(default)s: Density cutoff for --sub_tensor_backend auto. '
                             'Sparse reducers are selected when estimated substitution tensor density is <= this value.')
    psr_co.add_argument('--sub_tensor_auto_sparse_min_elements', metavar='INT', default=100000000, type=int, required=False,
                        help='default=%(default)s: For --sub_tensor_backend auto, build sparse substitution tensors '
                             'when estimated tensor elements (branch x site x group x from x to) are >= this threshold.')
    psr_co.add_argument('--parallel_backend', metavar='auto|multiprocessing|threading', default='auto', type=str,
                        choices=['auto', 'multiprocessing', 'threading'],
                        help='default=%(default)s: Backend policy for parallel workloads. '
                             '"auto" uses multiprocessing.')
    psr_co.add_argument('--parallel_chunk_factor', metavar='INT', default=1, type=int, required=False,
                        help='default=%(default)s: Number of chunks per worker for general parallel workloads.')
    psr_co.add_argument('--parallel_chunk_factor_reducer', metavar='INT', default=4, type=int, required=False,
                        help='default=%(default)s: Number of chunks per worker for dense/sparse combinatorial reducers.')
    psr_co.add_argument('--parallel_min_items_sub_tensor', metavar='INT', default=256, type=int, required=False,
                        help='default=%(default)s: Minimum number of branch pairs to enable parallel substitution-tensor generation.')
    psr_co.add_argument('--parallel_min_items_per_job_sub_tensor', metavar='INT', default=64, type=int, required=False,
                        help='default=%(default)s: Minimum branch pairs assigned per worker in substitution-tensor generation.')
    psr_co.add_argument('--parallel_min_items_node_union', metavar='INT', default=20000, type=int, required=False,
                        help='default=%(default)s: Minimum pair-union workload to enable parallel node-union in branch-combination generation.')
    psr_co.add_argument('--parallel_min_items_per_job_node_union', metavar='INT', default=5000, type=int, required=False,
                        help='default=%(default)s: Minimum pair-unions assigned per worker in node-union.')
    psr_co.add_argument('--parallel_min_items_nc_matrix', metavar='INT', default=100000, type=int, required=False,
                        help='default=%(default)s: Minimum number of branch combinations to enable parallel nc-matrix population.')
    psr_co.add_argument('--parallel_min_items_per_job_nc_matrix', metavar='INT', default=25000, type=int, required=False,
                        help='default=%(default)s: Minimum branch combinations assigned per worker in nc-matrix population.')
    psr_co.add_argument('--parallel_min_items_cb', metavar='INT', default=20000, type=int, required=False,
                        help='default=%(default)s: Minimum number of branch combinations to enable parallel cb reducers.')
    psr_co.add_argument('--parallel_min_items_per_job_cb', metavar='INT', default=5000, type=int, required=False,
                        help='default=%(default)s: Minimum branch combinations assigned per worker in cb reducers.')
    psr_co.add_argument('--parallel_min_rows_cbs', metavar='INT', default=200000, type=int, required=False,
                        help='default=%(default)s: Minimum number of output rows (combination x site) to enable parallel cbs reducers.')
    psr_co.add_argument('--parallel_min_rows_per_job_cbs', metavar='INT', default=50000, type=int, required=False,
                        help='default=%(default)s: Minimum cbs output rows assigned per worker.')
    psr_co.add_argument('--parallel_min_items_branch_dist', metavar='INT', default=20000, type=int, required=False,
                        help='default=%(default)s: Minimum number of branch combinations to enable parallel branch-distance calculation.')
    psr_co.add_argument('--parallel_min_items_per_job_branch_dist', metavar='INT', default=5000, type=int, required=False,
                        help='default=%(default)s: Minimum branch combinations assigned per worker in branch-distance calculation.')
    psr_co.add_argument('--parallel_min_items_expected_state', metavar='INT', default=50000000, type=int, required=False,
                        help='default=%(default)s: Minimum estimated expected-state workload '
                             '(branch x site x state x state) to enable parallel expected-state projection.')
    psr_co.add_argument('--parallel_min_items_per_job_expected_state', metavar='INT', default=10000000, type=int, required=False,
                        help='default=%(default)s: Minimum estimated expected-state workload per worker.')
    psr_co.add_argument('--float_digit', metavar='INT', default=4, type=int, required=False,
                        help='default=%(default)s: Number of output float digits.')
    psr_co.add_argument('--write_instantaneous_rate_matrix', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Write csubst instantaneous codon rate matrix TSV.')

    psr_out_analyze = _make_output_parent_parser(default_outdir='csubst_search', default_prefix='csubst')
    psr_out_benchmark = _make_output_parent_parser(default_outdir='csubst_benchmark', default_prefix='csubst')
    psr_out_benchmark_plot = _make_output_parent_parser(default_outdir='csubst_benchmark_plot', default_prefix='csubst')
    psr_out_doctor = _make_output_parent_parser(default_outdir='csubst_doctor', default_prefix='csubst')
    psr_out_inspect = _make_output_parent_parser(default_outdir='csubst_inspect', default_prefix='csubst')
    psr_out_simulate = _make_output_parent_parser(default_outdir='csubst_simulate', default_prefix='csubst')

    # shared: IQ-TREE inputs
    psr_iq = argparse.ArgumentParser(add_help=False)
    psr_iq.add_argument('--iqtree_exe', metavar='PATH', default='iqtree', type=str, required=False,
                        help='default=%(default)s: PATH to the IQ-TREE executable')
    psr_iq.add_argument('--iqtree_outdir', metavar='PATH', default='csubst_iqtree', type=str, required=False,
                        help='default=%(default)s: Shared directory used when --iqtree_* paths are "infer".')
    psr_iq.add_argument('--iqtree_model', metavar='STR', default='ECMK07+F+R4', type=str, required=False,
                        help='default=%(default)s: Codon substitution model for ancestral state reconstruction. '
                             'Base models of "MG", "GY", "ECMK07", and "ECMrest" are supported. '
                             'Among-site rate heterogeneity and codon frequencies can be specified. '
                             'See here for details: http://www.iqtree.org/doc/Substitution-Models')
    psr_iq.add_argument('--iqtree_redo', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Whether to rerun IQ-TREE even if all intermediate files exist.')
    psr_iq.add_argument('--iqtree_treefile', metavar='PATH', default='infer', type=str, required=False,
                       help='default=%(default)s: PATH to the IQ-TREE\'s .treefile output. '
                            '"infer" resolves inside --iqtree_outdir from --alignment_file')
    psr_iq.add_argument('--iqtree_state', metavar='PATH', default='infer', type=str, required=False,
                       help='default=%(default)s: PATH to the IQ-TREE\'s .state output. '
                            '"infer" resolves inside --iqtree_outdir from --alignment_file')
    psr_iq.add_argument('--iqtree_rate', metavar='PATH', default='infer', type=str, required=False,
                       help='default=%(default)s: PATH to the IQ-TREE\'s .rate output. '
                            '"infer" resolves inside --iqtree_outdir from --alignment_file')
    psr_iq.add_argument('--iqtree_iqtree', metavar='PATH', default='infer', type=str, required=False,
                       help='default=%(default)s: PATH to the IQ-TREE\'s .iqtree output. '
                            '"infer" resolves inside --iqtree_outdir from --alignment_file')
    psr_iq.add_argument('--iqtree_log', metavar='PATH', default='infer', type=str, required=False,
                        help='default=%(default)s: PATH to the IQ-TREE\'s .log output. '
                             '"infer" resolves inside --iqtree_outdir from --alignment_file')

    # shared: Ancestral_state
    psr_as = argparse.ArgumentParser(add_help=False)
    psr_as.add_argument('--ml_anc', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Maximum-likelihood-like analysis by binarizing ancestral states.')
    psr_as.add_argument('--min_sub_pp', metavar='FLOAT', default=0, type=float,
                         help='default=%(default)s: The minimum posterior probability of single substitutions to count. '
                              'Set 0 for a counting without binarization. Omitted if --ml_anc is set to "yes". '
                              'For empirical omega_C p-values (--calc_omega_pvalue yes), values around 0.05 are recommended.')
    psr_as.add_argument('--drop_invariant_tip_sites', metavar='no|tip_invariant|zero_sub_mass', default='tip_invariant', type=str,
                        help='default=%(default)s: Site-drop criterion before substitution-tensor generation. '
                             '"no" disables site dropping. '
                             '"tip_invariant" drops codon sites invariant across non-missing tips '
                             '(including sites with only one unambiguous tip codon). '
                             '"zero_sub_mass" drops only sites guaranteed to have zero observed substitution mass '
                             '(both N and S) across analyzed branches. '
                             'In `inspect`, csubst_sites_index_map.tsv is written to map retained internal '
                             'site indices to original alignment positions.')
    psr_rc = argparse.ArgumentParser(add_help=False)
    psr_rc.add_argument('--nonsyn_recode', metavar='no|3di20|dayhoff6|sr6|kgb6|sr4|dayhoff9|dayhoff12|dayhoff15|dayhoff18|srchisq6|kgbauto6',
                        default='no', type=str,
                        help='default=%(default)s: Recoding scheme for nonsynonymous-state calculations. '
                             'Use "no" for standard 20 amino-acid states. '
                             '"3di20" enables structural-alphabet (3Di) recoding via ProstT5. '
                             '"srchisq6" and "kgbauto6" infer 6-state groupings from alignment composition.')
    psr_rc.add_argument('--sa_asr_mode', metavar='translate|direct', default='direct', type=str,
                        help='default=%(default)s: Structural-alphabet ancestral-state mode for --nonsyn_recode 3di20. '
                             '"translate" converts ML amino-acid states to 3Di via ProstT5, then one-hot encodes '
                             'each branch-site by argmax state. '
                             '"direct" runs direct 3Di ancestral-state reconstruction with IQ-TREE.')
    psr_rc.add_argument('--prostt5_model', metavar='STR', default='Rostlab/ProstT5', type=str,
                        help='default=%(default)s: ProstT5 model identifier used for --nonsyn_recode 3di20.')
    psr_rc.add_argument('--prostt5_local_dir', metavar='PATH', default='', type=str,
                        help='default=%(default)s: Optional local directory containing ProstT5 files. '
                             'If provided, it is used instead of --prostt5_model.')
    psr_rc.add_argument('--prostt5_no_download', metavar='yes|no', default='no', type=strtobool,
                        help='default=%(default)s: Set "yes" to disable automatic ProstT5 download and require local files.')
    psr_rc.add_argument('--prostt5_device', metavar='auto|cpu|cuda|mps', default='auto', type=str,
                        help='default=%(default)s: Device for ProstT5 inference. '
                             'With "auto", CUDA is preferred, then MPS, then CPU. '
                             'When MPS is used, CSUBST enables PYTORCH_ENABLE_MPS_FALLBACK=1 automatically.')
    psr_rc.add_argument('--prostt5_cache', metavar='yes|no', default='yes', type=strtobool,
                        help='default=%(default)s: Reuse ProstT5 sequence-level cache.')
    psr_rc.add_argument('--prostt5_cache_file', metavar='PATH', default=_default_prostt5_cache_file(), type=str,
                        help='default=%(default)s: ProstT5 cache file path.')
    psr_rc.add_argument('--sa_state_cache', metavar='auto|yes|no', default='auto', type=str,
                        help='default=%(default)s: Shared 3Di-state cache mode for --nonsyn_recode 3di20. '
                             '"auto" uses --sa_state_cache_file when a compatible cache exists, otherwise computes and writes it. '
                             '"yes" requires a compatible cache file. "no" disables cache read/write.')
    psr_rc.add_argument('--sa_state_cache_file', metavar='PATH', default=_default_sa_state_cache_file(), type=str,
                        help='default=%(default)s: Shared 3Di-state cache file path used by search and inspect.')
    psr_rc.add_argument('--sa_iqtree_model', metavar='STR', default='GTR', type=str,
                        help='default=%(default)s: IQ-TREE model for --sa_asr_mode direct with --nonsyn_recode 3di20 '
                             '(used with --seqtype MORPH).')
    psr_treeplot = argparse.ArgumentParser(add_help=False)
    psr_treeplot.add_argument('--species_regex', metavar='REGEX', default='^([^_]+_[^_]+)_', required=False, type=str,
                              help='default=%(default)s: Regular expression for extracting species IDs from leaf labels '
                                   'for species-overlap node annotation in tree plots. '
                                   'If the regex contains capture groups, the first non-empty captured group is used; '
                                   'otherwise, the full regex match is used. '
                                   'For GENUS_SPECIES_GENEID labels, the default works as-is.')
    psr_treeplot.add_argument('--species_overlap_node_plot', metavar='yes|no|auto', default='auto', required=False, type=str,
                              help='default=%(default)s: Show speciation/duplication node markers in tree plots. '
                                   '"yes" always tries to plot, "no" disables them, and "auto" enables them only when '
                                   'all tip labels are parseable by --species_regex '
                                   '(GENUS_SPECIES_GENEID labels satisfy the default regex).')
    psr_output_manifest = argparse.ArgumentParser(add_help=False)
    psr_output_manifest.add_argument('--output_manifest', metavar='yes|no', default='yes', required=False, type=strtobool,
                                     help='default=%(default)s: Write an outputs manifest TSV with generated files, '
                                          'sizes, and key command parameters.')

    # shared: foreground
    psr_fg = argparse.ArgumentParser(add_help=False)
    psr_fg.add_argument('--foreground', metavar='PATH', default=None, type=str, required=False,
                    help='default=%(default)s: A text file to specify the foreground lineages. '
                         'The file should contain two columns separated by a tab: '
                         '1st column for lineage IDs and 2nd for regex-compatible leaf names. '
                         'See https://github.com/kfuku52/csubst/wiki/Foreground-specification')
    psr_fg.add_argument('--fg_format', metavar='1|2', default=1, type=int, choices=[1,2],
                    help='default=%(default)s: Table format of --foreground.')
    psr_fg.add_argument('--fg_exclude_wg', metavar='yes|no', default='yes', type=strtobool,
                    help='default=%(default)s: Set "yes" to exclude branch combinations '
                         'within individual foreground lineages.')
    psr_fg.add_argument('--fg_stem_only', metavar='yes|no', default='yes', type=strtobool,
                    help='default=%(default)s: Set "yes" to exclude non-stem branches of foreground lineages.')
    psr_fg.add_argument('--mg_parent', metavar='yes|no', default='no', type=strtobool,
                    help='default=%(default)s: Mark the parent branches of the foreground stem branches as "marginal". '
                         'They may serve as "negative controls" relative to the foreground lineages.')
    psr_fg.add_argument('--mg_sister', metavar='yes|no', default='no', type=strtobool,
                    help='default=%(default)s: Mark the sister branches of the foreground stem branches as "marginal". '
                         'They may serve as "negative controls" relative to the foreground lineages.')
    psr_fg.add_argument('--mg_sister_stem_only', metavar='yes|no', default='yes', type=strtobool,
                    help='default=%(default)s: Set "yes" to exclude non-stem branches of sister lineages.')
    psr_fg.add_argument('--fg_clade_permutation', metavar='INT', default=0, type=int,
                    help='default=%(default)s: Experimental. Randomly select the same/similar number and size of clades as foreground '
                         'and run analysis N times to obtain a permutation-based P value of convergence. '
                         'At least 1000 is recommended.')
    psr_fg.add_argument('--min_clade_bin_count', metavar='INT', default=10, type=int,
                    help='default=%(default)s: Experimental. Minimum number of branches per bin for foreground clade permutation. ')

    # dataset
    help_txt = 'generates out-of-the-box test datasets. See `csubst dataset -h`'
    dataset = subparsers.add_parser('dataset', help=help_txt, parents=[])
    dataset.add_argument('--name', metavar='STR', default='PGK', type=str, choices=['PGK','PEPC'],
                         help='default=%(default)s: Name of dataset to generate.')
    dataset.add_argument('--iqtree_outdir', metavar='PATH', default='csubst_iqtree', type=str,
                         help='default=%(default)s: Directory for bundled IQ-TREE intermediate files.')
    dataset.set_defaults(handler=command_dataset)

    # simulate
    help_txt = 'generates a simulated sequence alignment under a convergent evolutionary scenario. See `csubst simulate -h`'
    simulate = subparsers.add_parser('simulate', help=help_txt, parents=[psr_co,psr_iq,psr_fg,psr_out_simulate])
    simulate.add_argument('--background_omega', metavar='FLOAT', default=None, type=float,
                          help='default=IQ-TREE estimate (fallback 0.2): dN/dS for background branches. '
                               'Set explicitly to override IQ-TREE estimated omega.')
    simulate.add_argument('--foreground_omega', metavar='FLOAT', default=0.2, type=float,
                          help='default=%(default)s: dN/dS for foreground branches.')
    simulate.add_argument('--num_simulated_site', metavar='INT', default=-1, type=int,
                          help='default=%(default)s: Number of codon sites to simulate. '
                               '-1 to set the size of input alignment.')
    simulate.add_argument('--percent_convergent_site', metavar='FLOAT', default=100, type=float,
                          help='default=%(default)s: Percentage of codon sites to evolve convergently.'
                               'If --convergent_amino_acids randomN, '
                               'Convergent amino acids are randomly selected within each partition. ')
    simulate.add_argument('--optimized_branch_length', metavar='yes|no', default='no', type=strtobool,
                          help='default=%(default)s: Whether to use the branch lengths optimized by IQ-TREE. '
                               'If "no", the branch lengths in the input tree are used.')
    simulate.add_argument('--tree_scaling_factor', metavar='FLOAT', default=1, type=float,
                          help='default=%(default)s: Branch lengths are multiplied by this value.')
    simulate.add_argument('--foreground_scaling_factor', metavar='FLOAT', default=1, type=float,
                          help='default=%(default)s: In the codon sites specified by --percent_convergent_site, '
                               'branch lengths in foreground lineages are multiplied by this value.')
    simulate.add_argument('--convergent_amino_acids', metavar='STR', default='random1', type=str,
                          help='default=%(default)s: Non-delimited list of amino acids the sequences converge into. '
                               'e.g, AQTS, ACQ, WDETS... '
                               '"randomN" specifies randomly selected N amino acids. '
                               '"random0" does not cause convergence.')
    simulate.add_argument('--percent_biased_sub', metavar='FLOAT', default=90, type=float,
                          help='default=%(default)s: Approximately this percentage of nonsynonymous substitutions '
                               'in the foreground branches/sites are biased toward amino acids specified by '
                               '--convergent_amino_acids, while preserving the original relative codon frequencies '
                               'among the synonymous codons.')
    simulate.add_argument('--simulate_seed', metavar='INT', default=-1, type=int,
                          help='default=%(default)s: Random seed for simulation. '
                               'Set -1 to use non-deterministic seed.')
    simulate.add_argument('--simulate_asrv', metavar='no|file', default='no', type=str,
                          choices=['no', 'file'],
                          help='default=%(default)s: Among-site rate variation mode for simulation. '
                               '"no" uses a single-rate process across sites. '
                               '"file" applies site rates from IQ-TREE .rate '
                               '(C_Rate if present, otherwise Rate) to pyvolve per site.')
    simulate.add_argument('--simulate_eq_freq', metavar='auto|iqtree|alignment', default='auto', type=str,
                          choices=['auto', 'iqtree', 'alignment'],
                          help='default=%(default)s: Source of codon equilibrium frequencies for simulation Q. '
                               '"auto" prefers IQ-TREE frequencies and falls back to alignment frequencies; '
                               '"iqtree" requires parsed IQ-TREE frequencies; '
                               '"alignment" always re-estimates from alignment.')
    simulate.add_argument('--export_true_asr', metavar='yes|no', default='yes', type=strtobool,
                          help='default=%(default)s: Export true ancestral-state bundle from simulation '
                               '(.state/.treefile/.rate/.iqtree/.log/.anc.fa) for search-time evaluation.')
    simulate.add_argument('--true_asr_prefix', metavar='PATH_PREFIX', default='', type=str,
                          help='default=infer from --outdir/--output_prefix: Output path prefix for true-ASR bundle files.')
    simulate.set_defaults(handler=command_simulate)

    # benchmark
    help_txt = 'benchmarks `csubst search` across parameter grids on the same input data and summarizes runtime/output metrics. See `csubst benchmark -h`'
    benchmark = subparsers.add_parser('benchmark', help=help_txt, parents=[psr_co,psr_iq,psr_fg,psr_as,psr_rc,psr_output_manifest,psr_out_benchmark])
    _add_search_subcommand_args(benchmark)
    benchmark.add_argument('--benchmark_expectation_methods', metavar='LIST', default='', type=str,
                           help='default=current --expectation_method: Comma-delimited expectation methods to compare.')
    benchmark.add_argument('--benchmark_asrv_modes', metavar='LIST', default='', type=str,
                           help='default=current --asrv: Comma-delimited ASRV modes to compare.')
    benchmark.add_argument('--benchmark_nonsyn_recode_modes', metavar='LIST', default='', type=str,
                           help='default=current --nonsyn_recode: Comma-delimited nonsynonymous recoding modes to compare.')
    benchmark.add_argument('--benchmark_sa_asr_modes', metavar='LIST', default='', type=str,
                           help='default=current --sa_asr_mode: Comma-delimited 3Di ASR modes to compare when --nonsyn_recode is 3di20.')
    benchmark.add_argument('--benchmark_pseudocount_modes', metavar='LIST', default='', type=str,
                           help='default=current --pseudocount_mode: Comma-delimited pseudocount modes to compare.')
    benchmark.add_argument('--benchmark_keep_going', metavar='yes|no', default='yes', type=strtobool,
                           help='default=%(default)s: Continue remaining benchmark runs after a failed configuration.')
    benchmark.add_argument('--benchmark_score_column', metavar='COLUMN', default='omegaCany2spe', type=str,
                           help='default=%(default)s: Score column to summarize from csubst_cb_2.tsv.')
    benchmark.add_argument('--benchmark_ocn_column', metavar='COLUMN', default='OCNany2spe', type=str,
                           help='default=%(default)s: Count column paired with --benchmark_score_column.')
    benchmark.add_argument('--benchmark_min_score', metavar='FLOAT', default=5.0, type=float,
                           help='default=%(default)s: Threshold for counting high-score hits in benchmark summaries.')
    benchmark.add_argument('--benchmark_min_ocn', metavar='FLOAT', default=2.0, type=float,
                           help='default=%(default)s: Threshold for counting high-score hits in benchmark summaries.')
    benchmark.set_defaults(handler=command_benchmark)

    # benchmark-plot
    help_txt = 'collects existing benchmark outputs, compares parameter-wise performance, and writes an overview plot. See `csubst benchmark-plot -h`'
    benchmark_plot = subparsers.add_parser('benchmark-plot', help=help_txt, parents=[psr_output_manifest,psr_out_benchmark_plot])
    benchmark_plot.add_argument('--benchmark_dir', metavar='PATH', default='.', type=str,
                                help='default=%(default)s: Root directory to scan for benchmark outputs.')
    benchmark_plot.add_argument('--benchmark_plot_recursive', metavar='yes|no', default='yes', type=strtobool,
                                help='default=%(default)s: Recursively scan --benchmark_dir for benchmark outputs.')
    benchmark_plot.add_argument('--benchmark_plot_metrics', metavar='LIST', default='elapsed_sec,hit_rows,score_max', type=str,
                                help='default=%(default)s: Comma-delimited numeric columns to compare in the overview plot.')
    benchmark_plot.add_argument('--benchmark_plot_format', metavar='pdf|png|svg', default='pdf', type=str,
                                choices=['pdf', 'png', 'svg'],
                                help='default=%(default)s: Output format for the overview figure.')
    benchmark_plot.set_defaults(handler=command_benchmark_plot)

    # doctor
    help_txt = 'checks input files, inferred IQ-TREE paths, and optional foreground/3Di settings before running heavier workflows. See `csubst doctor -h`'
    doctor = subparsers.add_parser('doctor', help=help_txt, parents=[psr_co,psr_iq,psr_fg,psr_rc,psr_output_manifest,psr_out_doctor])
    doctor.add_argument('--check_iqtree_exe', metavar='yes|no', default='yes', type=strtobool,
                        help='default=%(default)s: Check that --iqtree_exe is callable with --version.')
    doctor.add_argument('--doctor_fail_level', metavar='error|warning|never', default='error', type=str,
                        choices=['error', 'warning', 'never'],
                        help='default=%(default)s: Exit non-zero when checks at or above this level are found.')
    doctor.set_defaults(handler=command_doctor)


    # sites
    help_txt = 'calculates site-wise combinatorial substitutions on focal branch combinations and maps them onto protein structure. '
    help_txt += 'See `csubst sites -h` (legacy alias: `csubst site -h`).'
    site = subparsers.add_parser('sites', aliases=['site'], help=help_txt, parents=[psr_co,psr_iq,psr_as,psr_rc,psr_treeplot,psr_output_manifest])
    site.add_argument('--branch_id', metavar='fg|INT,INT,INT,...', default='', required=False, type=str,
                      help='default=%(default)s: Comma-delimited list of branch_ids to characterize. '
                      'Run `csubst search` first and select branches of interest. '
                      'If "fg", all foreground branch pairs will be analyzed (--mode intersection only). '
                      '--mode lineage expects ANC,DES, and --mode set ignores --branch_id.')
    site.add_argument('--mode', metavar='MODE', default='intersection', required=False, type=str,
                      help='default=%(default)s: Visualization mode. '
                      '"intersection", visualize combinatorial substitutions in branches specified by --branch_id. '
                      '"lineage", process branches between ancestor/descendant specified by --branch_id ANC,DES. '
                      '"set,SUBTYPE,EXPR", evaluate set operations among branch-wise substitutions (|, -, &, ^). '
                      'SUBTYPE: any or spe. '
                      'Use "A" for all other non-root branches. '
                      'e.g. "set,any,((117|48)-A)".')
    site.add_argument('--tree_site_plot', metavar='yes|no', default='yes', type=strtobool,
                      help='default=%(default)s: Generate a matplotlib-based tree + site summary plot.')
    site.add_argument('--site_state_plot', metavar='yes|no', default='yes', type=strtobool,
                      help='default=%(default)s: Generate substitution-pattern summary outputs '
                           '(csubst_sites.state.pdf, csubst_sites.state_N.tsv, csubst_sites.state_S.tsv).')
    site.add_argument('--tree_site_plot_format', metavar='pdf|png|svg', default='pdf', required=False, type=str,
                      choices=['pdf', 'png', 'svg'],
                      help='default=%(default)s: Output format for --tree_site_plot.')
    site.add_argument('--min_single_prob', metavar='FLOAT', default=0.8, required=False, type=float,
                      help='default=%(default)s: Minimum probability threshold for single-branch substitutions. '
                           'Shared by site plotting and PyMOL outputs.')
    site.add_argument('--min_combinat_prob', metavar='FLOAT', default=0.5, required=False, type=float,
                      help='default=%(default)s: Minimum probability threshold for combinatorial substitutions. '
                           'Shared by site plotting and PyMOL outputs.')
    site.add_argument('--tree_site_plot_max_sites', metavar='INT', default=30, required=False, type=int,
                      help='default=%(default)s: Maximum number of plotted sites in the tree + site summary '
                           '(convergent and divergent columns are balanced when both are available).')
    site.add_argument('--site_output_manifest', metavar='yes|no', dest='output_manifest', default='yes', required=False, type=strtobool,
                      help=argparse.SUPPRESS)
    site.add_argument('--cb_file', metavar='PATH', default='csubst_cb_2.tsv', required=False, type=str,
                      help='default=%(default)s: PATH to csubst_cb_*.tsv output of csubst search.')
    site.add_argument('--untrimmed_cds', metavar='PATH', default=None, required=False, type=str,
                      help='default=%(default)s: PATH to fasta file containing untrimmed CDS sequence(s). '
                           'Codon positions along the sequence(s) appear in the output tsv.')
    site.add_argument('--pdb', metavar='PATH/PDB_CODE/besthit', default=None, required=False, type=str,
                      help='default=%(default)s: '
                           'One of the followings. '
                           'PATH: PATH to the downloaded pdb file. '
                           'PDB CODE: e.g. 3SV0. PDB file will be fetched from the database. '
                           'The PDB protein sequence will be aligned with mafft, '
                           'and a pymol session will be generated. '
                           'besthit: Run online sequence search and fetch the best-hit model. See --database.')
    site.add_argument('--database', metavar='DATABASE1,DATABASE2,...', default='swissmodel,pdb,alphafold,alphafill', required=False, type=str,
                      help='default=%(default)s: '
                           'Comma-delimited names of protein structure databases to search. '
                           'The top priority is given to the first. '
                           'swissmodel: Run online QBLAST search against the UniProt database (https://www.uniprot.org/) and fetch the best SWISS-MODEL homology model (https://swissmodel.expasy.org/). '
                           'pdb: Run online MMseqs2 search against the RCSB PDB database (https://www.rcsb.org/) and fetch the best-hit PDB model. '
                           'alphafold: Run online QBLAST search against the UniProt database (https://www.uniprot.org/) and fetch the best-hit AlphaFold model (https://alphafold.ebi.ac.uk/). '
                           'alphafill: Run online QBLAST search against the UniProt database (https://www.uniprot.org/) and fetch the best-hit AlphaFill model (https://alphafill.eu/).')
    site.add_argument('--database_evalue_cutoff', metavar='FLOAT', default=1.0, required=False, type=float,
                      help='default=%(default)s: E-value cutoff in the database searches. Applied to MMseqs2 and QBLAST.')
    site.add_argument('--database_minimum_identity', metavar='FLOAT', default=0.25, required=False, type=float,
                      help='default=%(default)s: The minimum sequence identity for the database searches. Applied to MMseqs2. See https://search.rcsb.org/index.html#search-api')
    site.add_argument('--database_timeout', metavar='FLOAT', default=30.0, required=False, type=float,
                      help='default=%(default)s: Network timeout in seconds for online structure database requests.')
    site.add_argument('--user_alignment', metavar='PATH', default=None, required=False, type=str,
                      help='default=%(default)s: The user-provided alignment FASTA for the substitution mapping to protein structures.')
    site.add_argument('--remove_solvent', metavar='yes|no', default='yes', type=strtobool,
                      help='default=%(default)s: Whether to remove solvent and small non-specific ligands. '
                           'Used only with --pdb.')
    site.add_argument('--remove_ligand', metavar='CODE1,CODE2,CODE3,...', default='', type=str,
                      help='default=%(default)s: Comma-delimited list of PDB ligand codes to be removed. '
                           'e.g., "so4,po4,bme". '
                           'Used only with --pdb.')
    site.add_argument('--mask_subunit', metavar='yes|no', default='yes', type=strtobool,
                      help='default=%(default)s: Whether to mask unrelated subunits. '
                           'Used only with --pdb.')
    site.add_argument('--mafft_exe', metavar='PATH', default='mafft', required=False, type=str,
                      help='default=%(default)s: PATH to mafft executable.')
    site.add_argument('--mafft_op', metavar='FLOAT', default=-1, required=False, type=float,
                      help='default=%(default)s: mafft --op parameter. -1 to use the default value.')
    site.add_argument('--mafft_ep', metavar='FLOAT', default=-1, required=False, type=float,
                      help='default=%(default)s: mafft --ep parameter. -1 to use the default value.')
    site.add_argument('--pymol_gray', metavar='INT', default=80, required=False, type=int,
                      help='default=%(default)s: Gray value for no-substitution sites. 0=black, 100=white')
    site.add_argument('--pymol_transparency', metavar='FLOAT', default=0.65, required=False, type=float,
                      help='default=%(default)s: Surface transparency. 0=non-transparent, 1=completely transparent')
    site.add_argument('--pymol_surface_quality', metavar='INT', default=-1, required=False, type=int,
                      help='default=%(default)s: PyMOL surface quality setting. Lower values speed up surface generation at the cost of smoothness.')
    site.add_argument('--pymol_img', metavar='yes|no', default='yes', type=strtobool,
                      help='default=%(default)s: Whether to generate a rendered 6-view image file of protein structure.')
    site.add_argument('--pymol_max_num_chain', metavar='INT', default=20, required=False, type=int,
                         help='default=%(default)s: Maximum number of chains to visualize. '
                               'If the number of chains in the PDB file is larger than this value, '
                               '--pymol_img will be disabled.')
    site.add_argument('--uniprot_feature_types', metavar='all|TYPE1,TYPE2,...', default='all', required=False, type=str,
                      help='default=%(default)s: UniProt feature types to append to csubst_sites output. '
                           'By default ("all"), all available UniProt features are considered. '
                           'Redundant annotations (including the uniprot_feature_types_* column) are excluded unless '
                           '--uniprot_include_redundant is "yes". '
                           'Use a comma-delimited subset to filter (e.g., "Active site,Binding site").')
    site.add_argument('--uniprot_include_redundant', metavar='yes|no', default='no', required=False, type=strtobool,
                      help='default=%(default)s: Include site-invariant/redundant UniProt annotations '
                           '(e.g., accession columns and full-length features such as Chain).')
    site.add_argument('--export2chimera', metavar='yes|no', default='no', required=False, type=strtobool,
                      help='default=%(default)s: Set "yes" to export files for the visualization of '
                           'convergence/divergence probabilities with UCSF Chimera. '
                           '--untrimmed_cds is required.')
    site.set_defaults(handler=command_sites)


    # search
    help_txt = 'calculates convergence rates and other metrics on branch combinations. '
    help_txt += 'See `csubst search -h` (legacy alias: `csubst analyze -h`).'
    analyze = subparsers.add_parser('search', aliases=['analyze'], help=help_txt, parents=[psr_co,psr_iq,psr_fg,psr_as,psr_rc,psr_out_analyze])
    analyze.set_defaults(handler=command_search)
    _add_search_subcommand_args(analyze)

    # inspect
    help_txt = 'writes branch/tree/state inspection outputs without running convergence statistics. See `csubst inspect -h`'
    inspect = subparsers.add_parser('inspect', help=help_txt, parents=[psr_co,psr_iq,psr_fg,psr_as,psr_rc,psr_treeplot,psr_output_manifest,psr_out_inspect])
    inspect.add_argument('--plot_state_aa', metavar='no|all|SITE_SPEC', default='no', type=str,
                         help='default=%(default)s: Write ancestral amino-acid state trees from unfiltered sites. '
                              'Use "all" for one multi-page PDF with every site, '
                              '"2,5,10" for one multi-page PDF with one site per page, '
                              'or "2-5-10" for one single-page PDF with concatenated site labels.')
    inspect.add_argument('--plot_state_codon', metavar='no|all|SITE_SPEC', default='no', type=str,
                         help='default=%(default)s: Write ancestral codon state trees from unfiltered sites. '
                              'Use "all" for one multi-page PDF with every site, '
                              '"2,5,10" for one multi-page PDF with one site per page, '
                              'or "2-5-10" for one single-page PDF with concatenated site labels.')
    inspect.add_argument('--plot_state_aa_highlight_pattern', metavar='AASEQ', default='', type=str,
                         help='default=%(default)s: When --plot_state_aa requests specific site(s), '
                              'highlight tip labels whose concatenated amino-acid states exactly match this pattern. '
                              'If all descendant leaves of a clade match, branch/clade coloring is also highlighted.')
    inspect.add_argument('--plot_state_aa_highlight_color', metavar='COLOR', default='crimson', type=str,
                         help='default=%(default)s: Matplotlib color for --plot_state_aa_highlight_pattern matches.')
    inspect.add_argument('--tree_tip_label_spacing', metavar='FLOAT', default=1.0, type=float,
                         help='default=%(default)s: Vertical spacing multiplier for inspect tree tip labels. '
                              '1.0 keeps the current layout; 2.0 doubles the spacing by increasing figure height per leaf.')
    inspect.add_argument('--tree_fig_max_height', metavar='FLOAT', default=180.0, type=float,
                         help='default=%(default)s: Maximum PDF page height in inches for inspect tree plots. '
                              'Increase this together with --tree_tip_label_spacing for very large trees.')
    inspect.add_argument('--plot_nonsyn_recode_pca', metavar='yes|no', default='no', type=strtobool,
                         help='default=%(default)s: Write csubst_nonsyn_recoding_pca.png from input alignment.')
    inspect.add_argument('--plot_nonsyn_recode_pca_3di20', metavar='yes|no', default='no', type=strtobool,
                         help='default=%(default)s: Include 3di20 in csubst_nonsyn_recoding_pca.png.')
    inspect.add_argument('--sa_smoke_max_branches', metavar='INTEGER', default=0, type=int,
                         help='default=%(default)s: CPU smoke mode for --nonsyn_recode 3di20. '
                              'Limit 3Di inference to the first N non-root branches ordered by branch_id. '
                              'Set 0 to disable.')
    inspect.add_argument('--download_prostt5', metavar='yes|no', default='no', type=strtobool,
                         help='default=%(default)s: Download/check ProstT5 model files and exit.')
    inspect.set_defaults(handler=command_inspect)
    return parser


def _main():
    # Start time
    csubst_start = time.time()
    print('CSUBST start:', datetime.datetime.now(datetime.timezone.utc), flush=True)
    parser = _build_parser()

    # Handler
    args = parser.parse_args()
    #param.set_num_thread_variables(num_thread=args.threads)
    if hasattr(args, 'handler'):
        try:
            with runtime.run_tempdir_context(base_dir=os.getcwd()):
                args.handler(args)
        except (ValueError, AssertionError) as e:
            sys.stderr.write(str(e).strip() + '\n')
            sys.exit(2)
    else:
        parser.print_help()

    # End time
    txt = '\nCSUBST end: {}, Elapsed time = {:,.1f} sec'
    print(txt.format(datetime.datetime.now(datetime.timezone.utc), int(time.time()-csubst_start)), flush=True)


if __name__ == "__main__":
    log_path = _resolve_log_file_from_argv(sys.argv[1:])
    with open(log_path, 'w', buffering=1, encoding='utf-8') as log_file:
        stdout_tee = _TeeTextStream(sys.stdout, log_file)
        stderr_tee = _TeeTextStream(sys.stderr, log_file)
        with contextlib.redirect_stdout(stdout_tee), contextlib.redirect_stderr(stderr_tee):
            _main()
