Skip to content

Commit

Permalink
split out the main.py stuff into respective modules
Browse files (browse the repository at this point in the history)
  • Loading branch information
pdimens committed May 7, 2024
1 parent 8082e03 commit 2da18f1
Show file tree
Hide file tree
Showing 11 changed files with 423 additions and 405 deletions.
349 changes: 35 additions & 314 deletions src/harpy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,20 @@
print(details, file = sys.stderr)
exit(1)

from . import align
from . import demultiplex
from . import impute
from . import phase
from . import preflight
from . import qc
from . import simulate
from . import snp
from . import sv
from .popgroups import popgroup
#from .simulatelinkedreads import reads
from .simulatevariants import snpindel, inversion, cnv, translocation
from .simulatelinkedreads import linkedreads
from .stitchparams import stitchparams
from .hpc import hpc
from .demultiplex import gen1
from .preflight import bam, fastq
from .qc import qc
from .align import bwa, ema, minimap
from .snp import freebayes, mpileup
from .sv import leviathan, naibr
from .impute import impute
from .phase import phase
from .helperfunctions import generate_conda_deps
import rich_click as click
import subprocess
from .helperfunctions import generate_conda_deps

# rich-click setting: render command help text (docstrings, option help) as Markdown.
click.rich_click.USE_MARKDOWN = True
# rich-click setting: do not list positional arguments in their own help panel.
click.rich_click.SHOW_ARGUMENTS = False
Expand All @@ -59,126 +56,40 @@ def cli():
"""
pass

# `demultiplex` command group. The docstring doubles as the help text click
# displays for the group, so its wording is kept verbatim.
@click.group(
    options_metavar="",
    context_settings={"help_option_names": ["-h", "--help"]},
)
def demultiplex():
    """
    Demultiplex haplotagged FASTQ files
    Check that you are using the correct haplotag method/technology, since the different
    barcoding approaches have very different demultiplexing strategies.
    **Haplotag Technologies**
    - `gen1`: the original haplotagging barcode strategy developed by Meier _et al._ (2021)
    """

# `align` command group. The docstring doubles as the help text click displays
# for the group, so its wording is kept verbatim.
@click.group(
    options_metavar="",
    context_settings={"help_option_names": ["-h", "--help"]},
)
def align():
    """
    Align sample sequences to a reference genome
    The three available aligners all retain the linked-read barcode information in the
    resulting output, however `EMA` is the only aligner to use the barcode information
    to facilitate the aligning process and can be prohibitively slow. The `minimap2`
    aligner is the fastest of the three and is comparable in accuracy to `bwa` for
    sequences >100bp.
    **Aligners**
    - `bwa`: uses BWA MEM to align reads (fast)
    - `ema`: uses the BX barcode-aware EMA aligner (very slow)
    - `minimap`: uses minimap2 to align reads (ultra fast)
    Provide an additional subcommand `bwa`, `ema`, or `minimap` to get more information on using
    those aligners.
    """

# `snp` command group. The docstring doubles as the help text click displays
# for the group, so its wording is kept verbatim.
@click.group(
    options_metavar="",
    context_settings={"help_option_names": ["-h", "--help"]},
)
def snp():
    """
    Call SNPs and small indels
    **Variant Callers**
    - `mpileup`: call variants using bcftools mpileup
    - `freebayes`: call variants using freebayes
    Provide an additional subcommand `mpileup` or `freebayes` to get more information on using
    those variant callers. They are both robust variant callers and neither is recommended over the other.
    """

# `sv` command group. The docstring doubles as the help text click displays
# for the group, so its wording is kept verbatim.
@click.group(
    options_metavar="",
    context_settings={"help_option_names": ["-h", "--help"]},
)
def sv():
    """
    Call large structural variants
    **Structural Variant Callers**
    - `naibr`: calls inversions, duplicates, deletions
    - `leviathan`: calls inversions, duplicates, deletions, misc breakends
    Provide an additional subcommand `leviathan` or `naibr` to get more information on using
    those variant callers. NAIBR tends to call variants better, but requires more user preprocessing.
    """

# `preflight` command group. The docstring doubles as the help text click
# displays for the group, so its wording is kept verbatim.
@click.group(
    options_metavar="",
    context_settings={"help_option_names": ["-h", "--help"]},
)
def preflight():
    """
    Run file format checks on haplotag data
    This is useful to make sure your input files are formatted correctly for the processing pipeline
    before you are surprised by errors hours into an analysis. Provide an additional command `fastq`
    or `bam` to see more information and options.
    """

# `simulate` command group. The docstring doubles as the help text click
# displays for the group, so its wording is kept verbatim.
@click.group(
    options_metavar="",
    context_settings={"help_option_names": ["-h", "--help"]},
)
def simulate():
    """
    Simulate variants or linked reads from a genome
    To simulate genomic variants, provide an additional subcommand {`snpindel`,`inversion`,`cnv`,`translocation`}
    to get more information about that workflow. The limitations of the simulator
    (`simuG`) are such that you may simulate only one type of variant at a time,
    so you may need to run this module again on the resulting genome. Use `simulate linkedreads`
    to simulate haplotag linked-reads from a diploid genome, which you can create by simulating
    genomic variants.
    """

# Command registration: attach each top-level command/group to `cli`, then
# attach every subcommand to its parent group via `add_command`.
# NOTE(review): both bare-name calls (e.g. `cli.add_command(align)`) and
# module-qualified calls (e.g. `cli.add_command(align.align)`) appear below --
# presumably the pre- and post-refactor forms of the same registration.
# Confirm which set is live; the two forms cannot both resolve against the
# same binding of `align`, `snp`, etc.
# main program
#cli.add_command(hpc)
cli.add_command(popgroup)
cli.add_command(stitchparams)
cli.add_command(preflight)
cli.add_command(demultiplex)
cli.add_command(qc)
cli.add_command(align)
cli.add_command(snp)
cli.add_command(sv)
cli.add_command(impute)
cli.add_command(phase)
cli.add_command(simulate)
cli.add_command(preflight.preflight)
cli.add_command(demultiplex.demultiplex)
cli.add_command(qc.qc)
cli.add_command(align.align)
cli.add_command(snp.snp)
cli.add_command(sv.sv)
cli.add_command(impute.impute)
cli.add_command(phase.phase)
cli.add_command(simulate.simulate)
# demultiplex submodules
demultiplex.add_command(gen1)
demultiplex.demultiplex.add_command(demultiplex.gen1)
# preflight submodules
preflight.add_command(fastq)
preflight.add_command(bam)
preflight.preflight.add_command(preflight.fastq)
preflight.preflight.add_command(preflight.bam)
# align submodules
align.add_command(bwa)
align.add_command(ema)
align.add_command(minimap)
align.align.add_command(align.bwa)
align.align.add_command(align.ema)
align.align.add_command(align.minimap)
# snp submodules
snp.add_command(mpileup)
snp.add_command(freebayes)
snp.snp.add_command(snp.mpileup)
snp.snp.add_command(snp.freebayes)
# sv submodules
sv.add_command(leviathan)
sv.add_command(naibr)
sv.sv.add_command(sv.leviathan)
sv.sv.add_command(sv.naibr)
# simulate submodules
simulate.add_command(snpindel)
simulate.add_command(inversion)
simulate.add_command(cnv)
simulate.add_command(translocation)
simulate.add_command(linkedreads)
simulate.simulate.add_command(simulate.snpindel)
simulate.simulate.add_command(simulate.inversion)
simulate.simulate.add_command(simulate.cnv)
simulate.simulate.add_command(simulate.translocation)
simulate.simulate.add_command(simulate.linkedreads)
#simulate.add_command(reads)

## the modules ##
Expand Down Expand Up @@ -207,197 +118,7 @@ def simulate():
]
}

# Help-panel layout for rich-click. Each key is a full command path; each value
# is the ordered list of option panels for that command, where a panel is a
# dict with a "name" (panel title) and the "options" it contains.
click.rich_click.OPTION_GROUPS = {
    "harpy preflight bam": [
        {"name": "Options", "options": ["--output-dir", "--threads", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy preflight fastq": [
        {"name": "Options", "options": ["--output-dir", "--threads", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy demultiplex gen1": [
        {"name": "Parameters", "options": ["--schema"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy qc": [
        {"name": "Parameters", "options": ["--max-length", "--ignore-adapters", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy align bwa": [
        {"name": "Parameters", "options": ["--genome", "--quality-filter", "--molecule-distance", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy align ema": [
        {"name": "Parameters", "options": ["--platform", "--whitelist", "--genome", "--quality-filter", "--ema-bins", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy align minimap": [
        {"name": "Parameters", "options": ["--genome", "--quality-filter", "--molecule-distance", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy snp mpileup": [
        {"name": "Parameters", "options": ["--genome", "--populations", "--ploidy", "--regions", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy snp freebayes": [
        {"name": "Parameters", "options": ["--genome", "--populations", "--ploidy", "--regions", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy sv leviathan": [
        {"name": "Parameters", "options": ["--genome", "--min-sv", "--min-barcodes", "--populations", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy sv naibr": [
        {"name": "Module Parameters", "options": ["--genome", "--vcf", "--min-sv", "--min-barcodes", "--molecule-distance", "--populations", "--extra-params"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy impute": [
        {"name": "Parameters", "options": ["--vcf", "--parameters", "--extra-params", "--vcf-samples"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy phase": [
        {"name": "Parameters", "options": ["--vcf", "--molecule-distance", "--genome", "--prune-threshold", "--ignore-bx", "--extra-params", "--vcf-samples"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--skipreports", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy simulate linkedreads": [
        {"name": "Parameters", "options": ["--barcodes", "--read-pairs", "--outer-distance", "--distance-sd", "--molecule-length", "--partitions", "--molecules-per"]},
        {"name": "Other Options", "options": ["--output-dir", "--threads", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy simulate snpindel": [
        {"name": "Known Variants", "options": ["--snp-vcf", "--indel-vcf"]},
        {"name": "Random Variants", "options": ["--snp-count", "--indel-count", "--titv-ratio", "--indel-ratio", "--snp-gene-constraints", "--genes", "--centromeres", "--exclude-chr"]},
        {"name": "Other Options", "options": ["--output-dir", "--prefix", "--heterozygosity", "--randomseed", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy simulate inversion": [
        {"name": "Known Variants", "options": ["--vcf"]},
        {"name": "Random Variants", "options": ["--count", "--min-size", "--max-size", "--genes", "--centromeres", "--exclude-chr"]},
        {"name": "Other Options", "options": ["--output-dir", "--prefix", "--heterozygosity", "--randomseed", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy simulate cnv": [
        {"name": "Known Variants", "options": ["--vcf"]},
        {"name": "Random Variants", "options": ["--count", "--min-size", "--max-size", "--max-copy", "--dup-ratio", "--gain-ratio", "--genes", "--centromeres", "--exclude-chr"]},
        {"name": "Other Options", "options": ["--output-dir", "--prefix", "--heterozygosity", "--randomseed", "--snakemake", "--quiet", "--help"]},
    ],
    "harpy simulate translocation": [
        {"name": "Known Variants", "options": ["--vcf"]},
        {"name": "Random Variants", "options": ["--count", "--genes", "--centromeres", "--exclude-chr"]},
        {"name": "Other Options", "options": ["--output-dir", "--prefix", "--heterozygosity", "--randomseed", "--snakemake", "--quiet", "--help"]},
    ],
}

# Build the rich-click option-group table by merging the per-module `docstring`
# tables, left to right (on a key collision the right-hand operand wins).
# NOTE(review): assumes each `docstring` is a plain dict, in which case `|`
# requires Python 3.9+ -- confirm against the package's minimum version.
click.rich_click.OPTION_GROUPS = (
    demultiplex.docstring
    | preflight.docstring
    | qc.docstring
    | align.docstring
    | snp.docstring
    | sv.docstring
    | impute.docstring
    | phase.docstring
    | simulate.docstring
)

def main():
try:
Expand Down
Loading

0 comments on commit 2da18f1

Please sign in to comment.