Merge pull request #144 from pdimens/metassembly
Add Metassembly
pdimens authored Oct 15, 2024
2 parents 63493d6 + 708a2e3 commit 4978a98
Showing 53 changed files with 1,205 additions and 592 deletions.
6 changes: 3 additions & 3 deletions .deprecated/align-minimap.smk
@@ -14,7 +14,7 @@ extra = config.get("extra", "")
bn = os.path.basename(genomefile)
genome_zip = True if bn.lower().endswith(".gz") else False
bn_idx = f"{bn}.gzi" if genome_zip else f"{bn}.fai"
skipreports = config["skip_reports"]
skip_reports = config["skip_reports"]
windowsize = config["depth_windowsize"]
molecule_distance = config["molecule_distance"]

@@ -323,8 +323,8 @@ rule workflow_summary:
default_target: True
input:
bams = collect(outdir + "/{sample}.{ext}", sample = samplenames, ext = ["bam","bam.bai"]),
samtools = outdir + "/reports/minimap.stats.html" if not skipreports else [] ,
bx_reports = collect(outdir + "/reports/{sample}.html", sample = samplenames) if not skipreports else []
samtools = outdir + "/reports/minimap.stats.html" if not skip_reports else [] ,
bx_reports = collect(outdir + "/reports/{sample}.html", sample = samplenames) if not skip_reports else []
params:
quality = config["quality"],
extra = extra
4 changes: 2 additions & 2 deletions .deprecated/phase.bak.smk
@@ -22,7 +22,7 @@ else:
fragfile = outdir + "/linkFragments/{sample}.linked.frags"

linkarg = "--10x 0" if config["noBX"] else "--10x 1"
skipreports = config["skip_reports"]
skip_reports = config["skip_reports"]

try:
indelarg = "--indels 1 --ref " + config["indels"]
@@ -289,7 +289,7 @@ rule workflow_summary:
default_target: True
input:
vcf = outdir + "/variants.phased.bcf",
reports = outdir + "/reports/phase.html" if not skipreports else []
reports = outdir + "/reports/phase.html" if not skip_reports else []
params:
prune = f"--threshold {pruning}" if pruning > 0 else "--no_prune 1",
extra = extra
6 changes: 6 additions & 0 deletions .github/filters.yml
@@ -149,6 +149,11 @@ simreads: &simreads
- 'extractReads.cpp'
- 'harpy/bin/10xtoHaplotag.py'
- 'harpy/scripts/LRSIM_harpy.pl'
metassembly: &metassembly
- *common
- *container
- 'harpy/metassembly.py'
- 'harpy/snakefiles/metassembly.smk'
other: &other
- 'harpy/stitchparams.py'
- 'harpy/popgroup.py'
@@ -170,4 +175,5 @@ modules:
- *naibr
- *simvars
- *simreads
- *metassembly
- *other
46 changes: 43 additions & 3 deletions .github/workflows/tests.yml
@@ -41,6 +41,7 @@ jobs:
phase: ${{ steps.filter.outputs.phase }}
simvars: ${{ steps.filter.outputs.simvars }}
simreads: ${{ steps.filter.outputs.simreads }}
metassembly: ${{ steps.filter.outputs.metassembly }}
other: ${{ steps.filter.outputs.other }}
container: ${{ steps.filter.outputs.container }}
modules: ${{ steps.filter.outputs.modules }}
@@ -106,7 +107,7 @@ jobs:
id: singularity
shell: micromamba-shell {0}
if: ${{ needs.changes.outputs.modules == 'true' }}
run: harpy qc --skipreports --quiet test/fastq/sample1.*.fq.gz
run: harpy qc --skip-reports --quiet test/fastq/sample1.*.fq.gz
- name: Create Singularity Artifact
if: ${{ steps.singularity.outcome == 'success' }}
uses: actions/upload-artifact@v4
@@ -231,7 +232,7 @@ jobs:
run: harpy qc -x "--trim_poly_g" --quiet test/fastq
- name: harpy qc all options
shell: micromamba-shell {0}
run: harpy qc -a -d -c --quiet test/fastq
run: harpy qc -a -d -c 21,40,3,0 --quiet test/fastq
deconvolve:
needs: [changes, pkgbuild]
if: ${{ needs.changes.outputs.deconvolve == 'true' && needs.pkgbuild.result == 'success' }}
@@ -787,4 +788,43 @@ jobs:
harpy hpc slurm
harpy hpc googlebatch
harpy hpc lsf
harpy hpc htcondor
harpy hpc htcondor
metassembly:
needs: [changes, pkgbuild]
if: ${{ needs.changes.outputs.metassembly == 'true' && needs.pkgbuild.result == 'success' }}
name: metassembly
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Setup Mamba
uses: mamba-org/setup-micromamba@v1
env:
ACTIONS_STEP_DEBUG: true
with:
init-shell: bash
generate-run-shell: true
environment-file: resources/harpy.yaml
cache-environment: true
post-cleanup: 'all'
log-level: error
- name: Install Harpy
shell: micromamba-shell {0}
run: |
python3 -m pip install --upgrade build && python3 -m build
pip install dist/*.whl
resources/buildforCI.sh
- name: Clear Space
run: rm -rf /opt/hostedtoolcache && mkdir -p .snakemake/singularity
- name: Download Singularity Artifact
uses: actions/download-artifact@v4
with:
name: deps-image
path: .snakemake/singularity
- name: create a metassembly
shell: micromamba-shell {0}
run: harpy metassembly --quiet -m 6000 test/fastq/sample1.*


61 changes: 61 additions & 0 deletions Assembly/workflow/assembly.smk
@@ -0,0 +1,61 @@
import os
import logging

onstart:
logger.logger.addHandler(logging.FileHandler(config["snakemake_log"]))
onsuccess:
os.remove(logger.logfile)
onerror:
os.remove(logger.logfile)

FQ1 = config["inputs"]["fastq_r1"]
FQ2 = config["inputs"]["fastq_r2"]
outdir = config["output_directory"]
envdir = os.getcwd() + "/.harpy_envs"
max_mem = config["spades"]["max_memory"]
k_param = config["spades"]["k"]
extra = config["spades"].get("extra", "")

rule cloudspades:
input:
FQ_R1 = FQ1,
FQ_R2 = FQ2
output:
f"{outdir}/contigs.fasta",
f"{outdir}/scaffolds.fasta"
params:
outdir = outdir,
k = k_param,
mem = max_mem // 1000,
extra = extra
log:
outdir + "/logs/assembly.log"
conda:
f"{envdir}/assembly.yaml"
threads:
workflow.cores
resources:
mem_mb=max_mem
shell:
"spades.py -t {threads} -m {params.mem} -k {params.k} {params.extra} --gemcode1-1 {input.FQ_R1} --gemcode1-2 {input.FQ_R2} -o {params.outdir} --isolate > {log}"

rule workflow_summary:
default_target: True
input:
f"{outdir}/athena/athena.asm.fa"
params:
k_param = k_param,
max_mem = max_mem // 1000,
extra = extra
run:
summary_template = f"""
The harpy assemble workflow ran using these parameters:
Reads were assembled using cloudspades:
spades.py -t THREADS -m {params.max_mem} --gemcode1-1 FQ1 --gemcode1-2 FQ2 --isolate -k {params.k_param} {params.extra}
The Snakemake workflow was called via command line:
{config["workflow_call"]}
"""
with open(outdir + "/workflow/metassembly.summary", "w") as f:
f.write(summary_template)
13 changes: 13 additions & 0 deletions Assembly/workflow/config.yaml
@@ -0,0 +1,13 @@
workflow: assembly
snakemake_log: Assembly/logs/snakemake/assembly.1.15_10_2024.log
output_directory: Assembly
barcode_tag: BX
spades:
assembler: None
max_memory: 250000
k: auto
skip_reports: False
workflow_call: snakemake --rerun-incomplete --show-failed-logs --rerun-triggers input mtime params --nolock --software-deployment-method conda --conda-prefix .snakemake/conda --cores 4 --directory . --snakefile Assembly/workflow/assembly.smk --configfile Assembly/workflow/config.yaml
inputs:
fastq_r1: test/fastq/sample1.F.fq.gz
fastq_r2: test/fastq/sample1.R.fq.gz
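
For illustration, this is roughly how the `cloudspades` shell directive above renders once the values from this example config are substituted in. A sketch only: the thread count is inferred from `--cores 4` in `workflow_call`, `extra` is empty because the config sets no `extra` key, and the `// 1000` division suggests `max_memory` is given in MB while SPAdes' `-m` expects GB.

```python
# Sketch: rendering the cloudspades shell directive with the example config values.
# All concrete values below are taken from the config.yaml shown above.
max_mem = 250_000                       # config["spades"]["max_memory"] (MB)
k_param = "auto"                        # config["spades"]["k"]
extra   = ""                            # config["spades"].get("extra", "")
outdir  = "Assembly"                    # config["output_directory"]
fq1     = "test/fastq/sample1.F.fq.gz"  # config["inputs"]["fastq_r1"]
fq2     = "test/fastq/sample1.R.fq.gz"  # config["inputs"]["fastq_r2"]
threads = 4                             # assumed from --cores 4 in workflow_call

cmd = (
    f"spades.py -t {threads} -m {max_mem // 1000} -k {k_param} {extra} "
    f"--gemcode1-1 {fq1} --gemcode1-2 {fq2} "
    f"-o {outdir} --isolate > {outdir}/logs/assembly.log"
)
print(cmd)
# spades.py -t 4 -m 250 -k auto  --gemcode1-1 test/fastq/sample1.F.fq.gz \
#   --gemcode1-2 test/fastq/sample1.R.fq.gz -o Assembly --isolate > Assembly/logs/assembly.log
```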
4 changes: 2 additions & 2 deletions README.md
@@ -52,6 +52,6 @@ harpy
```

## 🌈 Get Started
No data? No problem! Harpy lets you [simulate genomic variants](https://pdimens.github.io/harpy/modules/simulate/simulate-variants/)
from an existing genome and can also [create haplotag data](https://pdimens.github.io/harpy/modules/simulate/simulate-linkedreads/)
No data? No problem! Harpy lets you [simulate genomic variants](https://pdimens.github.io/harpy/workflows/simulate/simulate-variants/)
from an existing genome and can also [create haplotag data](https://pdimens.github.io/harpy/workflows/simulate/simulate-linkedreads/)
from an existing genome! You can see what haplotag data (and Harpy) are like without paying a single cent!
6 changes: 3 additions & 3 deletions harpy/__main__.py
@@ -58,13 +58,13 @@ def cli():
cli.add_command(deconvolve.deconvolve)
cli.add_command(metassembly.metassembly)

## the modules ##
## the workflows ##
click.rich_click.COMMAND_GROUPS = {
"harpy":
[
{
"name": "Modules",
"commands": ["demultiplex", "metassembly","qc", "align","snp","sv","impute","phase", "simulate"],
"name": "workflows",
"commands": ["demultiplex","qc", "align","snp","sv","impute","phase", "simulate", "metassembly"],
},
{
"name": "Other Commands",
15 changes: 11 additions & 4 deletions harpy/_conda.py
@@ -17,11 +17,11 @@ def create_conda_recipes():
"conda-forge::libzlib",
"conda-forge::xz"
],
"metassembly" : [
"bioconda::spades=4.0"
"assembly": [
"conda-forge::python=3"
],
"athena": [
"bioconda::athena_meta"
"metassembly": [
"bioconda::athena_meta=1.2"
],
"phase" : [
"bioconda::hapcut2",
@@ -79,3 +79,10 @@ def create_conda_recipes():
yml.write("\n - ".join(condachannels))
yml.write("\ndependencies:\n - ")
yml.write("\n - ".join(deps) + "\n")

# post-deployment scripts
with open(".harpy_envs/assembly.post-deploy.sh", "w", encoding="utf-8") as shellscript:
shellscript.write("wget -O .spades.tar.gz https://github.com/ablab/spades/releases/download/v4.0.0/SPAdes-4.0.0-Linux.tar.gz\n")
shellscript.write("tar -xvzf .spades.tar.gz && rm .spades.tar.gz\n")
shellscript.write("mv SPAdes-4.0.0-Linux/bin/* ${CONDA_PREFIX}/bin && mv SPAdes-4.0.0-Linux/share/* ${CONDA_PREFIX}/share\n")
shellscript.write("rm -r SPAdes-4.0.0-Linux\n")
50 changes: 21 additions & 29 deletions harpy/_launch.py
@@ -6,11 +6,15 @@
import glob
import subprocess
from rich import print as rprint
from rich.progress import Progress, BarColumn, TextColumn, TimeElapsedColumn, SpinnerColumn
from rich.console import Console
from ._misc import gzip_file
from ._misc import gzip_file, harpy_progressbar, harpy_pulsebar
from ._printing import print_onsuccess, print_onstart, print_onerror, print_setup_error

EXIT_CODE_SUCCESS = 0
EXIT_CODE_GENERIC_ERROR = 1
EXIT_CODE_CONDA_ERROR = 2
EXIT_CODE_RUNTIME_ERROR = 3

def iserror(text):
"""logical check for erroring trigger words in snakemake output"""
return "Exception" in text or "Error" in text or "MissingOutputException" in text
@@ -38,8 +42,8 @@ def launch_snakemake(sm_args, workflow, starttext, outdir, sm_logfile, quiet):
output = process.stderr.readline()
# check for syntax errors at the very beginning
if process.poll() or iserror(output):
exitcode = 0 if process.poll() == 0 else 1
exitcode = 2 if iserror(output) else exitcode
exitcode = EXIT_CODE_SUCCESS if process.poll() == 0 else EXIT_CODE_GENERIC_ERROR
exitcode = EXIT_CODE_CONDA_ERROR if "Conda" in output else exitcode
break
if not quiet:
console = Console()
@@ -50,12 +54,11 @@ def launch_snakemake(sm_args, workflow, starttext, outdir, sm_logfile, quiet):
while output.startswith("Building DAG of jobs...") or output.startswith("Assuming"):
output = process.stderr.readline()
if process.poll() or iserror(output):
exitcode = 0 if process.poll() == 0 else 1
exitcode = EXIT_CODE_SUCCESS if process.poll() == 0 else EXIT_CODE_GENERIC_ERROR
break

while not output.startswith("Job stats:"):
# print dependency text only once
if "Downloading and installing remote packages" in output:
if "Downloading and installing remote packages" in output or "Running post-deploy" in output:
deps = True
deploy_text = "[dim]Installing software dependencies"
break
@@ -64,40 +67,29 @@ def launch_snakemake(sm_args, workflow, starttext, outdir, sm_logfile, quiet):
deploy_text = "[dim]Downloading software container"
break
if "Nothing to be" in output:
exitcode = 0
exitcode = EXIT_CODE_SUCCESS
break
if "MissingInput" in output:
exitcode = EXIT_CODE_GENERIC_ERROR
break
output = process.stderr.readline()

# if dependency text present, print pulsing progress bar
if deps:
with Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(bar_width= 70 - len(deploy_text), pulse_style = "grey46"),
TimeElapsedColumn(),
transient=True,
disable=quiet
) as progress:
with harpy_pulsebar(quiet, deploy_text) as progress:
progress.add_task("[dim]" + deploy_text, total = None)
while not output.startswith("Job stats:"):
output = process.stderr.readline()
if process.poll() or iserror(output):
exitcode = 0 if process.poll() == 0 else 2
exitcode = EXIT_CODE_SUCCESS if process.poll() == 0 else 2
break
progress.stop()
if process.poll() or exitcode:
break
if "Nothing to be" in output:
exitcode = 0
exitcode = EXIT_CODE_SUCCESS
break
with Progress(
SpinnerColumn(spinner_name = "arc", style = "dim"),
TextColumn("[progress.description]{task.description}"),
BarColumn(complete_style="yellow", finished_style="blue"),
TextColumn("[progress.remaining]{task.completed}/{task.total}", style = "magenta"),
TimeElapsedColumn(),
transient=True,
disable=quiet
) as progress:
with harpy_progressbar(quiet) as progress:
# process the job summary
job_inventory = {}
while True:
@@ -114,7 +106,7 @@ def launch_snakemake(sm_args, workflow, starttext, outdir, sm_logfile, quiet):
pass
# checkpoint
if process.poll() or iserror(output):
exitcode = 0 if process.poll() == 0 else 1
exitcode = EXIT_CODE_SUCCESS if process.poll() == 0 else EXIT_CODE_GENERIC_ERROR
break
if process.poll() or exitcode:
break
@@ -124,11 +116,11 @@ def launch_snakemake(sm_args, workflow, starttext, outdir, sm_logfile, quiet):
output = process.stderr.readline()
if iserror(output) or process.poll() == 1:
progress.stop()
exitcode = 3
exitcode = EXIT_CODE_RUNTIME_ERROR
break
if process.poll() == 0 or output.startswith("Complete log:") or output.startswith("Nothing to be"):
progress.stop()
exitcode = 0 if process.poll() == 0 else 3
exitcode = EXIT_CODE_SUCCESS if process.poll() == 0 else EXIT_CODE_RUNTIME_ERROR
break
# add new progress bar track if the rule doesn't have one yet
rulematch = re.search(r"rule\s\w+:", output)
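
The `harpy_progressbar` and `harpy_pulsebar` helpers imported from `._misc` are not shown in this diff; judging from the inline `Progress(...)` constructions they replace above, they presumably look something like the sketch below (names and signatures assumed, not taken from `harpy/_misc.py`). Factoring the column setup out keeps `_launch.py` focused on parsing Snakemake's output, and both bars still honor the `quiet` flag via `disable=quiet`.

```python
# Sketch of the _misc.py helpers, reconstructed from the inline Progress(...)
# calls removed in this diff. Not the actual harpy/_misc.py code.
from rich.progress import (
    BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
)

def harpy_progressbar(quiet: bool) -> Progress:
    """Spinner + bar + completed/total counter used to track Snakemake jobs."""
    return Progress(
        SpinnerColumn(spinner_name="arc", style="dim"),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(complete_style="yellow", finished_style="blue"),
        TextColumn("[progress.remaining]{task.completed}/{task.total}", style="magenta"),
        TimeElapsedColumn(),
        transient=True,
        disable=quiet,
    )

def harpy_pulsebar(quiet: bool, deploy_text: str) -> Progress:
    """Indeterminate 'pulse' bar shown while conda/container dependencies deploy."""
    return Progress(
        TextColumn("[progress.description]{task.description}"),
        BarColumn(bar_width=70 - len(deploy_text), pulse_style="grey46"),
        TimeElapsedColumn(),
        transient=True,
        disable=quiet,
    )
```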