Skip to content

Commit

Permalink
Merge pull request #88 from pdimens/hpc_copy
Browse files Browse the repository at this point in the history
new input model listing abspaths in config.yaml
  • Loading branch information
pdimens authored Jun 20, 2024
2 parents ee8a29f + e66baf1 commit 1b6a2f1
Show file tree
Hide file tree
Showing 39 changed files with 1,329 additions and 1,124 deletions.
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,22 @@ mamba create -n harpy -c bioconda -c conda-forge harpy

If you wish to install harpy and its dependencies into an existing environment, activate that environment (`conda activate env_name`) and execute this installation code:
```bash
mamba install -c bioconda -c conda-forge harpy
mamba install -c conda-forge bioconda::harpy
```
Or provide `-n envname` to install it into an existing environment named `envname`:
```bash
mamba install -n envname -c bioconda -c conda-forge harpy
mamba install -n envname -c conda-forge bioconda::harpy
```

---

</details>

## Update
```bash
mamba update -c conda-forge bioconda::harpy
```

## 🌟 Activate the harpy environment
Once conda/mamba finishes, activate the conda/mamba environment you installed harpy into with:
```bash
Expand Down
12 changes: 6 additions & 6 deletions src/harpy/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from . import sv
from . import container
from . import hpc
from . import resume
from .popgroups import popgroup
from .stitchparams import stitchparams
import rich_click as click
Expand All @@ -43,11 +44,9 @@
@click.version_option("1.1.0", prog_name="Harpy")
def cli():
"""
## Harpy haplotagging pipeline
An automated workflow to demultiplex sequences, trim and qc reads,
map sequences, call variants, impute genotypes, and phase
haplotypes of Haplotagging data. Batteries included.
An automated workflow for haplotagging linked-read data
to go from raw data to genotypes (or phased haplotypes).
Batteries included.
**demultiplex >> qc >> align >> snp >> impute >> phase >> sv**
Expand All @@ -69,6 +68,7 @@ def cli():
cli.add_command(simulate.simulate)
cli.add_command(container.containerize)
cli.add_command(hpc.hpc)
cli.add_command(resume.resume)

## the modules ##
click.rich_click.COMMAND_GROUPS = {
Expand All @@ -80,7 +80,7 @@ def cli():
},
{
"name": "Other Commands",
"commands": ["hpc", "preflight", "popgroup", "stitchparams"]
"commands": ["resume", "hpc", "preflight", "popgroup", "stitchparams"]
}
],
} | simulate.commandstring | hpc.docstring
Expand Down
62 changes: 35 additions & 27 deletions src/harpy/align.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
"""Harpy align workflows"""

from genericpath import exists, isfile
import os
import sys
import subprocess
from time import sleep
from pathlib import Path
import rich_click as click
from .conda_deps import generate_conda_deps
from .helperfunctions import fetch_report, fetch_rule, fetch_script
from .fileparsers import get_samples_from_fastq, parse_fastq_inputs
from .fileparsers import parse_fastq_inputs
from .printfunctions import print_error, print_solution, print_notice, print_onstart
from .validations import validate_input_by_ext

Expand Down Expand Up @@ -96,7 +96,7 @@ def bwa(inputs, output_dir, genome, depth_window, threads, extra_params, quality
sdm = "conda" if conda else "conda apptainer"
command = f'snakemake --rerun-incomplete --rerun-triggers input mtime params --nolock --software-deployment-method {sdm} --conda-prefix ./.snakemake/conda --cores {threads} --directory . '
command += f"--snakefile {workflowdir}/align-bwa.smk "
command += f"--configfile {workflowdir}/config.yml "
command += f"--configfile {workflowdir}/config.yaml "
if hpc:
command += f"--workflow-profile {hpc} "
if quiet:
Expand All @@ -108,29 +108,31 @@ def bwa(inputs, output_dir, genome, depth_window, threads, extra_params, quality
sys.exit(0)

os.makedirs(f"{workflowdir}/", exist_ok= True)
sn = parse_fastq_inputs(inputs, f"{workflowdir}/input")
samplenames = get_samples_from_fastq(f"{workflowdir}/input")
fqlist, sample_count = parse_fastq_inputs(inputs)
validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"])
fetch_rule(workflowdir, "align-bwa.smk")
fetch_script(workflowdir, "assignMI.py")
fetch_script(workflowdir, "bxStats.py")
fetch_report(workflowdir, "AlignStats.Rmd")

with open(f"{workflowdir}/config.yml", "w", encoding="utf-8") as config:
config.write(f"genomefile: {genome}\n")
config.write(f"seq_directory: {workflowdir}/input\n")
with open(f"{workflowdir}/config.yaml", "w", encoding="utf-8") as config:
config.write("workflow: align bwa\n")
config.write(f"output_directory: {output_dir}\n")
config.write(f"samplenames: {samplenames}\n")
config.write(f"quality: {quality_filter}\n")
config.write(f"alignment_quality: {quality_filter}\n")
config.write(f"molecule_distance: {molecule_distance}\n")
config.write(f"depth_windowsize: {depth_window}\n")
config.write(f"skipreports: {skipreports}\n")
if extra_params is not None:
config.write(f"extra: {extra_params}\n")
config.write(f"workflow_call: {command}\n")
config.write("inputs:\n")
config.write(f" genome: {Path(genome).resolve()}\n")
config.write(" fastq:\n")
for i in fqlist:
config.write(f" - {i}\n")

print_onstart(
f"Samples: {len(samplenames)}\nOutput Directory: {output_dir}",
f"Samples: {sample_count}\nOutput Directory: {output_dir}",
"align bwa"
)
generate_conda_deps()
Expand Down Expand Up @@ -172,7 +174,7 @@ def ema(inputs, output_dir, platform, whitelist, genome, depth_window, threads,
sdm = "conda" if conda else "conda apptainer"
command = f'snakemake --rerun-incomplete --rerun-triggers input mtime params --nolock --software-deployment-method {sdm} --conda-prefix ./.snakemake/conda --cores {threads} --directory . '
command += f"--snakefile {workflowdir}/align-ema.smk "
command += f"--configfile {workflowdir}/config.yml "
command += f"--configfile {workflowdir}/config.yaml "
if hpc:
command += f"--workflow-profile {hpc} "
if quiet:
Expand All @@ -197,32 +199,34 @@ def ema(inputs, output_dir, platform, whitelist, genome, depth_window, threads,
sleep(3)

os.makedirs(f"{workflowdir}/", exist_ok= True)
sn = parse_fastq_inputs(inputs, f"{workflowdir}/input")
samplenames = get_samples_from_fastq(f"{workflowdir}/input")
fqlist, sample_count = parse_fastq_inputs(inputs)
validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"])
fetch_rule(workflowdir, "align-ema.smk")
fetch_script(workflowdir, "bxStats.py")
for i in ["EmaCount", "AlignStats"]:
fetch_report(workflowdir, f"{i}.Rmd")

with open(f"{workflowdir}/config.yml", "w", encoding="utf-8") as config:
config.write(f"genomefile: {genome}\n")
config.write(f"seq_directory: {workflowdir}/input\n")
with open(f"{workflowdir}/config.yaml", "w", encoding="utf-8") as config:
config.write("workflow: align ema\n")
config.write(f"output_directory: {output_dir}\n")
config.write(f"samplenames: {samplenames}\n")
config.write(f"quality: {quality_filter}\n")
config.write(f"platform: {platform}\n")
config.write(f"EMA_bins: {ema_bins}\n")
config.write(f"depth_windowsize: {depth_window}\n")
config.write(f"skipreports: {skipreports}\n")
if whitelist:
config.write(f"whitelist: {whitelist}\n")
if extra_params is not None:
config.write(f"extra: {extra_params}\n")
config.write(f"workflow_call: {command}\n")
config.write("inputs:\n")
config.write(f" genome: {Path(genome).resolve()}\n")
if whitelist:
config.write(f" whitelist: {Path(whitelist).resolve()}\n")
config.write(" fastq:\n")
for i in fqlist:
config.write(f" - {i}\n")

print_onstart(
f"Samples: {len(samplenames)}\nPlatform: {platform}\nOutput Directory: {output_dir}/",
f"Samples: {sample_count}\nPlatform: {platform}\nOutput Directory: {output_dir}/",
"align ema"
)
generate_conda_deps()
Expand Down Expand Up @@ -260,7 +264,7 @@ def minimap(inputs, output_dir, genome, depth_window, threads, extra_params, qua
sdm = "conda" if conda else "conda apptainer"
command = f'snakemake --rerun-incomplete --rerun-triggers input mtime params --nolock --software-deployment-method {sdm} --conda-prefix ./.snakemake/conda --cores {threads} --directory . '
command += f"--snakefile {workflowdir}/align-minimap.smk "
command += f"--configfile {workflowdir}/config.yml "
command += f"--configfile {workflowdir}/config.yaml "
if hpc:
command += f"--workflow-profile {hpc} "
if quiet:
Expand All @@ -272,29 +276,33 @@ def minimap(inputs, output_dir, genome, depth_window, threads, extra_params, qua
sys.exit(0)

os.makedirs(f"{workflowdir}/", exist_ok= True)
sn = parse_fastq_inputs(inputs, f"{workflowdir}/input")
samplenames = get_samples_from_fastq(f"{workflowdir}/input")
fqlist, sample_count = parse_fastq_inputs(inputs)
validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"])
fetch_rule(workflowdir, "align-minimap.smk")
fetch_script(workflowdir, "assignMI.py")
fetch_script(workflowdir, "bxStats.py")
fetch_report(workflowdir, "AlignStats.Rmd")

with open(f"{workflowdir}/config.yml", "w", encoding="utf-8") as config:
with open(f"{workflowdir}/config.yaml", "w", encoding="utf-8") as config:
config.write("workflow: align minimap\n")
config.write(f"genomefile: {genome}\n")
config.write(f"seq_directory: {workflowdir}/input\n")
config.write(f"output_directory: {output_dir}\n")
config.write(f"samplenames: {samplenames}\n")
config.write(f"quality: {quality_filter}\n")
config.write(f"molecule_distance: {molecule_distance}\n")
config.write(f"depth_windowsize: {depth_window}\n")
config.write(f"skipreports: {skipreports}\n")
if extra_params is not None:
config.write(f"extra: {extra_params}\n")
config.write(f"workflow_call: {command}\n")
config.write("inputs:\n")
config.write(f" genome: {Path(genome).resolve()}\n")
config.write(" fastq:\n")
for i in fqlist:
config.write(f" - {i}\n")

print_onstart(
f"Samples: {len(samplenames)}\nOutput Directory: {output_dir}",
f"Samples: {sample_count}\nOutput Directory: {output_dir}",
"align minimap"
)
generate_conda_deps()
Expand Down
2 changes: 2 additions & 0 deletions src/harpy/bin/depthWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
start = 1
end = args.windowsize
lastcontig = None
position = 0

for line in sys.stdin:
# Strip trailing whitespace (including the newline) and split the line into whitespace-delimited fields
line = line.rstrip().split()
Expand Down
23 changes: 13 additions & 10 deletions src/harpy/demultiplex.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Harpy demultiplex workflows"""

import os
import sys
import subprocess
from pathlib import Path
import rich_click as click
from .conda_deps import generate_conda_deps
from .printfunctions import print_onstart
Expand Down Expand Up @@ -61,31 +63,32 @@ def gen1(r1_fq, r2_fq, i1_fq, i2_fq, output_dir, schema, threads, snakemake, ski
sdm = "conda" if conda else "conda apptainer"
command = f'snakemake --rerun-incomplete --rerun-triggers input mtime params --nolock --software-deployment-method {sdm} --conda-prefix ./.snakemake/conda --cores {threads} --directory . '
command += f"--snakefile {workflowdir}/demultiplex.gen1.smk "
command += f"--configfile {workflowdir}/config.yml "
command += f"--configfile {workflowdir}/config.yaml "
if hpc:
command += f"--workflow-profile {hpc} "
if quiet:
command += "--quiet all "
if snakemake is not None:
command += snakemake

if print_only:
click.echo(command)
sys.exit(0)

#check_demux_fastq(fastq_input)
validate_demuxschema(schema)
os.makedirs(f"{workflowdir}", exist_ok=True)
fetch_rule(workflowdir, "demultiplex.gen1.smk")

with open(f"{workflowdir}/config.yml", "w") as config:
config.write(f"R1: {r1_fq}\n")
config.write(f"R2: {r2_fq}\n")
config.write(f"I1: {i1_fq}\n")
config.write(f"I2: {i2_fq}\n")

with open(f"{workflowdir}/config.yaml", "w", encoding= "utf-8") as config:
config.write("workflow: demultiplex gen1\n")
config.write(f"output_directory: {output_dir}\n")
config.write(f"samplefile: {schema}\n")
config.write(f"skipreports: {skipreports}\n")
config.write(f"workflow_call: {command}\n")
config.write("inputs:\n")
config.write(f" demultiplex_schema: {Path(schema).resolve()}\n")
config.write(f" R1: {Path(r1_fq).resolve()}\n")
config.write(f" R2: {Path(r2_fq).resolve()}\n")
config.write(f" I1: {Path(i1_fq).resolve()}\n")
config.write(f" I2: {Path(i2_fq).resolve()}\n")

print_onstart(
f"Haplotag Type: Generation I\nDemultiplex Schema: {schema}\nOutput Directory: {output_dir}",
Expand Down
Loading

0 comments on commit 1b6a2f1

Please sign in to comment.