Skip to content

Commit

Permalink
fix handling of indels and sample names
Browse files Browse the repository at this point in the history
  • Loading branch information
pdimens committed Jul 10, 2024
1 parent a55c083 commit 47fd175
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
9 changes: 5 additions & 4 deletions src/harpy/phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,20 @@ def phase(inputs, output_dir, vcf, threads, molecule_distance, prune_threshold,
with open(f"{workflowdir}/config.yaml", "w", encoding="utf-8") as config:
config.write("workflow: phase\n")
config.write(f"output_directory: {output_dir}\n")
config.write(f"noBX: {ignore_bx}\n")
config.write(f"ignore_bx: {ignore_bx}\n")
config.write(f"prune: {prune_threshold/100}\n")
config.write(f"molecule_distance: {molecule_distance}\n")
config.write(f"samples_from_vcf: {vcf_samples}\n")
if extra_params is not None:
config.write(f"extra: {extra_params}\n")
config.write(f"skipreports: {skipreports}\n")
config.write(f"skip_reports: {skipreports}\n")
config.write(f"workflow_call: {command}\n")
config.write("inputs:\n")
config.write(f" variantfile: {vcf}\n")
if genome is not None:
config.write(f" genome: {genome}\n")
if not os.path.exists(f"{genome}.fai"):
subprocess.run(f"samtools faidx --fai-idx {genome}.fai {genome}".split())
# if not os.path.exists(f"{genome}.fai"):
# subprocess.run(f"samtools faidx --fai-idx {genome}.fai {genome}".split())
config.write(" alignments:\n")
for i in bamlist:
config.write(f" - {i}\n")
Expand Down
20 changes: 14 additions & 6 deletions src/harpy/snakefiles/phase.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
containerized: "docker://pdimens/harpy:latest"

import sys
import subprocess
import multiprocessing
from pathlib import Path
from rich import print as rprint
Expand All @@ -13,20 +14,27 @@ pruning = config["prune"]
molecule_distance = config["molecule_distance"]
extra = config.get("extra", "")
outdir = config["output_directory"]
envdir = os.getcwd() + "/.harpy_envs"
skipreports = config["skipreports"]

envdir = os.getcwd() + "/.harpy_envs"
skipreports = config["skip_reports"]
samples_from_vcf = config["samples_from_vcf"]
variantfile = config["inputs"]["variantfile"]
bamlist = config["inputs"]["alignments"]
samplenames = [Path(i).stem for i in bamlist]

# toggle linked-read aware mode
if config["noBX"]:
if config["ignore_bx"]:
fragfile = outdir + "/extractHairs/{sample}.unlinked.frags"
linkarg = "--10x 0"
else:
fragfile = outdir + "/linkFragments/{sample}.linked.frags"
linkarg = "--10x 1"

# Decide where the sample names come from: the VCF header when
# `samples_from_vcf` is set, otherwise the basenames of the input alignments.
if samples_from_vcf:
    # `bcftools query -l` lists the sample names recorded in the variant file.
    # check=True makes a failed bcftools call raise here instead of silently
    # producing an empty sample list.
    bcfquery = subprocess.run(
        ["bcftools", "query", "-l", variantfile],
        stdout=subprocess.PIPE,
        check=True,
    )
    # CompletedProcess.stdout is already a bytes object (not a stream), so it
    # is decoded directly; calling .read() on it raises AttributeError.
    samplenames = bcfquery.stdout.decode().split()
else:
    samplenames = [Path(i).stem for i in bamlist]


# toggle indel mode
if config["inputs"].get("genome", None):
genomefile = config["inputs"]["genome"]
Expand All @@ -41,7 +49,7 @@ else:
geno = []
genofai = []
bn = []
indels = True
indels = False

wildcard_constraints:
sample = "[a-zA-Z0-9._-]+"
Expand Down

0 comments on commit 47fd175

Please sign in to comment.