Skip to content

Commit

Permalink
update bcftools dep to 1.20 and update logic
Browse files: browse the repository at this point in the history
  • Loading branch information
pdimens committed Apr 26, 2024
1 parent 7072f3e commit 9409502
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/filters.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ naibr:
- 'src/harpy/globalscripts/inferSV.py'
phase:
- *common
- 'src/harpy/rules/phase-pop.smk'
- 'src/harpy/rules/phase.smk'
- '**phase.py'
- 'test/bam/**'
- 'test/vcf/test.bcf'
Expand Down
2 changes: 1 addition & 1 deletion src/harpy/helperfunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def generate_conda_deps():
environ = {
"qc" : ["falco", "fastp", "multiqc", "pysam=0.22"],
"align": ["bwa", "ema","icu","libzlib", "minimap2", "samtools=1.19", "seqtk", "xz"],
"variants.snp": ["bcftools=1.19", "freebayes=1.3.6"],
"variants.snp": ["bcftools=1.20", "freebayes=1.3.6"],
"variants.sv": ["leviathan", "naibr-plus"],
"phase" : ["hapcut2", "whatshap"],
"simulations" : ["perl", "perl-math-random", "perl-inline-c", "perl-parse-recdescent", "numpy", "dwgsim", "alienzj::msort"],
Expand Down
4 changes: 2 additions & 2 deletions src/harpy/phase.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def phase(input, output_dir, vcf, threads, molecule_distance, prune_threshold, v
workflowdir = f"{output_dir}/workflow"
command = f'snakemake --rerun-incomplete --rerun-triggers input mtime params --nolock --software-deployment-method conda --conda-prefix ./.snakemake/conda --cores {threads} --directory .'.split()
command.append('--snakefile')
command.append(f"{workflowdir}/phase-pop.smk")
command.append(f"{workflowdir}/phase.smk")
command.append("--configfile")
command.append(f"{workflowdir}/config.yml")
if quiet:
Expand All @@ -58,7 +58,7 @@ def phase(input, output_dir, vcf, threads, molecule_distance, prune_threshold, v
validate_bamfiles(f"{workflowdir}/input", samplenames)
if genome:
validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"])
fetch_rule(workflowdir, "phase-pop.smk")
fetch_rule(workflowdir, "phase.smk")
fetch_report(workflowdir, "HapCut2.Rmd")
prune_threshold /= 100

Expand Down
13 changes: 2 additions & 11 deletions src/harpy/rules/phase-pop.smk → src/harpy/rules/phase.smk
Original file line number Diff line number Diff line change
Expand Up @@ -219,21 +219,12 @@ rule merge_samples:
output:
bcf = outdir + "/variants.phased.bcf",
idx = outdir + "/variants.phased.bcf.csi"
params:
"true" if len(samplenames) > 1 else "false"
threads:
30
message:
"Combining results into {output.bcf}" if len(samplenames) > 1 else "Copying results to {output.bcf}"
shell:
"""
if [ "{params}" = true ]; then
bcftools merge --threads {threads} -Ob -o {output.bcf} --write-index {input.bcf}
else
cp {input.bcf} {output.bcf}
cp {input.idx} {output.idx}
fi
"""
"bcftools merge --threads {threads} -Ob -o {output.bcf} --force-single --write-index {input.bcf}"

rule summarize_blocks:
input:
Expand Down Expand Up @@ -292,7 +283,7 @@ rule log_workflow:
_ = f.write(" awk '!/<ID=GX/' |\n")
_ = f.write(" sed 's/:GX:/:GT:/' |\n")
_ = f.write(" bcftools view -Ob -o sample.annot.bcf -\n")
_ = f.write(" bcftools merge --output-type b samples.annot.bcf\n\n")
_ = f.write(" bcftools merge --output-type b samples*.annot.bcf\n\n")
_ = f.write("The header.file of extra vcf tags:\n")
_ = f.write(' ##INFO=<ID=HAPCUT,Number=0,Type=Flag,Description="The haplotype was created with Hapcut2">\n')
_ = f.write(' ##FORMAT=<ID=GX,Number=1,Type=String,Description="Haplotype">\n')
Expand Down

0 comments on commit 9409502

Please sign in to comment.