From 9409502620ea7f521a2035a241dfce24c9ab3900 Mon Sep 17 00:00:00 2001 From: pdimens Date: Fri, 26 Apr 2024 09:59:47 -0400 Subject: [PATCH] update bcftools dep to 1.20 and update logic --- .github/filters.yml | 2 +- src/harpy/helperfunctions.py | 2 +- src/harpy/phase.py | 4 ++-- src/harpy/rules/{phase-pop.smk => phase.smk} | 13 ++----------- 4 files changed, 6 insertions(+), 15 deletions(-) rename src/harpy/rules/{phase-pop.smk => phase.smk} (97%) diff --git a/.github/filters.yml b/.github/filters.yml index 653ec4fa4..f44992fa7 100644 --- a/.github/filters.yml +++ b/.github/filters.yml @@ -106,7 +106,7 @@ naibr: - 'src/harpy/globalscripts/inferSV.py' phase: - *common - - 'src/harpy/rules/phase-pop.smk' + - 'src/harpy/rules/phase.smk' - '**phase.py' - 'test/bam/**' - 'test/vcf/test.bcf' diff --git a/src/harpy/helperfunctions.py b/src/harpy/helperfunctions.py index 2e85b1480..63a57453d 100644 --- a/src/harpy/helperfunctions.py +++ b/src/harpy/helperfunctions.py @@ -23,7 +23,7 @@ def generate_conda_deps(): environ = { "qc" : ["falco", "fastp", "multiqc", "pysam=0.22"], "align": ["bwa", "ema","icu","libzlib", "minimap2", "samtools=1.19", "seqtk", "xz"], - "variants.snp": ["bcftools=1.19", "freebayes=1.3.6"], + "variants.snp": ["bcftools=1.20", "freebayes=1.3.6"], "variants.sv": ["leviathan", "naibr-plus"], "phase" : ["hapcut2", "whatshap"], "simulations" : ["perl", "perl-math-random", "perl-inline-c", "perl-parse-recdescent", "numpy", "dwgsim", "alienzj::msort"], diff --git a/src/harpy/phase.py b/src/harpy/phase.py index 85ca3bbae..abe71d996 100644 --- a/src/harpy/phase.py +++ b/src/harpy/phase.py @@ -38,7 +38,7 @@ def phase(input, output_dir, vcf, threads, molecule_distance, prune_threshold, v workflowdir = f"{output_dir}/workflow" command = f'snakemake --rerun-incomplete --rerun-triggers input mtime params --nolock --software-deployment-method conda --conda-prefix ./.snakemake/conda --cores {threads} --directory .'.split() command.append('--snakefile') - command.append(f"{workflowdir}/phase-pop.smk") + command.append(f"{workflowdir}/phase.smk") command.append("--configfile") command.append(f"{workflowdir}/config.yml") if quiet: @@ -58,7 +58,7 @@ def phase(input, output_dir, vcf, threads, molecule_distance, prune_threshold, v validate_bamfiles(f"{workflowdir}/input", samplenames) if genome: validate_input_by_ext(genome, "--genome", [".fasta", ".fa", ".fasta.gz", ".fa.gz"]) - fetch_rule(workflowdir, "phase-pop.smk") + fetch_rule(workflowdir, "phase.smk") fetch_report(workflowdir, "HapCut2.Rmd") prune_threshold /= 100 diff --git a/src/harpy/rules/phase-pop.smk b/src/harpy/rules/phase.smk similarity index 97% rename from src/harpy/rules/phase-pop.smk rename to src/harpy/rules/phase.smk index 75b6e80c3..c85f49fde 100644 --- a/src/harpy/rules/phase-pop.smk +++ b/src/harpy/rules/phase.smk @@ -219,21 +219,12 @@ rule merge_samples: output: bcf = outdir + "/variants.phased.bcf", idx = outdir + "/variants.phased.bcf.csi" - params: - "true" if len(samplenames) > 1 else "false" threads: 30 message: "Combining results into {output.bcf}" if len(samplenames) > 1 else "Copying results to {output.bcf}" shell: - """ - if [ "{params}" = true ]; then - bcftools merge --threads {threads} -Ob -o {output.bcf} --write-index {input.bcf} - else - cp {input.bcf} {output.bcf} - cp {input.idx} {output.idx} - fi - """ + "bcftools merge --threads {threads} -Ob -o {output.bcf} --force-single --write-index {input.bcf}" rule summarize_blocks: input: @@ -292,7 +283,7 @@ rule log_workflow: _ = f.write(" awk '!/\n') _ = f.write(' ##FORMAT=\n')