-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #6 from UW-GAC/merge-vcf-workflow
Add workflow to merge VCFs
- Loading branch information
Showing
5 changed files
with
163 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
{ | ||
"bcftools_merge.vcf_files": [ | ||
["tmp/AMR_subset1000_chr22.vcf.gz", "tmp/EUR_subset1000_chr22.vcf.gz"] | ||
], | ||
"bcftools_merge.output_prefixes": [ | ||
"merged_chr22" | ||
], | ||
"bcftools_merge.missing_to_ref": false | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
version 1.0 | ||
|
||
|
||
# Merge groups of VCF files with bcftools.
#
# Each inner array of `vcf_files` is paired (via zip) with the entry of
# `output_prefixes` at the same position; every pair yields one merged VCF
# and its index.
workflow bcftools_merge {
    input {
        Array[Array[File]] vcf_files
        Array[String] output_prefixes
        Boolean missing_to_ref = false
        String? merge_options
        Int mem_gb = 16
    }

    # One iteration per (VCF group, output prefix) pair.
    scatter (merge_job in zip(vcf_files, output_prefixes)) {

        # Index every VCF of the group; the gathered index files are handed
        # to the merge task below.
        scatter (group_vcf in merge_job.left) {
            call create_index_file {
                input:
                    vcf_file = group_vcf,
                    output_prefix = "index"
            }
        }

        call merge_vcfs {
            input:
                vcf_files = merge_job.left,
                index_files = create_index_file.index_file,
                out_prefix = merge_job.right,
                missing_to_ref = missing_to_ref,
                merge_options = merge_options,
                mem_gb = mem_gb
        }
    }

    output {
        # One merged VCF and one index per input group.
        Array[File] out_files = merge_vcfs.out_file
        Array[File] out_index_files = merge_vcfs.out_index_file
    }

    meta {
        author: "Adrienne Stilp"
        email: "[email protected]"
    }
}
|
||
# Build an index for a single VCF with `bcftools index`.
#
# Inputs:
#   vcf_file       VCF file to index.
#   output_prefix  Basename of the index; it is written as <output_prefix>.csi
#                  (defaults to "index").
# Outputs:
#   index_file     The <output_prefix>.csi file written by bcftools.
task create_index_file {

    input {
        File vcf_file
        String? output_prefix = "index"
    }

    # Disk request: twice the input size plus 2 GB.
    Int disk_size = ceil(2 * size(vcf_file, "GB")) + 2

    command <<<
        # Same shell options as merge_vcfs: stop on the first failing command.
        set -e -o pipefail

        # Log the path of the VCF being indexed. The placeholder must be
        # written ~{...}; the previous "{~vcf_file}" was printed literally.
        echo ~{vcf_file}

        bcftools index \
            ~{vcf_file} \
            -o ~{output_prefix}.csi
    >>>

    output {
        File index_file = "~{output_prefix}.csi"
    }

    runtime {
        docker: "nanozoo/bcftools:1.19--1dccf69"
        disks: "local-disk " + disk_size + " SSD"
    }
}
|
||
# Merge several VCFs into <out_prefix>.vcf.gz with `bcftools merge`.
#
# Inputs:
#   vcf_files       VCF files to merge.
#   index_files     Index files, paired with vcf_files by position.
#   merge_options   Optional extra arguments appended to the bcftools call.
#   out_prefix      Basename of the merged output.
#   mem_gb          Memory request in GB.
#   missing_to_ref  When true, pass --missing-to-ref to bcftools merge.
# Outputs:
#   out_file        <out_prefix>.vcf.gz
#   out_index_file  <out_prefix>.vcf.gz.csi
task merge_vcfs {
    input {
        Array[File] vcf_files
        Array[File] index_files
        String? merge_options
        String out_prefix
        Int mem_gb = 16
        Boolean missing_to_ref = false
    }

    # Disk request: three times the total input size plus 10 GB.
    Int disk_size = 10 + ceil(3 * size(vcf_files, "GB"))

    # Optional command-line pieces; each is empty when not requested.
    String ref_fill_flag = if missing_to_ref then "--missing-to-ref" else ""
    String extra_args = select_first([merge_options, ""])

    command <<<
        set -e -o pipefail

        echo "writing input file"
        # Copy the WDL arrays into bash arrays.
        vcf_paths=(~{sep=" " vcf_files})
        index_paths=(~{sep=" " index_files})
        # One line per VCF: "<vcf>##idx##<index>", pairing each file with the
        # index at the same position.
        for i in "${!vcf_paths[@]}"
        do
            echo "${vcf_paths[$i]}##idx##${index_paths[$i]}"
        done > files.txt

        echo "printing files to merge"
        cat files.txt

        echo "Merging files..."
        # Run the merge over the list written above.
        bcftools merge \
            -l files.txt \
            ~{ref_fill_flag} \
            ~{extra_args} \
            -o ~{out_prefix}.vcf.gz \
            --write-index
    >>>

    output {
        File out_file = "~{out_prefix}.vcf.gz"
        File out_index_file = "~{out_prefix}.vcf.gz.csi"
    }

    runtime {
        docker: "nanozoo/bcftools:1.19--1dccf69"
        disks: "local-disk " + disk_size + " SSD"
        memory: mem_gb + " GB"
    }
}