Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TRGT Special-purpose caller #412

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions docker/lr-trgt/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Image bundling the TRGT tandem-repeat genotyper (v1.0.0), bcftools/htslib 1.20,
# and two TRGT-format repeat catalogs (GRCh38 and T2T-CHM13) stored at the
# filesystem root so workflows can refer to them by absolute path.
FROM ubuntu:22.04

# Toolchain and libraries needed to download and compile bcftools/htslib.
# ca-certificates is listed explicitly because every download below is HTTPS.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y ca-certificates wget bzip2 gcc g++ zlib1g-dev libbz2-dev make ncurses-dev liblzma-dev libcurl4-openssl-dev libssl-dev && \
    rm -rf /var/lib/apt/lists/*

# Prebuilt TRGT binary. 755 (not 777) so the executable is not world-writable;
# the downloaded tarball is removed along with the unpacked directory.
RUN wget "https://github.com/PacificBiosciences/trgt/releases/download/v1.0.0/trgt-v1.0.0-x86_64-unknown-linux-gnu.tar.gz" -O "trgt.tar.gz" && \
    tar -zxvf trgt.tar.gz && \
    chmod 755 trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt && \
    mv trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt /usr/bin/ && \
    rm -r trgt-v1.0.0-x86_64-unknown-linux-gnu/ && \
    rm trgt.tar.gz

# bcftools, built from the release tarball.
RUN wget "https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2" && \
    tar -xjf bcftools-1.20.tar.bz2 && \
    cd bcftools-1.20/ && \
    ./configure && \
    make && \
    make install && \
    cd .. && \
    rm -r bcftools-1.20/ && \
    rm bcftools-1.20.tar.bz2

# htslib, built from the release tarball.
RUN wget "https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2" && \
    tar -xjf htslib-1.20.tar.bz2 && \
    cd htslib-1.20/ && \
    ./configure && \
    make && \
    make install && \
    cd .. && \
    rm -r htslib-1.20/ && \
    rm htslib-1.20.tar.bz2

# GRCh38 repeat catalog. The GRCh38 workflow inputs reference it as
# /GRCh38.adotto_...bed, so it is stored under that name; a symlink at the
# un-prefixed upstream name keeps the previous path working as well.
RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" -O "/GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \
    gunzip /GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz && \
    ln -s /GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed

# T2T-CHM13 repeat catalog, referenced as /CHM13.adotto_...bed.
RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/T2T/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" -O "/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \
    gunzip /CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz

15 changes: 15 additions & 0 deletions docker/lr-trgt/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Builds the lr-trgt Docker image from the Dockerfile in this directory and
# pushes it under both a versioned tag and `latest`.
VERSION = 1.0.0
TAG1 = us.gcr.io/broad-dsp-lrma/lr-trgt:$(VERSION)
TAG2 = us.gcr.io/broad-dsp-lrma/lr-trgt:latest

# None of these targets produce a file of the same name; declaring them phony
# keeps make from skipping them if e.g. a `build` directory ever appears here.
.PHONY: all build build_no_cache push

# Default target: build the image, then publish it.
all: build push

# Build using the Docker layer cache.
build:
	docker build -t $(TAG1) -t $(TAG2) .

# Build from scratch, ignoring any cached layers.
build_no_cache:
	docker build --no-cache -t $(TAG1) -t $(TAG2) .

# Publish both tags to the registry.
push:
	docker push $(TAG1)
	docker push $(TAG2)
4 changes: 3 additions & 1 deletion wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import "../../../tasks/Utility/PBUtils.wdl" as PB
import "../../../tasks/Utility/Utils.wdl" as Utils
import "../../../tasks/VariantCalling/CallVariantsPBCCS.wdl" as VAR
import "../../../tasks/Utility/Finalize.wdl" as FF

import "../../../tasks/QC/SampleLevelAlignedMetrics.wdl" as COV

workflow PBCCSWholeGenome {
Expand All @@ -31,6 +30,9 @@ workflow PBCCSWholeGenome {
run_dv_pepper_analysis: "to turn on DV-Pepper analysis or not (non-trivial increase in cost and runtime)"
ref_scatter_interval_list_locator: "A file holding paths to interval_list files; needed only when running DV-Pepper"
ref_scatter_interval_list_ids: "A file that gives short IDs to the interval_list files; needed only when running DV-Pepper"

call_trs: "whether to call TRs"
trs_catalog: "optionally specify a non-default catalog to use when calling TRs, for use with TRGT"
}

input {
Expand Down
45 changes: 45 additions & 0 deletions wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
version 1.0

import "../../../tasks/VariantCalling/TRGT.wdl" as TRGT
import "../../../structs/Structs.wdl"

workflow runTRGT {

    meta {
        description: "Uses TRGT to size TRs in a bam file."
    }

    parameter_meta {
        input_bam: "BAM of reads to genotype; forwarded to the TRGT task"
        input_bam_bai: "index accompanying input_bam"
        basename: "prefix for output file names; defaults to the name of input_bam with the .bam suffix removed"
        output_gs_path: "declared as a required input but not referenced by any call or output in this workflow"
        ref_fasta: "reference genome FASTA"
        ref_fasta_index: "index accompanying ref_fasta"
        repeatCatalog: "repeat catalog handed to the TRGT task as a plain string (not a File)"
        is_female: "1 for female, 0 for male"
        cpuCores: "number of CPU cores/threads requested for the TRGT task"
        runtime_attr_override: "optional override of the TRGT task's default runtime attributes"
    }

    input {
        # Reads and their index
        File input_bam
        File input_bam_bai
        String basename = basename(input_bam, ".bam")

        # NOTE(review): output_gs_path is never used below -- presumably
        # intended for a finalization step; confirm before relying on it.
        String output_gs_path

        # Reference and repeat definitions
        File ref_fasta
        File ref_fasta_index
        String repeatCatalog

        # Sample sex and resources
        Int is_female  # 1=female; 0=male
        Int cpuCores = 16

        # Optional runtime arguments
        RuntimeAttr? runtime_attr_override
    }

    # Single step: genotype the repeats with TRGT, then sort and index the VCF.
    call TRGT.processWithTRGT as processWithTRGT {
        input:
            basename              = basename,
            input_bam             = input_bam,
            input_bam_bai         = input_bam_bai,
            ref_fasta             = ref_fasta,
            ref_fasta_index       = ref_fasta_index,
            repeatCatalog         = repeatCatalog,
            is_female             = is_female,
            cpuCores              = cpuCores,
            runtime_attr_override = runtime_attr_override
    }

    output {
        File trgt_output_vcf     = processWithTRGT.trgt_output_vcf
        File trgt_output_vcf_idx = processWithTRGT.trgt_output_vcf_idx
        File trgt_output_bam     = processWithTRGT.trgt_output_bam
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam",
"runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai",
"runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/GRCh38/",
"runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa",
"runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai",
"runTRGT.repeatCatalog": "/GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed",
"runTRGT.is_female": 1
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam",
"runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai",
"runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/T2T/",
"runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa",
"runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa.fai",
"runTRGT.repeatCatalog": "/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed",
"runTRGT.is_female": 1
}
62 changes: 62 additions & 0 deletions wdl/tasks/VariantCalling/TRGT.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
version 1.0

import "../../structs/Structs.wdl"

task processWithTRGT {
    input {
        File input_bam
        File input_bam_bai
        String basename
        File ref_fasta
        File ref_fasta_index
        String repeatCatalog
        Int is_female  # 1=female, 0=male
        Int cpuCores = 16

        RuntimeAttr? runtime_attr_override
    }

    meta {
        description: "Uses TRGT to size TRs in a bam file."
    }

    parameter_meta {
        input_bam: "BAM of reads, passed to `trgt genotype --reads`"
        input_bam_bai: "index accompanying input_bam; not named on the command line"
        basename: "prefix for all output files (`<basename>_trgt.*`)"
        ref_fasta: "reference genome FASTA, passed to `trgt genotype --genome`"
        ref_fasta_index: "index accompanying ref_fasta; not named on the command line"
        repeatCatalog: "repeat catalog passed to `--repeats`; a String rather than a File, so it is not localized and must resolve inside the container"
        is_female: "1 selects karyotype XX; any other value selects XY"
        cpuCores: "value for `--threads`, and the default CPU request"
        runtime_attr_override: "optional override of the default runtime attributes"
    }

    command <<<
        set -euo pipefail

        # Map the integer sex flag onto the karyotype string given to TRGT.
        karyotype="XY"
        if [[ ~{is_female} -eq 1 ]]; then karyotype="XX"; fi

        trgt genotype \
            --genome "~{ref_fasta}" \
            --repeats "~{repeatCatalog}" \
            --reads "~{input_bam}" \
            --threads ~{cpuCores} \
            --output-prefix "~{basename}_trgt" \
            --karyotype "${karyotype}"

        # Sort into a bgzip-compressed VCF (-Oz). The previous -Ob wrote BCF
        # into a file named .vcf.gz, and a tabix (.tbi) index cannot be built
        # for BCF, so the `index -t` step and the .tbi output depend on -Oz.
        bcftools sort -Oz -o "~{basename}_trgt_sorted.vcf.gz" "~{basename}_trgt.vcf.gz"
        # Replace the unsorted VCF with the sorted one so the output name is stable.
        mv "~{basename}_trgt_sorted.vcf.gz" "~{basename}_trgt.vcf.gz"
        bcftools index -t "~{basename}_trgt.vcf.gz"
    >>>

    output {
        File trgt_output_vcf = "~{basename}_trgt.vcf.gz"
        File trgt_output_vcf_idx = "~{basename}_trgt.vcf.gz.tbi"
        File trgt_output_bam = "~{basename}_trgt.spanning.bam"
    }

    #########################
    # Defaults used for any runtime attribute the caller does not override.
    RuntimeAttr default_attr = object {
        cpu_cores:          cpuCores,
        mem_gb:             16,
        disk_gb:            500,
        boot_disk_gb:       10,
        preemptible_tries:  3,
        max_retries:        1,
        docker:             "us.gcr.io/broad-dsp-lrma/lr-trgt:1.0.0"
    }
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])

    runtime {
        cpu:                    select_first([runtime_attr.cpu_cores,         default_attr.cpu_cores])
        memory:                 select_first([runtime_attr.mem_gb,            default_attr.mem_gb]) + " GiB"
        disks: "local-disk " +  select_first([runtime_attr.disk_gb,           default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb:         select_first([runtime_attr.boot_disk_gb,      default_attr.boot_disk_gb])
        preemptible:            select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries:             select_first([runtime_attr.max_retries,       default_attr.max_retries])
        docker:                 select_first([runtime_attr.docker,            default_attr.docker])
    }
}