diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile
new file mode 100644
index 000000000..7d0649c10
--- /dev/null
+++ b/docker/lr-trgt/Dockerfile
@@ -0,0 +1,44 @@
+# Tools for tandem-repeat genotyping: TRGT v1.0.0 plus bcftools/htslib 1.20.
+# Two TRGT-format repeat catalogs are unpacked into the build working directory.
+FROM ubuntu:22.04
+
+RUN apt-get update && \
+    apt-get install -y wget bzip2 gcc g++ zlib1g-dev libbz2-dev make ncurses-dev liblzma-dev libcurl4-openssl-dev libssl-dev
+
+# Prebuilt TRGT binary; 755 keeps it executable without making it world-writable,
+# and the downloaded tarball is removed in the same layer.
+RUN wget "https://github.com/PacificBiosciences/trgt/releases/download/v1.0.0/trgt-v1.0.0-x86_64-unknown-linux-gnu.tar.gz" -O "trgt.tar.gz" && \
+    tar -zxvf trgt.tar.gz && \
+    chmod 755 trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt && \
+    mv trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt /usr/bin/ && \
+    rm -r trgt-v1.0.0-x86_64-unknown-linux-gnu/ trgt.tar.gz
+
+RUN wget "https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2" && \
+    bunzip2 bcftools-1.20.tar.bz2 && \
+    tar -xvf bcftools-1.20.tar && \
+    cd bcftools-1.20/ && \
+    ./configure && \
+    make && \
+    make install && \
+    cd .. && \
+    rm -r bcftools-1.20/ && \
+    rm bcftools-1.20.tar
+
+RUN wget "https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2" && \
+    bunzip2 htslib-1.20.tar.bz2 && \
+    tar -xvf htslib-1.20.tar && \
+    cd htslib-1.20/ && \
+    ./configure && \
+    make && \
+    make install && \
+    cd .. && \
+    rm -r htslib-1.20/ && \
+    rm htslib-1.20.tar
+
+# Repeat catalogs; the unpacked .bed names are what the example input JSONs reference by path.
+RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \
+    gunzip adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz
+
+RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/T2T/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \
+    gunzip CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz
+
diff --git a/docker/lr-trgt/Makefile b/docker/lr-trgt/Makefile
new file mode 100644
index 000000000..913d52b83
--- /dev/null
+++ b/docker/lr-trgt/Makefile
@@ -0,0 +1,16 @@
+# Builds the lr-trgt image and pushes it under both a versioned tag and "latest".
+VERSION = 1.0.0
+TAG1 = us.gcr.io/broad-dsp-lrma/lr-trgt:$(VERSION)
+TAG2 = us.gcr.io/broad-dsp-lrma/lr-trgt:latest
+
+all: build push
+
+build:
+	docker build -t $(TAG1) -t $(TAG2) .
+
+build_no_cache:
+	docker build --no-cache -t $(TAG1) -t $(TAG2) .
+
+push:
+	docker push $(TAG1)
+	docker push $(TAG2)
diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl
index fb7a976c9..877b85faa 100644
--- a/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl
+++ b/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl
@@ -4,7 +4,6 @@ import "../../../tasks/Utility/PBUtils.wdl" as PB
 import "../../../tasks/Utility/Utils.wdl" as Utils
 import "../../../tasks/VariantCalling/CallVariantsPBCCS.wdl" as VAR
 import "../../../tasks/Utility/Finalize.wdl" as FF
-
 import "../../../tasks/QC/SampleLevelAlignedMetrics.wdl" as COV
 
 workflow PBCCSWholeGenome {
@@ -31,6 +30,9 @@ workflow PBCCSWholeGenome {
         run_dv_pepper_analysis: "to turn on DV-Pepper analysis or not (non-trivial increase in cost and runtime)"
         ref_scatter_interval_list_locator: "A file holding paths to interval_list files; needed only when running DV-Pepper"
         ref_scatter_interval_list_ids: "A file that gives short IDs to the interval_list files; needed only when running DV-Pepper"
+
+        call_trs: "whether to call TRs"
+        trs_catalog: "optionally specify a non-default catalog to use when calling TRs, for use with TRGT"
     }
 
     input {
diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl
new file mode 100644
index 000000000..c2dfcb1a2
--- /dev/null
+++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl
@@ -0,0 +1,45 @@
+version 1.0
+
+import "../../../tasks/VariantCalling/TRGT.wdl" as TRGT
+import "../../../structs/Structs.wdl"
+
+workflow runTRGT {
+
+    meta {
+        description: "Uses TRGT to size TRs in a bam file."
+    }
+
+    input {
+        File input_bam
+        File input_bam_bai
+        String basename = basename(input_bam, ".bam")
+        String output_gs_path  # NOTE(review): accepted but not referenced anywhere in this workflow
+        File ref_fasta
+        File ref_fasta_index
+        String repeatCatalog  # forwarded unchanged to the task; the example inputs use in-image paths
+        Int is_female # 1=female; 0=male
+        Int cpuCores = 16
+
+        #Optional runtime arguments
+        RuntimeAttr? runtime_attr_override
+    }
+
+    call TRGT.processWithTRGT as processWithTRGT {
+        input:
+            input_bam = input_bam,
+            input_bam_bai = input_bam_bai,
+            basename = basename,
+            ref_fasta = ref_fasta,
+            ref_fasta_index = ref_fasta_index,
+            repeatCatalog = repeatCatalog,
+            is_female = is_female,
+            cpuCores = cpuCores,
+            runtime_attr_override = runtime_attr_override
+    }
+
+    output {
+        File trgt_output_vcf = processWithTRGT.trgt_output_vcf
+        File trgt_output_vcf_idx = processWithTRGT.trgt_output_vcf_idx
+        File trgt_output_bam = processWithTRGT.trgt_output_bam
+    }
+}
diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json
new file mode 100644
index 000000000..73c2f3798
--- /dev/null
+++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json
@@ -0,0 +1,9 @@
+{
+  "runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam",
+  "runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai",
+  "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/GRCh38/",
+  "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa",
+  "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai",
+  "runTRGT.repeatCatalog": "/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed",
+  "runTRGT.is_female": 1
+}
diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json
new file mode 100644
index 000000000..2676e5bea
--- /dev/null
+++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json
@@ -0,0 +1,9 @@
+{
+  "runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam",
+  "runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai",
+  "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/T2T/",
+  "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa",
+  "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa.fai",
+  "runTRGT.repeatCatalog": "/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed",
+  "runTRGT.is_female": 1
+}
diff --git a/wdl/tasks/VariantCalling/TRGT.wdl b/wdl/tasks/VariantCalling/TRGT.wdl
new file mode 100644
index 000000000..f3deedd0a
--- /dev/null
+++ b/wdl/tasks/VariantCalling/TRGT.wdl
@@ -0,0 +1,64 @@
+version 1.0
+
+import "../../structs/Structs.wdl"
+
+task processWithTRGT {
+    input {
+        File input_bam
+        File input_bam_bai
+        String basename  # prefix for every output file name produced by this task
+        File ref_fasta
+        File ref_fasta_index
+        String repeatCatalog  # passed verbatim to `trgt --repeats`; a String, so it is used as a literal path
+        Int is_female # 1=female, 0=male
+        Int cpuCores = 16
+
+        RuntimeAttr? runtime_attr_override
+
+    }
+
+    meta {
+        description: "Uses TRGT to size TRs in a bam file."
+    }
+
+    command <<<
+        set -euo pipefail
+        karyotype="XY"
+        if [[ ~{is_female} -eq 1 ]]; then karyotype="XX"; fi
+        trgt genotype --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{cpuCores} --output-prefix ~{basename}_trgt --karyotype ${karyotype}
+        # -Oz (bgzipped VCF) rather than -Ob (BCF): the result is named .vcf.gz and is
+        # tabix-indexed below, and a TBI index cannot be built for a BCF file.
+        bcftools sort -Oz -o ~{basename}_trgt_sorted.vcf.gz ~{basename}_trgt.vcf.gz
+        mv ~{basename}_trgt_sorted.vcf.gz ~{basename}_trgt.vcf.gz
+        bcftools index -t ~{basename}_trgt.vcf.gz
+
+    >>>
+
+    output {
+        File trgt_output_vcf = "~{basename}_trgt.vcf.gz"
+        File trgt_output_vcf_idx = "~{basename}_trgt.vcf.gz.tbi"
+        File trgt_output_bam = "~{basename}_trgt.spanning.bam"
+    }
+
+    #########################
+    RuntimeAttr default_attr = object {
+        cpu_cores:          cpuCores,
+        mem_gb:             16,
+        disk_gb:            500,
+        boot_disk_gb:       10,
+        preemptible_tries:  3,
+        max_retries:        1,
+        docker:             "us.gcr.io/broad-dsp-lrma/lr-trgt:1.0.0"
+    }
+    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
+
+    runtime {
+        cpu:                    select_first([runtime_attr.cpu_cores,         default_attr.cpu_cores])
+        memory:                 select_first([runtime_attr.mem_gb,            default_attr.mem_gb]) + " GiB"
+        disks: "local-disk " +  select_first([runtime_attr.disk_gb,           default_attr.disk_gb]) + " HDD"
+        bootDiskSizeGb:         select_first([runtime_attr.boot_disk_gb,      default_attr.boot_disk_gb])
+        preemptible:            select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
+        maxRetries:             select_first([runtime_attr.max_retries,       default_attr.max_retries])
+        docker:                 select_first([runtime_attr.docker,            default_attr.docker])
+    }
+}