From 98df9652fdef954176d0741ba93347a4a92b4982 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Thu, 20 Jul 2023 10:21:06 -0500 Subject: [PATCH 01/21] Added TRGT.wdl Added task and workflow for running TRGT to genotype tandem repeats in each sample. Included URL to the catalog used on the Phase1 Long Reads data. The WDL currently uses a container stored in AWS's container registry. --- wdl/tasks/VariantCalling/TRGT.wdl | 90 +++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 wdl/tasks/VariantCalling/TRGT.wdl diff --git a/wdl/tasks/VariantCalling/TRGT.wdl b/wdl/tasks/VariantCalling/TRGT.wdl new file mode 100644 index 000000000..5ccddf74a --- /dev/null +++ b/wdl/tasks/VariantCalling/TRGT.wdl @@ -0,0 +1,90 @@ +version 1.0 + +workflow runTRGT { + + meta { + description: "Uses TRGT to size TRs in a bam file." + } + + input { + File input_bam + File input_bam_bai + String basename = basename(input_bam, ".bam") + String output_gs_path + File ref_dict + File ref_fasta + File ref_fasta_index + File repeatCatalog = "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed" + + #Optional runtime arguments + RuntimeAttr? runtime_attr_override + } + + call processWithTRGT { + input: + input_bam = input_bam, + input_bam_bai = input_bam_bai, + basename = basename, + ref_fasta = ref_fasta, + ref_fasta_index = ref_fasta_index, + ref_dict = ref_dict, + repeatCatalog = repeatCatalog, + runtime_attr_override = runtime_attr_override + } + + output { + File trgt_output_vcf = processWithTRGT.trgt_output_vcf + File trgt_output_bam = processWithTRGT.trgt_output_bam + } +} + +task processWithTRGT { + input { + File input_bam + File input_bam_bai + String basename + File ref_fasta + File ref_fasta_index + File ref_dict + File repeatCatalog + + RuntimeAttr? 
runtime_attr_override + + } + ######################### + RuntimeAttr default_attr = object { + cpu_cores: 4, + mem_gb: 16, + disk_gb: 500, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1, + docker: "public.ecr.aws/s5z5a3q9/lr-trgt:0.4.0" + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + meta { + description: "Uses TRGT to size TRs in a bam file." + } + + command <<< + set -euo pipefail + trgt --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{runtime_attr.cpu_cores} --output-prefix ~{basename}_trgt + + >>> + + output { + File trgt_output_vcf = "~{basename}_trgt.vcf.gz" + File trgt_output_bam = "~{basename}_trgt.spanning.bam" + } + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + docker: select_first([runtime_attr.docker, default_attr.docker]) + } +} \ No newline at end of file From 5f76777908eb5bd03eb41a939bfab94a2a0ac162 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Thu, 20 Jul 2023 10:25:29 -0500 Subject: [PATCH 02/21] Added folder to create lr-trgt docker image Added the lr-trgt folder with Dockerfile and Makefile which specifies how the lr-trgt docker image was created that is referenced in the TRGT.wdl task/workflow. The Makefile is currently configured to push a new build of this image to us.gcr.io/broad-dsp-lrma/lr-trgt. If done, the TRGT.wdl task should be updated to use that container as well. 
--- docker/lr-trgt/Dockerfile | 11 +++++++++++ docker/lr-trgt/Makefile | 15 +++++++++++++++ 2 files changed, 26 insertions(+) create mode 100644 docker/lr-trgt/Dockerfile create mode 100644 docker/lr-trgt/Makefile diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile new file mode 100644 index 000000000..48857fa3b --- /dev/null +++ b/docker/lr-trgt/Dockerfile @@ -0,0 +1,11 @@ +FROM us.gcr.io/broad-dsp-lrma/lr-pb:0.1.40 + +RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.4.0/trgt-v0.4.0-linux_x86_64.gz" -O "trgt.gz" && \ + gunzip trgt.gz && \ + chmod 777 trgt && \ + mv trgt /usr/bin/ + +RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.4.0/trvz-v0.4.0-linux_x86_64.gz" -O "trvz.gz" && \ + gunzip trvz.gz && \ + chmod 777 trvz && \ + mv trvz /usr/bin/ diff --git a/docker/lr-trgt/Makefile b/docker/lr-trgt/Makefile new file mode 100644 index 000000000..7450ea7d2 --- /dev/null +++ b/docker/lr-trgt/Makefile @@ -0,0 +1,15 @@ +VERSION = 0.4.0 +TAG1 = us.gcr.io/broad-dsp-lrma/lr-trgt:$(VERSION) +TAG2 = us.gcr.io/broad-dsp-lrma/lr-trgt:latest + +all: build push + +build: + docker build -t $(TAG1) -t $(TAG2) . + +build_no_cache: + docker build --no-cache -t $(TAG1) -t $(TAG2) . 
+ +push: + docker push $(TAG1) + docker push $(TAG2) From d20883a5b55445f98db34f178f095427682386f9 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Thu, 20 Jul 2023 10:37:40 -0500 Subject: [PATCH 03/21] Add TRGT to PBCCSWholeGenome.wdl Added logic to run the TRGT sub-workflow as part of the HiFi variant calling workflow --- .../VariantCalling/PBCCSWholeGenome.wdl | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl index fb7a976c9..3f501f710 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl @@ -4,6 +4,7 @@ import "../../../tasks/Utility/PBUtils.wdl" as PB import "../../../tasks/Utility/Utils.wdl" as Utils import "../../../tasks/VariantCalling/CallVariantsPBCCS.wdl" as VAR import "../../../tasks/Utility/Finalize.wdl" as FF +import "../../../tasks/VariantCalling/TRGT.wdl" as TRGT import "../../../tasks/QC/SampleLevelAlignedMetrics.wdl" as COV @@ -31,6 +32,9 @@ workflow PBCCSWholeGenome { run_dv_pepper_analysis: "to turn on DV-Pepper analysis or not (non-trivial increase in cost and runtime)" ref_scatter_interval_list_locator: "A file holding paths to interval_list files; needed only when running DV-Pepper" ref_scatter_interval_list_ids: "A file that gives short IDs to the interval_list files; needed only when running DV-Pepper" + + call_trs: "whether to call TRs" + trs_catalog: "optionally specify a non-default catalog to use when calling TRs, for use with TRGT" } input { @@ -58,6 +62,9 @@ workflow PBCCSWholeGenome { Int? dvp_memory = 128 File? ref_scatter_interval_list_locator File? ref_scatter_interval_list_ids + + Boolean call_trs = true + File? 
trs_catalog } Map[String, String] ref_map = read_map(ref_map_file) @@ -167,6 +174,17 @@ workflow PBCCSWholeGenome { } } + if call_trs { + call TRGT.runTRGT { + input: + input_bam = bam, + input_bam_bai = bai, + output_gs_path = svdir, + ref_fasta = ref_map['fasta'], + ref_fasta_fai = ref_map['fai'], + ref_dict = ref_map['dict'] + } + output { File aligned_bam = FinalizeBam.gcs_path File aligned_bai = FinalizeBai.gcs_path @@ -205,5 +223,8 @@ workflow PBCCSWholeGenome { File? dvp_g_tbi = FinalizeDVPepperGTbi.gcs_path File? dvp_phased_vcf = FinalizeDVPEPPERPhasedVcf.gcs_path File? dvp_phased_tbi = FinalizeDVPEPPERPhasedTbi.gcs_path + + File? trgt_vcf = runTRGT.trgt_output_vcf + File? trgt_bam = runTRGT.trgt_output_bam } } From 72064196d227a611e6942e93737887a8f4e7e198 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 1 Aug 2023 15:15:19 -0500 Subject: [PATCH 04/21] Move TRGT to separate pipeline --- docker/lr-trgt/Dockerfile | 8 ++- .../VariantCalling/PBCCSWholeGenome.wdl | 19 ------ .../PacBio/VariantCalling/PBCCS_CallTRs.wdl | 42 ++++++++++++ wdl/tasks/VariantCalling/TRGT.wdl | 68 +++++-------------- 4 files changed, 66 insertions(+), 71 deletions(-) create mode 100644 wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile index 48857fa3b..77948569e 100644 --- a/docker/lr-trgt/Dockerfile +++ b/docker/lr-trgt/Dockerfile @@ -1,5 +1,8 @@ -FROM us.gcr.io/broad-dsp-lrma/lr-pb:0.1.40 +FROM ubuntu:20.04 +RUN apt-get update && \ + apt-get install -y wget + RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.4.0/trgt-v0.4.0-linux_x86_64.gz" -O "trgt.gz" && \ gunzip trgt.gz && \ chmod 777 trgt && \ @@ -9,3 +12,6 @@ RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.4.0/trv gunzip trvz.gz && \ chmod 777 trvz && \ mv trvz /usr/bin/ + +RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed" && \ + mv 
adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed /tmp/scratch/ diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl index 3f501f710..877b85faa 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCSWholeGenome.wdl @@ -4,8 +4,6 @@ import "../../../tasks/Utility/PBUtils.wdl" as PB import "../../../tasks/Utility/Utils.wdl" as Utils import "../../../tasks/VariantCalling/CallVariantsPBCCS.wdl" as VAR import "../../../tasks/Utility/Finalize.wdl" as FF -import "../../../tasks/VariantCalling/TRGT.wdl" as TRGT - import "../../../tasks/QC/SampleLevelAlignedMetrics.wdl" as COV workflow PBCCSWholeGenome { @@ -62,9 +60,6 @@ workflow PBCCSWholeGenome { Int? dvp_memory = 128 File? ref_scatter_interval_list_locator File? ref_scatter_interval_list_ids - - Boolean call_trs = true - File? trs_catalog } Map[String, String] ref_map = read_map(ref_map_file) @@ -174,17 +169,6 @@ workflow PBCCSWholeGenome { } } - if call_trs { - call TRGT.runTRGT { - input: - input_bam = bam, - input_bam_bai = bai, - output_gs_path = svdir, - ref_fasta = ref_map['fasta'], - ref_fasta_fai = ref_map['fai'], - ref_dict = ref_map['dict'] - } - output { File aligned_bam = FinalizeBam.gcs_path File aligned_bai = FinalizeBai.gcs_path @@ -223,8 +207,5 @@ workflow PBCCSWholeGenome { File? dvp_g_tbi = FinalizeDVPepperGTbi.gcs_path File? dvp_phased_vcf = FinalizeDVPEPPERPhasedVcf.gcs_path File? dvp_phased_tbi = FinalizeDVPEPPERPhasedTbi.gcs_path - - File? trgt_vcf = runTRGT.trgt_output_vcf - File? 
trgt_bam = runTRGT.trgt_output_bam } } diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl new file mode 100644 index 000000000..f1c3a87b2 --- /dev/null +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl @@ -0,0 +1,42 @@ +version 1.0 + +import "../../../tasks/VariantCalling/TRGT.wdl" as TRGT +import "../../../structs/Structs.wdl" + +workflow runTRGT { + + meta { + description: "Uses TRGT to size TRs in a bam file." + } + + input { + File input_bam + File input_bam_bai + String basename = basename(input_bam, ".bam") + String output_gs_path + File ref_dict + File ref_fasta + File ref_fasta_index + String repeatCatalog = "adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed" + + #Optional runtime arguments + RuntimeAttr? runtime_attr_override + } + + call TRGT.processWithTRGT as processWithTRGT { + input: + input_bam = input_bam, + input_bam_bai = input_bam_bai, + basename = basename, + ref_fasta = ref_fasta, + ref_fasta_index = ref_fasta_index, + ref_dict = ref_dict, + repeatCatalog = repeatCatalog, + runtime_attr_override = runtime_attr_override + } + + output { + File trgt_output_vcf = processWithTRGT.trgt_output_vcf + File trgt_output_bam = processWithTRGT.trgt_output_bam + } +} diff --git a/wdl/tasks/VariantCalling/TRGT.wdl b/wdl/tasks/VariantCalling/TRGT.wdl index 5ccddf74a..b506785f8 100644 --- a/wdl/tasks/VariantCalling/TRGT.wdl +++ b/wdl/tasks/VariantCalling/TRGT.wdl @@ -1,42 +1,6 @@ version 1.0 -workflow runTRGT { - - meta { - description: "Uses TRGT to size TRs in a bam file." - } - - input { - File input_bam - File input_bam_bai - String basename = basename(input_bam, ".bam") - String output_gs_path - File ref_dict - File ref_fasta - File ref_fasta_index - File repeatCatalog = "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed" - - #Optional runtime arguments - RuntimeAttr? 
runtime_attr_override - } - - call processWithTRGT { - input: - input_bam = input_bam, - input_bam_bai = input_bam_bai, - basename = basename, - ref_fasta = ref_fasta, - ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, - repeatCatalog = repeatCatalog, - runtime_attr_override = runtime_attr_override - } - - output { - File trgt_output_vcf = processWithTRGT.trgt_output_vcf - File trgt_output_bam = processWithTRGT.trgt_output_bam - } -} +import "../../structs/Structs.wdl" task processWithTRGT { input { @@ -46,30 +10,20 @@ task processWithTRGT { File ref_fasta File ref_fasta_index File ref_dict - File repeatCatalog + String repeatCatalog + Int cpuCores = 4 RuntimeAttr? runtime_attr_override } - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 4, - mem_gb: 16, - disk_gb: 500, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1, - docker: "public.ecr.aws/s5z5a3q9/lr-trgt:0.4.0" - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - + meta { description: "Uses TRGT to size TRs in a bam file." 
} command <<< set -euo pipefail - trgt --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{runtime_attr.cpu_cores} --output-prefix ~{basename}_trgt + trgt --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{cpuCores} --output-prefix ~{basename}_trgt >>> @@ -77,6 +31,18 @@ task processWithTRGT { File trgt_output_vcf = "~{basename}_trgt.vcf.gz" File trgt_output_bam = "~{basename}_trgt.spanning.bam" } + + ######################### + RuntimeAttr default_attr = object { + cpu_cores: cpuCores, + mem_gb: 16, + disk_gb: 500, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1, + docker: "us.gcr.io/broad-dsp-lrma/lr-trgt:0.4.0" + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) From 53f8e5d7970c186c987dccbe31158c763048f84e Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 13:53:33 -0600 Subject: [PATCH 05/21] Update Dockerfile Updated trgt to v0.8.0 and added T2T repeat catalog --- docker/lr-trgt/Dockerfile | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile index 77948569e..d07fd0a12 100644 --- a/docker/lr-trgt/Dockerfile +++ b/docker/lr-trgt/Dockerfile @@ -2,16 +2,20 @@ FROM ubuntu:20.04 RUN apt-get update && \ apt-get install -y wget - -RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.4.0/trgt-v0.4.0-linux_x86_64.gz" -O "trgt.gz" && \ + +RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.8.0/trgt-v0.8.0-linux_x86_64.gz" -O "trgt.gz" && \ gunzip trgt.gz && \ chmod 777 trgt && \ mv trgt /usr/bin/ -RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.4.0/trvz-v0.4.0-linux_x86_64.gz" -O "trvz.gz" && \ +RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.8.0/trvz-v0.8.0-linux_x86_64.gz" -O "trvz.gz" && \ gunzip trvz.gz && 
\ chmod 777 trvz && \ mv trvz /usr/bin/ -RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed" && \ - mv adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed /tmp/scratch/ +RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" && \ + mv adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /tmp/scratch/GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed + +RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/T2T/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \ + gunzip CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz && \ + mv CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /tmp/scratch/ From d433589ad690fad519065c799a34c5006ba87198 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 13:59:17 -0600 Subject: [PATCH 06/21] Update TRGT.wdl Update TRGT to use v0.8.0 and to allow karyotype-specific analyses --- wdl/tasks/VariantCalling/TRGT.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wdl/tasks/VariantCalling/TRGT.wdl b/wdl/tasks/VariantCalling/TRGT.wdl index b506785f8..70b3ac4be 100644 --- a/wdl/tasks/VariantCalling/TRGT.wdl +++ b/wdl/tasks/VariantCalling/TRGT.wdl @@ -9,9 +9,9 @@ task processWithTRGT { String basename File ref_fasta File ref_fasta_index - File ref_dict String repeatCatalog - Int cpuCores = 4 + String karyotype = "XX" + Int cpuCores = 16 RuntimeAttr? 
runtime_attr_override @@ -23,7 +23,7 @@ task processWithTRGT { command <<< set -euo pipefail - trgt --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{cpuCores} --output-prefix ~{basename}_trgt + trgt --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{cpuCores} --output-prefix ~{basename}_trgt --karyotype ~{karyotype} >>> @@ -40,7 +40,7 @@ task processWithTRGT { boot_disk_gb: 10, preemptible_tries: 3, max_retries: 1, - docker: "us.gcr.io/broad-dsp-lrma/lr-trgt:0.4.0" + docker: "us.gcr.io/broad-dsp-lrma/lr-trgt:0.8.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) @@ -53,4 +53,4 @@ task processWithTRGT { maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) docker: select_first([runtime_attr.docker, default_attr.docker]) } -} \ No newline at end of file +} From 3974b445181b6233ef1f0068800af141ee6eb7a3 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 13:59:41 -0600 Subject: [PATCH 07/21] Update Makefile Version bump --- docker/lr-trgt/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/lr-trgt/Makefile b/docker/lr-trgt/Makefile index 7450ea7d2..a92e50234 100644 --- a/docker/lr-trgt/Makefile +++ b/docker/lr-trgt/Makefile @@ -1,4 +1,4 @@ -VERSION = 0.4.0 +VERSION = 0.8.0 TAG1 = us.gcr.io/broad-dsp-lrma/lr-trgt:$(VERSION) TAG2 = us.gcr.io/broad-dsp-lrma/lr-trgt:latest From 673bb03fd57b88a95b3bc9a0023253644236659c Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 14:07:31 -0600 Subject: [PATCH 08/21] Update PBCCS_CallTRs.wdl updated trgt pipeline to support karyotype-specific calling --- wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl index f1c3a87b2..3182b0ad5 100644 --- 
a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl @@ -14,10 +14,11 @@ workflow runTRGT { File input_bam_bai String basename = basename(input_bam, ".bam") String output_gs_path - File ref_dict File ref_fasta File ref_fasta_index - String repeatCatalog = "adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0.bed" + String repeatCatalog = "adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" + String karyotype = "XX" + Int cpuCores = 16 #Optional runtime arguments RuntimeAttr? runtime_attr_override @@ -30,8 +31,9 @@ workflow runTRGT { basename = basename, ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, - ref_dict = ref_dict, repeatCatalog = repeatCatalog, + karyotype = karyotype, + cpuCores = cpuCores, runtime_attr_override = runtime_attr_override } From 15adde177dc573d65b2bf78019e88a27e890f32c Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 14:10:51 -0600 Subject: [PATCH 09/21] Update PBCCS_CallTRs.wdl Correct repeat catalog filename --- wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl index 3182b0ad5..44bc62c6d 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl @@ -16,7 +16,7 @@ workflow runTRGT { String output_gs_path File ref_fasta File ref_fasta_index - String repeatCatalog = "adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" + String repeatCatalog = "GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" String karyotype = "XX" Int cpuCores = 16 From 12b04133a749e8a5aed92e66d88c65b7eeea9986 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 14:16:10 -0600 Subject: [PATCH 10/21] Create PBCCS_CallTRs_exampleInputs_GRCh38.json --- .../PBCCS_CallTRs_exampleInputs_GRCh38.json | 10 ++++++++++ 
1 file changed, 10 insertions(+) create mode 100644 wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json new file mode 100644 index 000000000..94971e022 --- /dev/null +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json @@ -0,0 +1,10 @@ +{ + "runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19240/alignments/NA19240.bam", + "runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai", + "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/downsampledBamsTRGTOutput/", + "runTRGT.ref_dict": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.dict", + "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa", + "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai", + "runTRGT.trgt": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/trgt", + "runTRGT.repeatCatalog": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/egorCatalogWithFlankingSeq.bed" +} From f6f8c1057c527a5d3485aaa78c3a3fa0760221ee Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 14:18:51 -0600 Subject: [PATCH 11/21] Create PBCCS_CallTRs_exampleInputs_T2T.json T2T example --- .../VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json new file mode 100644 index 
000000000..bc20e50ae --- /dev/null +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json @@ -0,0 +1,9 @@ +{ + "runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam", + "runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai", + "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/T2T/", + "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa", + "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa.fai", + "runTRGT.repeatCatalog": "CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", + "runTRGT.karyotype": "XX" +} From 37eaee2ab7b548533a2af2f788a97493c626e048 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 1 Mar 2024 14:19:51 -0600 Subject: [PATCH 12/21] Update PBCCS_CallTRs_exampleInputs_GRCh38.json Corrected values in example --- .../PBCCS_CallTRs_exampleInputs_GRCh38.json | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json index 94971e022..d046c64fb 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json @@ -1,10 +1,9 @@ { - "runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19240/alignments/NA19240.bam", - "runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai", - "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/downsampledBamsTRGTOutput/", - "runTRGT.ref_dict": 
"gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.dict", + "runTRGT.input_bam": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam", + "runTRGT.input_bam_bai": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/T2T/PBCCSWholeGenome/NA19240/alignments/NA19240.bam.bai", + "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/GRCh38/", "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa", "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai", - "runTRGT.trgt": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/trgt", - "runTRGT.repeatCatalog": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/egorCatalogWithFlankingSeq.bed" + "runTRGT.repeatCatalog": "GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", + "runTRGT.karyotype": "XX" } From 723b058444cf9829d30c1b5da1e720a1de714da1 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:34:56 -0500 Subject: [PATCH 13/21] Update to TRGT v1.0 in Docker image --- docker/lr-trgt/Dockerfile | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile index d07fd0a12..7bc3e3feb 100644 --- a/docker/lr-trgt/Dockerfile +++ b/docker/lr-trgt/Dockerfile @@ -3,15 +3,10 @@ FROM ubuntu:20.04 RUN apt-get update && \ apt-get install -y wget -RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.8.0/trgt-v0.8.0-linux_x86_64.gz" -O "trgt.gz" && \ - gunzip trgt.gz && \ - chmod 777 trgt && \ - mv trgt /usr/bin/ - -RUN wget "http://github.com/PacificBiosciences/trgt/releases/download/v0.8.0/trvz-v0.8.0-linux_x86_64.gz" -O "trvz.gz" && \ - gunzip trvz.gz && \ - chmod 777 trvz && \ - mv trvz /usr/bin/ +RUN wget 
"https://github.com/PacificBiosciences/trgt/releases/download/v1.0.0/trgt-v1.0.0-x86_64-unknown-linux-gnu.tar.gz" -O "trgt.tar.gz" && \ + tar -zxvf trgt.tar.gz && \ + chmod 777 trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt && \ + mv trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt /usr/bin/ RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" && \ mv adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /tmp/scratch/GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed From 78bfab53082ed02982a58886fcb3ee719ac8f0e0 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:35:21 -0500 Subject: [PATCH 14/21] Update Makefile for TRGT to v1.0 --- docker/lr-trgt/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/lr-trgt/Makefile b/docker/lr-trgt/Makefile index a92e50234..913d52b83 100644 --- a/docker/lr-trgt/Makefile +++ b/docker/lr-trgt/Makefile @@ -1,4 +1,4 @@ -VERSION = 0.8.0 +VERSION = 1.0.0 TAG1 = us.gcr.io/broad-dsp-lrma/lr-trgt:$(VERSION) TAG2 = us.gcr.io/broad-dsp-lrma/lr-trgt:latest From 0a895f0657211a84f8b746cbda9ecefd22cd23fd Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:44:41 -0500 Subject: [PATCH 15/21] Update TRGT.wdl to v1.0 --- wdl/tasks/VariantCalling/TRGT.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/wdl/tasks/VariantCalling/TRGT.wdl b/wdl/tasks/VariantCalling/TRGT.wdl index 70b3ac4be..b5fa69230 100644 --- a/wdl/tasks/VariantCalling/TRGT.wdl +++ b/wdl/tasks/VariantCalling/TRGT.wdl @@ -10,7 +10,7 @@ task processWithTRGT { File ref_fasta File ref_fasta_index String repeatCatalog - String karyotype = "XX" + Int is_female # 1=female, 0=male Int cpuCores = 16 RuntimeAttr? 
runtime_attr_override @@ -23,7 +23,9 @@ task processWithTRGT { command <<< set -euo pipefail - trgt --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{cpuCores} --output-prefix ~{basename}_trgt --karyotype ~{karyotype} + karyotype="XY" + if [[ ~{is_female} -eq 1 ]]; then karyotype="XX"; fi + trgt genotype --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{cpuCores} --output-prefix ~{basename}_trgt --karyotype ${karyotype} >>> @@ -40,7 +42,7 @@ task processWithTRGT { boot_disk_gb: 10, preemptible_tries: 3, max_retries: 1, - docker: "us.gcr.io/broad-dsp-lrma/lr-trgt:0.8.0" + docker: "us.gcr.io/broad-dsp-lrma/lr-trgt:1.0.0" } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) From c2c4a98c3b1305d143122b4f18c39552de6c740e Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:46:00 -0500 Subject: [PATCH 16/21] Update PBCCS_CallTRs.wdl to v1.0 --- wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl index 44bc62c6d..2cdcf2333 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl @@ -17,7 +17,7 @@ workflow runTRGT { File ref_fasta File ref_fasta_index String repeatCatalog = "GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" - String karyotype = "XX" + Int is_female # 1=female; 0=male Int cpuCores = 16 #Optional runtime arguments @@ -32,7 +32,7 @@ workflow runTRGT { ref_fasta = ref_fasta, ref_fasta_index = ref_fasta_index, repeatCatalog = repeatCatalog, - karyotype = karyotype, + is_female = is_female, cpuCores = cpuCores, runtime_attr_override = runtime_attr_override } From a54ed635133cfd48e267a9770648704b7cdc21df Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:49:30 -0500 Subject: [PATCH 
17/21] Update Dockerfile --- docker/lr-trgt/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile index 7bc3e3feb..a903d653b 100644 --- a/docker/lr-trgt/Dockerfile +++ b/docker/lr-trgt/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:22.04 RUN apt-get update && \ apt-get install -y wget From aa669813bb92be011d5d72116c88af3bdda23bc5 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:54:37 -0500 Subject: [PATCH 18/21] Update Dockerfile --- docker/lr-trgt/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile index a903d653b..e9ba61a11 100644 --- a/docker/lr-trgt/Dockerfile +++ b/docker/lr-trgt/Dockerfile @@ -8,7 +8,8 @@ RUN wget "https://github.com/PacificBiosciences/trgt/releases/download/v1.0.0/tr chmod 777 trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt && \ mv trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt /usr/bin/ -RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" && \ +RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \ + gunzip adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz && \ mv adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /tmp/scratch/GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/T2T/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \ From 6c7fc741cd910bf16553cd3411190808e4f4d3d9 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:56:28 -0500 Subject: [PATCH 19/21] Update PBCCS_CallTRs_exampleInputs_GRCh38.json --- .../VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json index d046c64fb..382c7929f 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json @@ -5,5 +5,5 @@ "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa", "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai", "runTRGT.repeatCatalog": "GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", - "runTRGT.karyotype": "XX" + "runTRGT.is_female": 1 } From 269d0d97da0d0242d8d400e99bd1771692bba6e4 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Tue, 21 May 2024 15:56:49 -0500 Subject: [PATCH 20/21] Update PBCCS_CallTRs_exampleInputs_T2T.json --- .../PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json index bc20e50ae..92e21fc3e 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json @@ -5,5 +5,5 @@ "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa", "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa.fai", "runTRGT.repeatCatalog": "CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", - "runTRGT.karyotype": "XX" + "runTRGT.is_female": 1 } From 5b796bc61430757ddaecf106fe0dfb9643e259b6 Mon Sep 17 00:00:00 2001 From: mdanzi Date: Fri, 7 Jun 2024 12:43:18 -0500 Subject: [PATCH 21/21] Add bgzipped output files --- 
docker/lr-trgt/Dockerfile | 34 +++++++++++++++---- .../PacBio/VariantCalling/PBCCS_CallTRs.wdl | 3 +- .../PBCCS_CallTRs_exampleInputs_GRCh38.json | 2 +- .../PBCCS_CallTRs_exampleInputs_T2T.json | 2 +- wdl/tasks/VariantCalling/TRGT.wdl | 4 +++ 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/docker/lr-trgt/Dockerfile b/docker/lr-trgt/Dockerfile index e9ba61a11..7d0649c10 100644 --- a/docker/lr-trgt/Dockerfile +++ b/docker/lr-trgt/Dockerfile @@ -1,17 +1,39 @@ FROM ubuntu:22.04 RUN apt-get update && \ - apt-get install -y wget + apt-get install -y wget bzip2 gcc g++ zlib1g-dev libbz2-dev make ncurses-dev liblzma-dev libcurl4-openssl-dev libssl-dev RUN wget "https://github.com/PacificBiosciences/trgt/releases/download/v1.0.0/trgt-v1.0.0-x86_64-unknown-linux-gnu.tar.gz" -O "trgt.tar.gz" && \ tar -zxvf trgt.tar.gz && \ chmod 777 trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt && \ - mv trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt /usr/bin/ + mv trgt-v1.0.0-x86_64-unknown-linux-gnu/trgt /usr/bin/ && \ + rm -r trgt-v1.0.0-x86_64-unknown-linux-gnu/ + +RUN wget "https://github.com/samtools/bcftools/releases/download/1.20/bcftools-1.20.tar.bz2" && \ + bunzip2 bcftools-1.20.tar.bz2 && \ + tar -xvf bcftools-1.20.tar && \ + cd bcftools-1.20/ && \ + ./configure && \ + make && \ + make install && \ + cd .. && \ + rm -r bcftools-1.20/ && \ + rm bcftools-1.20.tar + +RUN wget "https://github.com/samtools/htslib/releases/download/1.20/htslib-1.20.tar.bz2" && \ + bunzip2 htslib-1.20.tar.bz2 && \ + tar -xvf htslib-1.20.tar && \ + cd htslib-1.20/ && \ + ./configure && \ + make && \ + make install && \ + cd .. 
&& \ + rm -r htslib-1.20/ && \ + rm htslib-1.20.tar RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \ - gunzip adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz && \ - mv adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /tmp/scratch/GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed + gunzip adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz && mv adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed RUN wget "https://zuchnerlab.s3.amazonaws.com/RepeatExpansions/TRGT/T2T/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz" && \ - gunzip CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz && \ - mv CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed /tmp/scratch/ + gunzip CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed.gz + diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl index 2cdcf2333..c2dfcb1a2 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs.wdl @@ -16,7 +16,7 @@ workflow runTRGT { String output_gs_path File ref_fasta File ref_fasta_index - String repeatCatalog = "GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed" + String repeatCatalog Int is_female # 1=female; 0=male Int cpuCores = 16 @@ -39,6 +39,7 @@ workflow runTRGT { output { File trgt_output_vcf = processWithTRGT.trgt_output_vcf + File trgt_output_vcf_idx = processWithTRGT.trgt_output_vcf_idx File trgt_output_bam = processWithTRGT.trgt_output_bam } } diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json index 382c7929f..73c2f3798 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json +++
b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_GRCh38.json @@ -4,6 +4,6 @@ "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/GRCh38/", "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa", "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/GRCh38_full_analysis_set_plus_decoy_hla.fa.fai", - "runTRGT.repeatCatalog": "GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", + "runTRGT.repeatCatalog": "/GRCh38.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", "runTRGT.is_female": 1 } diff --git a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json index 92e21fc3e..2676e5bea 100644 --- a/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json +++ b/wdl/pipelines/PacBio/VariantCalling/PBCCS_CallTRs_exampleInputs_T2T.json @@ -4,6 +4,6 @@ "runTRGT.output_gs_path": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/TRGT/T2T/", "runTRGT.ref_fasta": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa", "runTRGT.ref_fasta_index": "gs://fc-c1de2dd5-0f83-49c8-84ee-0f153c4d0a59/resources/chm13v2.0.ebv.fa.fai", - "runTRGT.repeatCatalog": "CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", + "runTRGT.repeatCatalog": "/CHM13.adotto_TRregions_TRGTFormatWithFlankingSeq_v1.0_under1kb.bed", "runTRGT.is_female": 1 } diff --git a/wdl/tasks/VariantCalling/TRGT.wdl b/wdl/tasks/VariantCalling/TRGT.wdl index b5fa69230..f3deedd0a 100644 --- a/wdl/tasks/VariantCalling/TRGT.wdl +++ b/wdl/tasks/VariantCalling/TRGT.wdl @@ -26,11 +26,15 @@ task processWithTRGT { karyotype="XY" if [[ ~{is_female} -eq 1 ]]; then karyotype="XX"; fi trgt genotype --genome ~{ref_fasta} --repeats ~{repeatCatalog} --reads ~{input_bam} --threads ~{cpuCores} --output-prefix ~{basename}_trgt --karyotype 
${karyotype} + bcftools sort -Oz -o ~{basename}_trgt_sorted.vcf.gz ~{basename}_trgt.vcf.gz + mv ~{basename}_trgt_sorted.vcf.gz ~{basename}_trgt.vcf.gz + bcftools index -t ~{basename}_trgt.vcf.gz >>> output { File trgt_output_vcf = "~{basename}_trgt.vcf.gz" + File trgt_output_vcf_idx = "~{basename}_trgt.vcf.gz.tbi" File trgt_output_bam = "~{basename}_trgt.spanning.bam" }