From 24a12fff48f6e20f96ad8d8bc2b713f13b66feaf Mon Sep 17 00:00:00 2001
From: bshifaw <bshifaw@broadinstitute.com>
Date: Fri, 5 Apr 2019 13:21:23 -0400
Subject: [PATCH] Dev (#17)

* updated to GATK4.1.1.0

* minor edits

* changed import to dev for pon

* updated filter bias variable

* updated output variable names, removed artifact_mode from normal-normal

* changed index to idx

* removed artifact_modes

* updated import to next release: 2.4.0
---
 mutect2-normal-normal.inputs.json |   2 +-
 mutect2-normal-normal.wdl         |  40 +-
 mutect2.exome.inputs.json         |  16 +-
 mutect2.wdl                       | 673 ++++++++++++++++--------------
 mutect2_nio.wdl                   | 565 +++++++++++++------------
 mutect2_pon.inputs.json           |   6 +-
 mutect2_pon.wdl                   | 115 +++--
 7 files changed, 760 insertions(+), 657 deletions(-)

diff --git a/mutect2-normal-normal.inputs.json b/mutect2-normal-normal.inputs.json
index 647e2cd..7ada8b7 100644
--- a/mutect2-normal-normal.inputs.json
+++ b/mutect2-normal-normal.inputs.json
@@ -34,6 +34,6 @@
   "Mutect2NormalNormal.scatter_count": "10",
 
   "##_COMMENT4": "Docker",
-  "Mutect2NormalNormal.gatk_docker": "broadinstitute/gatk:4.0.8.1"
+  "Mutect2NormalNormal.gatk_docker": "broadinstitute/gatk:4.1.1.0"
 }
 
diff --git a/mutect2-normal-normal.wdl b/mutect2-normal-normal.wdl
index 1032958..9d46177 100644
--- a/mutect2-normal-normal.wdl
+++ b/mutect2-normal-normal.wdl
@@ -11,7 +11,7 @@
 ## - False Positive VCF files and its index with summary
 ##
 ## Cromwell version support
-## - Successfully tested on v30
+## - Successfully tested on v36
 ##
 ## LICENSING :
 ## This script is released under the WDL source code license (BSD-3) (see LICENSE in
@@ -21,7 +21,7 @@
 ## pages at https://hub.docker.com/r/broadinstitute/* for detailed licensing information
 ## pertaining to the included programs.
 
-import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-snvs-indels/2.3.0/mutect2_nio.wdl" as m2
+import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-snvs-indels/2.4.0/mutect2_nio.wdl" as m2
 
 workflow Mutect2NormalNormal {
 	File? intervals
@@ -34,26 +34,25 @@ workflow Mutect2NormalNormal {
 	File? pon
 	File? gnomad
 	File? variants_for_contamination
-	Boolean? run_orientation_bias_filter
-	Array[String]? artifact_modes
+	Boolean? run_orientation_bias_mixture_model_filter
 	File? realignment_index_bundle
-    String? realignment_extra_args
+	String? realignment_extra_args
 	String? m2_extra_args
-    String? m2_extra_filtering_args
-    Boolean? make_bamout
+	String? m2_extra_filtering_args
+	Boolean? make_bamout
 
-    File? gatk_override
-    String gatk_docker
-    Int? preemptible_attempts
+	File? gatk_override
+	String gatk_docker
+	Int? preemptible_attempts
 
-    Array[Pair[File,File]] bam_pairs = cross(bams, bams)
-    Array[Pair[File,File]] bai_pairs = cross(bais, bais)
+	Array[Pair[File,File]] bam_pairs = cross(bams, bams)
+	Array[Pair[File,File]] bai_pairs = cross(bais, bais)
 
 	scatter(n in range(length(bam_pairs))) {
 	    File tumor_bam = bam_pairs[n].left
 	    File normal_bam = bam_pairs[n].right
 	    File tumor_bai = bai_pairs[n].left
-        File normal_bai = bai_pairs[n].right
+            File normal_bai = bai_pairs[n].right
 
         if (tumor_bam != normal_bam) {
             call m2.Mutect2 {
@@ -62,17 +61,16 @@ workflow Mutect2NormalNormal {
                     ref_fasta = ref_fasta,
                     ref_fai = ref_fai,
                     ref_dict = ref_dict,
-                    tumor_bam = tumor_bam,
-                    tumor_bai = tumor_bai,
-                    normal_bam = normal_bam,
-                    normal_bai = normal_bai,
+                    tumor_reads = tumor_bam,
+                    tumor_reads_index = tumor_bai,
+                    normal_reads = normal_bam,
+                    normal_reads_index = normal_bai,
                     pon = pon,
                     scatter_count = scatter_count,
                     gnomad = gnomad,
                     variants_for_contamination = variants_for_contamination,
-                    run_orientation_bias_filter = run_orientation_bias_filter,
+                    run_orientation_bias_mixture_model_filter = run_orientation_bias_mixture_model_filter,
                     preemptible_attempts = preemptible_attempts,
-                    artifact_modes = artifact_modes,
                     realignment_index_bundle = realignment_index_bundle,
                     realignment_extra_args = realignment_extra_args,
                     m2_extra_args = m2_extra_args,
@@ -88,7 +86,7 @@ workflow Mutect2NormalNormal {
                     ref_fai = ref_fai,
                     ref_dict = ref_dict,
                     filtered_vcf = Mutect2.filtered_vcf,
-                    filtered_vcf_index = Mutect2.filtered_vcf_index,
+                    filtered_vcf_index = Mutect2.filtered_vcf_idx,
                     gatk_override = gatk_override,
                     gatk_docker = gatk_docker
             }
@@ -100,7 +98,7 @@ workflow Mutect2NormalNormal {
 	output {
 		File summary = GatherTables.summary
 		Array[File] false_positives_vcfs = select_all(Mutect2.filtered_vcf)
-		Array[File] false_positives_vcf_indices = select_all(Mutect2.filtered_vcf_index)
+		Array[File] false_positives_vcf_indices = select_all(Mutect2.filtered_vcf_idx)
 	}
 }
 
diff --git a/mutect2.exome.inputs.json b/mutect2.exome.inputs.json
index c117129..7d6e462 100644
--- a/mutect2.exome.inputs.json
+++ b/mutect2.exome.inputs.json
@@ -1,7 +1,7 @@
 {
   "##_COMMENT1": "Runtime",
   "##Mutect2.oncotator_docker": "(optional) String?",
-  "Mutect2.gatk_docker": "broadinstitute/gatk:4.1.0.0",
+  "Mutect2.gatk_docker": "broadinstitute/gatk:4.1.1.0",
 
   "##_COMMENT2": "Workflow options",
   "Mutect2.intervals": "gs://gatk-best-practices/somatic-b37/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.baits.interval_list",
@@ -16,18 +16,18 @@
   "Mutect2.ref_fasta": "gs://gatk-best-practices/somatic-b37/Homo_sapiens_assembly19.fasta",
   "Mutect2.ref_dict": "gs://gatk-best-practices/somatic-b37/Homo_sapiens_assembly19.dict",
   "Mutect2.ref_fai": "gs://gatk-best-practices/somatic-b37/Homo_sapiens_assembly19.fasta.fai",
-  "Mutect2.normal_bam": "gs://gatk-best-practices/somatic-b37/HCC1143_normal.bam",
-  "Mutect2.normal_bai": "gs://gatk-best-practices/somatic-b37/HCC1143_normal.bai",
-  "Mutect2.tumor_bam": "gs://gatk-best-practices/somatic-b37/HCC1143.bam",
-  "Mutect2.tumor_bai": "gs://gatk-best-practices/somatic-b37/HCC1143.bai",
+  "Mutect2.normal_reads": "gs://gatk-best-practices/somatic-b37/HCC1143_normal.bam",
+  "Mutect2.normal_reads_index": "gs://gatk-best-practices/somatic-b37/HCC1143_normal.bai",
+  "Mutect2.tumor_reads": "gs://gatk-best-practices/somatic-b37/HCC1143.bam",
+  "Mutect2.tumor_reads_index": "gs://gatk-best-practices/somatic-b37/HCC1143.bai",
 
   "##_COMMENT4": "Primary resources",
   "Mutect2.pon": "gs://gatk-best-practices/somatic-b37/Mutect2-exome-panel.vcf",
-  "Mutect2.pon_index": "gs://gatk-best-practices/somatic-b37/Mutect2-exome-panel.vcf.idx",
+  "Mutect2.pon_idx": "gs://gatk-best-practices/somatic-b37/Mutect2-exome-panel.vcf.idx",
   "Mutect2.gnomad": "gs://gatk-best-practices/somatic-b37/af-only-gnomad.raw.sites.vcf",
-  "Mutect2.gnomad_index": "gs://gatk-best-practices/somatic-b37/af-only-gnomad.raw.sites.vcf.idx",
+  "Mutect2.gnomad_idx": "gs://gatk-best-practices/somatic-b37/af-only-gnomad.raw.sites.vcf.idx",
   "Mutect2.variants_for_contamination": "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf",
-  "Mutect2.variants_for_contamination_index": "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf.idx",
+  "Mutect2.variants_for_contamination_idx": "gs://gatk-best-practices/somatic-b37/small_exac_common_3.vcf.idx",
   "##Mutect2.realignment_index_bundle": "File? (optional)",
 
   "##_COMMENT5": "Secondary resources",
diff --git a/mutect2.wdl b/mutect2.wdl
index 8467258..99118b6 100644
--- a/mutect2.wdl
+++ b/mutect2.wdl
@@ -8,7 +8,7 @@
 ##
 ## Description of inputs:
 ##
-## ** Runtime ** 
+## ** Runtime **
 ## gatk_docker, oncotator_docker: docker images to use for GATK 4 Mutect2 and for Oncotator
 ## preemptible_attempts: how many preemptions to tolerate before switching to a non-preemptible machine (on Google)
 ## max_retries: how many times to retry failed tasks -- very important on the cloud when there are transient errors
@@ -19,11 +19,9 @@
 ## ** Workflow options **
 ## intervals: genomic intervals (will be used for scatter)
 ## scatter_count: number of parallel jobs to generate when scattering over intervals
-## artifact_modes: types of artifacts to consider in the orientation bias filter (optional)
 ## m2_extra_args, m2_extra_filtering_args: additional arguments for Mutect2 calling and filtering (optional)
 ## split_intervals_extra_args: additional arguments for splitting intervals before scattering (optional)
-## run_orientation_bias_filter: (deprecated) if true, run the orientation bias filter (optional)
-## run_orientation_bias_mixture_model_filter: if true, filter orientation bias sites based on the posterior probabilities computed by the read orientation artifact mixture model (optional)
+## run_orientation_bias_mixture_model_filter: (optional) if true, filter orientation bias sites with the read orientation artifact mixture model.
 ## run_oncotator: if true, annotate the M2 VCFs using oncotator (to produce a TCGA MAF).  Important:  This requires a
 ##                   docker image and should  not be run in environments where docker is unavailable (e.g. SGE cluster on
 ##                   a Broad on-prem VM).  Access to docker hub is also required, since the task downloads a public docker image.
@@ -35,9 +33,9 @@
 ## normal_bam, normal_bam_index: BAM and index for the normal sample
 ##
 ## ** Primary resources ** (optional but strongly recommended)
-## pon, pon_index: optional panel of normals in VCF format containing probable technical artifacts (false positves)
-## gnomad, gnomad_index: optional database of known germline variants (see http://gnomad.broadinstitute.org/downloads)
-## variants_for_contamination, variants_for_contamination_index: VCF of common variants with allele frequencies for calculating contamination
+## pon, pon_idx: optional panel of normals (and its index) in VCF format containing probable technical artifacts (false positves)
+## gnomad, gnomad_idx: optional database of known germline variants (and its index) (see http://gnomad.broadinstitute.org/downloads)
+## variants_for_contamination, variants_for_contamination_idx: VCF of common variants (and its index)with allele frequencies for calculating contamination
 ##
 ## ** Secondary resources ** (for optional tasks)
 ## onco_ds_tar_gz, default_config_file: Oncotator datasources and config file
@@ -47,18 +45,24 @@
 ##
 ## Funcotator parameters (see Funcotator help for more details).
 ## funco_reference_version: "hg19" for hg19 or b37.  "hg38" for hg38.  Default: "hg19"
-## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
+## funco_output_format: "MAF" to produce a MAF file, "VCF" to procude a VCF file.  Default: "MAF"
+## funco_compress: (Only valid if funco_output_format == "VCF" )  If true, will compress the output of Funcotator.  If false, produces an uncompressed output file.  Default: false
+## funco_use_gnomad_AF: If true, will include gnomAD allele frequency annotations in output by connecting to the internet to query gnomAD (this impacts performance).  If false, will not annotate with gnomAD.  Default: false
 ## funco_transcript_selection_mode: How to select transcripts in Funcotator.  ALL, CANONICAL, or BEST_EFFECT
+## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
 ## funco_data_sources_tar_gz:  Funcotator datasources tar gz file.  Bucket location is recommended when running on the cloud.
 ## funco_annotation_defaults:  Default values for annotations, when values are unspecified.  Specified as  <ANNOTATION>:<VALUE>.  For example:  "Center:Broad"
 ## funco_annotation_overrides:  Values for annotations, even when values are unspecified.  Specified as  <ANNOTATION>:<VALUE>.  For example:  "Center:Broad"
+## funcotator_excluded_fields:  Annotations that should not appear in the output (VCF or MAF).  Specified as  <ANNOTATION>.  For example:  "ClinVar_ALLELEID"
+## funco_filter_funcotations: If true, will only annotate variants that have passed filtering (. or PASS value in the FILTER column).  If false, will annotate all variants in the input file.  Default: true
+## funcotator_extra_args: Any additional arguments to pass to Funcotator.  Default: ""
 ##
 ## Outputs :
 ## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam
 ##   file of reassembled reads if requested
 ##
 ## Cromwell version support
-## - Successfully tested on v29
+## - Successfully tested on v34
 ##
 ## LICENSING :
 ## This script is released under the WDL source code license (BSD-3) (see LICENSE in
@@ -73,26 +77,21 @@ workflow Mutect2 {
     File ref_fasta
     File ref_fai
     File ref_dict
-    File tumor_bam
-    File tumor_bai
-    File? normal_bam
-    File? normal_bai
+    File tumor_reads
+    File tumor_reads_index
+    File? normal_reads
+    File? normal_reads_index
     File? pon
-    File? pon_index
+    File? pon_idx
     Int scatter_count
     File? gnomad
-    File? gnomad_index
+    File? gnomad_idx
     File? variants_for_contamination
-    File? variants_for_contamination_index
+    File? variants_for_contamination_idx
     File? realignment_index_bundle
     String? realignment_extra_args
-    Boolean? run_orientation_bias_filter
-    Boolean run_ob_filter = select_first([run_orientation_bias_filter, false]) && (length(select_first([artifact_modes, ["G/T", "C/T"]])) > 0)
     Boolean? run_orientation_bias_mixture_model_filter
-    Boolean run_ob_mm_filter = select_first([run_orientation_bias_mixture_model_filter, false])
-    File? ob_mm_filter_training_intervals
-    Array[String]? artifact_modes
-    File? tumor_sequencing_artifact_metrics
+    Boolean run_ob_filter = select_first([run_orientation_bias_mixture_model_filter, false])
     String? m2_extra_args
     String? m2_extra_filtering_args
     String? split_intervals_extra_args
@@ -111,21 +110,30 @@ workflow Mutect2 {
     String? sequencing_center
     String? sequence_source
     File? default_config_file
+    String? oncotator_extra_args
 
-    # funcotator inputs
+    # Funcotator inputs
     Boolean? run_funcotator
     Boolean run_funcotator_or_default = select_first([run_funcotator, false])
     String? funco_reference_version
+    String? funco_output_format
+    Boolean? funco_compress
+    Boolean? funco_use_gnomad_AF
     File? funco_data_sources_tar_gz
     String? funco_transcript_selection_mode
     File? funco_transcript_selection_list
     Array[String]? funco_annotation_defaults
     Array[String]? funco_annotation_overrides
+    Array[String]? funcotator_excluded_fields
+    Boolean? funco_filter_funcotations
+    String? funcotator_extra_args
+
+    String funco_default_output_format = "MAF"
 
-    File? gatk_override
 
     # runtime
     String gatk_docker
+    File? gatk_override
     String basic_bash_docker = "ubuntu:16.04"
     String? oncotator_docker
     String oncotator_docker_or_default = select_first([oncotator_docker, "broadinstitute/oncotator:1.9.9.0"])
@@ -133,8 +141,6 @@ workflow Mutect2 {
     Boolean filter_oncotator_maf_or_default = select_first([filter_oncotator_maf, true])
     Boolean? filter_funcotations
     Boolean filter_funcotations_or_default = select_first([filter_funcotations, true])
-    String? oncotator_extra_args
-    String? funcotator_extra_args
 
     Int? preemptible_attempts
     Int? max_retries
@@ -144,9 +150,9 @@ workflow Mutect2 {
 
     # Disk sizes used for dynamic sizing
     Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_dict, "GB") + size(ref_fai, "GB"))
-    Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bai, "GB"))
-    Int gnomad_vcf_size = if defined(gnomad) then ceil(size(gnomad, "GB") + size(gnomad_index, "GB")) else 0
-    Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bai, "GB")) else 0
+    Int tumor_reads_size = ceil(size(tumor_reads, "GB") + size(tumor_reads_index, "GB"))
+    Int gnomad_vcf_size = if defined(gnomad) then ceil(size(gnomad, "GB")) else 0
+    Int normal_reads_size = if defined(normal_reads) then ceil(size(normal_reads, "GB") + size(normal_reads_index, "GB")) else 0
 
     # If no tar is provided, the task downloads one from broads ftp server
     Int onco_tar_size = if defined(onco_ds_tar_gz) then ceil(size(onco_ds_tar_gz, "GB") * 3) else 100
@@ -161,15 +167,58 @@ workflow Mutect2 {
     # Small is for metrics/other vcfs
     Float large_input_to_output_multiplier = 2.25
     Float small_input_to_output_multiplier = 2.0
+    Float cram_to_bam_multiplier = 6.0
 
     # logic about output file names -- these are the names *without* .vcf extensions
-    String output_basename = basename(tumor_bam, ".bam")
+    String output_basename = basename(basename(tumor_reads, ".bam"),".cram")  #hacky way to strip either .bam or .cram
     String unfiltered_name = output_basename + "-unfiltered"
     String filtered_name = output_basename + "-filtered"
     String funcotated_name = output_basename + "-funcotated"
 
-    String output_vcf_name = basename(tumor_bam, ".bam") + ".vcf"
+    String output_vcf_name = output_basename + ".vcf"
 
+    # Size M2 differently based on if we are using NIO or not
+    Int tumor_cram_to_bam_disk = ceil(tumor_reads_size * cram_to_bam_multiplier)
+    Int normal_cram_to_bam_disk = ceil(normal_reads_size * cram_to_bam_multiplier)
+
+    if (basename(tumor_reads) != basename(tumor_reads, ".cram")) {
+        call CramToBam as TumorCramToBam {
+            input:
+                ref_fasta = ref_fasta,
+                ref_fai = ref_fai,
+                ref_dict = ref_dict,
+                cram = tumor_reads,
+                crai = tumor_reads_index,
+                name = output_basename,
+                disk_size = tumor_cram_to_bam_disk
+        }
+    }
+
+    String normal_or_empty = select_first([normal_reads, ""])
+    if (basename(normal_or_empty) != basename(normal_or_empty, ".cram")) {
+        String normal_basename = basename(basename(normal_or_empty, ".bam"),".cram")
+        call CramToBam as NormalCramToBam {
+            input:
+                ref_fasta = ref_fasta,
+                ref_fai = ref_fai,
+                ref_dict = ref_dict,
+                cram = normal_reads,
+                crai = normal_reads_index,
+                name = normal_basename,
+                disk_size = normal_cram_to_bam_disk
+        }
+    }
+
+    File tumor_bam = select_first([TumorCramToBam.output_bam, tumor_reads])
+    File tumor_bai = select_first([TumorCramToBam.output_bai, tumor_reads_index])
+    File? normal_bam = if defined(normal_reads) then select_first([NormalCramToBam.output_bam, normal_reads]) else normal_reads
+    File? normal_bai = if defined(normal_reads) then select_first([NormalCramToBam.output_bai, normal_reads_index]) else normal_reads_index
+
+    Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bai, "GB"))
+    Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bai, "GB")) else 0
+
+    Int m2_output_size = tumor_bam_size / scatter_count
+    Int m2_per_scatter_size = (tumor_bam_size + normal_bam_size) + ref_size + gnomad_vcf_size + m2_output_size + disk_pad
 
     call SplitIntervals {
         input:
@@ -186,7 +235,6 @@ workflow Mutect2 {
             disk_space = ref_size + ceil(size(intervals, "GB") * small_input_to_output_multiplier) + disk_pad
     }
 
-    Int m2_output_size = tumor_bam_size / scatter_count
     scatter (subintervals in SplitIntervals.interval_files ) {
         call M2 {
             input:
@@ -199,20 +247,22 @@ workflow Mutect2 {
                 normal_bam = normal_bam,
                 normal_bai = normal_bai,
                 pon = pon,
-                pon_index = pon_index,
+                pon_idx = pon_idx,
                 gnomad = gnomad,
-                gnomad_index = gnomad_index,
+                gnomad_idx = gnomad_idx,
                 preemptible_attempts = preemptible_attempts,
                 max_retries = max_retries,
                 m2_extra_args = m2_extra_args,
+                variants_for_contamination = variants_for_contamination,
+                variants_for_contamination_idx = variants_for_contamination_idx,
                 make_bamout = make_bamout_or_default,
-                artifact_prior_table = LearnReadOrientationModel.artifact_prior_table,
+                run_ob_filter = run_ob_filter,
                 compress = compress,
                 gga_vcf = gga_vcf,
                 gga_vcf_idx = gga_vcf_idx,
                 gatk_override = gatk_override,
                 gatk_docker = gatk_docker,
-                disk_space = tumor_bam_size + normal_bam_size + ref_size + gnomad_vcf_size + m2_output_size + disk_pad
+                disk_space = m2_per_scatter_size
         }
 
         Float sub_vcf_size = size(M2.unfiltered_vcf, "GB")
@@ -226,10 +276,21 @@ workflow Mutect2 {
             max_retries = max_retries
     }
 
+    if (run_ob_filter) {
+        call LearnReadOrientationModel {
+            input:
+                f1r2_tar_gz = M2.f1r2_counts,
+                gatk_override = gatk_override,
+                gatk_docker = gatk_docker,
+                preemptible_attempts = preemptible_attempts,
+                max_retries = max_retries
+        }
+    }
+
     call MergeVCFs {
         input:
             input_vcfs = M2.unfiltered_vcf,
-            input_vcf_indices = M2.unfiltered_vcf_index,
+            input_vcf_indices = M2.unfiltered_vcf_idx,
             output_name = unfiltered_name,
             compress = compress,
             gatk_override = gatk_override,
@@ -261,111 +322,75 @@ workflow Mutect2 {
         }
     }
 
-    if (run_ob_filter && !defined(tumor_sequencing_artifact_metrics)) {
-        call CollectSequencingArtifactMetrics {
-            input:
-                gatk_docker = gatk_docker,
-                ref_fasta = ref_fasta,
-                ref_fai = ref_fai,
-                preemptible_attempts = preemptible_attempts,
-                max_retries = max_retries,
-                tumor_bam = tumor_bam,
-                tumor_bai = tumor_bai,
-                gatk_override = gatk_override,
-                disk_space = tumor_bam_size + ref_size + disk_pad
-        }
+    call MergeStats {
+        input:
+             stats = M2.stats,
+             gatk_override = gatk_override,
+             gatk_docker = gatk_docker
     }
 
-    if (run_ob_mm_filter) {
-        call CollectF1R2Counts {
+    if (defined(variants_for_contamination)) {
+        call MergePileupSummaries as MergeTumorPileups {
             input:
-                gatk_docker = gatk_docker,
-                ref_fasta = ref_fasta,
-                ref_fai = ref_fai,
+                input_tables = M2.tumor_pileups,
+                output_name = output_basename,
                 ref_dict = ref_dict,
-                preemptible_attempts = preemptible_attempts,
-                tumor_bam = tumor_bam,
-                tumor_bai = tumor_bai,
-                gatk_override = gatk_override,
-                disk_space = tumor_bam_size + ref_size + disk_pad,
-                intervals = if defined(ob_mm_filter_training_intervals) then ob_mm_filter_training_intervals else intervals,
-                max_retries = max_retries
-        }
-
-        call LearnReadOrientationModel {
-            input:
-                alt_table = CollectF1R2Counts.alt_table,
-                ref_histogram = CollectF1R2Counts.ref_histogram,
-                alt_histograms = CollectF1R2Counts.alt_histograms,
-                tumor_sample = CollectF1R2Counts.tumor_sample,
                 gatk_override = gatk_override,
                 gatk_docker = gatk_docker,
                 preemptible_attempts = preemptible_attempts,
-                max_retries = max_retries
+                max_retries = max_retries,
+                disk_space = ceil(SumSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad
+        }
+
+        if (defined(normal_bam)){
+            call MergePileupSummaries as MergeNormalPileups {
+                input:
+                    input_tables = M2.normal_pileups,
+                    output_name = output_basename,
+                    ref_dict = ref_dict,
+                    gatk_override = gatk_override,
+                    gatk_docker = gatk_docker,
+                    preemptible_attempts = preemptible_attempts,
+                    max_retries = max_retries,
+                    disk_space = ceil(SumSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad
+            }
         }
-    }
 
-    if (defined(variants_for_contamination)) {
         call CalculateContamination {
             input:
                 gatk_override = gatk_override,
-                intervals = intervals,
-                ref_fasta = ref_fasta,
-                ref_fai = ref_fai,
-                ref_dict = ref_dict,
                 preemptible_attempts = preemptible_attempts,
                 max_retries = max_retries,
                 gatk_docker = gatk_docker,
-                tumor_bam = tumor_bam,
-                tumor_bai = tumor_bai,
-                normal_bam = normal_bam,
-                normal_bai = normal_bai,
-                variants_for_contamination = variants_for_contamination,
-                variants_for_contamination_index = variants_for_contamination_index,
+                tumor_pileups = MergeTumorPileups.merged_table,
+                normal_pileups = MergeNormalPileups.merged_table,
                 disk_space = tumor_bam_size + normal_bam_size + ceil(size(variants_for_contamination, "GB") * small_input_to_output_multiplier) + disk_pad
         }
     }
 
     call Filter {
         input:
+            ref_fasta = ref_fasta,
+            ref_fai = ref_fai,
+            ref_dict = ref_dict,
             gatk_override = gatk_override,
             gatk_docker = gatk_docker,
             intervals = intervals,
             unfiltered_vcf = MergeVCFs.merged_vcf,
-            unfiltered_vcf_index = MergeVCFs.merged_vcf_index,
+            unfiltered_vcf_idx = MergeVCFs.merged_vcf_idx,
             output_name = filtered_name,
             compress = compress,
             preemptible_attempts = preemptible_attempts,
+            mutect_stats = MergeStats.merged_stats,
             max_retries = max_retries,
             contamination_table = CalculateContamination.contamination_table,
             maf_segments = CalculateContamination.maf_segments,
+            artifact_priors_tar_gz = LearnReadOrientationModel.artifact_prior_table,
             m2_extra_filtering_args = m2_extra_filtering_args,
             disk_space = ceil(size(MergeVCFs.merged_vcf, "GB") * small_input_to_output_multiplier) + disk_pad
     }
 
-    if (run_ob_filter) {
-        # Get the metrics either from the workflow input or CollectSequencingArtifactMetrics if no workflow input is provided
-        File input_artifact_metrics = select_first([tumor_sequencing_artifact_metrics, CollectSequencingArtifactMetrics.pre_adapter_metrics])
-
-        call FilterByOrientationBias {
-            input:
-                gatk_override = gatk_override,
-                input_vcf = Filter.filtered_vcf,
-                input_vcf_index = Filter.filtered_vcf_index,
-                output_name = filtered_name,
-                compress = compress,
-                gatk_docker = gatk_docker,
-                preemptible_attempts = preemptible_attempts,
-                max_retries = max_retries,
-                pre_adapter_metrics = input_artifact_metrics,
-                artifact_modes = artifact_modes,
-                disk_space = ceil(size(Filter.filtered_vcf, "GB") * small_input_to_output_multiplier) + ceil(size(input_artifact_metrics, "GB")) + disk_pad
-        }
-    }
-
     if (defined(realignment_index_bundle)) {
-        File realignment_filter_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
-        File realignment_filter_input_idx = select_first([FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
         call FilterAlignmentArtifacts {
             input:
                 gatk_override = gatk_override,
@@ -377,13 +402,13 @@ workflow Mutect2 {
                 max_retries = max_retries,
                 compress = compress,
                 output_name = filtered_name,
-                input_vcf = realignment_filter_input,
-                input_vcf_idx = realignment_filter_input_idx
+                input_vcf = Filter.filtered_vcf,
+                input_vcf_idx = Filter.filtered_vcf_idx
         }
     }
 
     if (run_oncotator_or_default) {
-        File oncotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
+        File oncotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf])
         call oncotate_m2 {
             input:
                 m2_vcf = oncotate_vcf_input,
@@ -404,44 +429,91 @@ workflow Mutect2 {
     }
 
     if (run_funcotator_or_default) {
-        File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
-        File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
-        call FuncotateMaf {
+        File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf])
+        File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_idx, Filter.filtered_vcf_idx])
+        call Funcotate {
             input:
-                input_vcf = funcotate_vcf_input,
-                input_vcf_idx = funcotate_vcf_input_index,
                 ref_fasta = ref_fasta,
-                ref_fasta_index = ref_fai,
+                ref_fai = ref_fai,
                 ref_dict = ref_dict,
+                input_vcf = funcotate_vcf_input,
+                input_vcf_idx = funcotate_vcf_input_index,
                 reference_version = select_first([funco_reference_version, "hg19"]),
+                output_file_base_name = basename(funcotate_vcf_input, ".vcf") + ".annotated",
+                output_format = if defined(funco_output_format) then "" + funco_output_format else funco_default_output_format,
+                compress = if defined(funco_compress) then funco_compress else false,
+                use_gnomad = if defined(funco_use_gnomad_AF) then funco_use_gnomad_AF else false,
                 data_sources_tar_gz = funco_data_sources_tar_gz,
                 case_id = M2.tumor_sample[0],
                 control_id = M2.normal_sample[0],
+                sequencing_center = sequencing_center,
+                sequence_source = sequence_source,
                 transcript_selection_mode = funco_transcript_selection_mode,
                 transcript_selection_list = funco_transcript_selection_list,
                 annotation_defaults = funco_annotation_defaults,
                 annotation_overrides = funco_annotation_overrides,
+                funcotator_excluded_fields = funcotator_excluded_fields,
+                filter_funcotations = filter_funcotations_or_default,
+                extra_args = funcotator_extra_args,
                 gatk_docker = gatk_docker,
                 gatk_override = gatk_override,
-                filter_funcotations = filter_funcotations_or_default,
-                sequencing_center = sequencing_center,
-                sequence_source = sequence_source,
-                disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad,
+                preemptible_attempts = preemptible_attempts,
                 max_retries = max_retries,
-                extra_args = funcotator_extra_args
+                disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad
         }
     }
 
     output {
-        File filtered_vcf = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
-        File filtered_vcf_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
+        File filtered_vcf = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf])
+        File filtered_vcf_idx = select_first([FilterAlignmentArtifacts.filtered_vcf_idx, Filter.filtered_vcf_idx])
+        File filtering_stats = Filter.filtering_stats
+        File mutect_stats = MergeStats.merged_stats
         File? contamination_table = CalculateContamination.contamination_table
+
         File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf
-        File? funcotated_maf = FuncotateMaf.funcotated_output
-        File? preadapter_detail_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics
+        File? funcotated_file = Funcotate.funcotated_output_file
+        File? funcotated_file_index = Funcotate.funcotated_output_file_index
         File? bamout = MergeBamOuts.merged_bam_out
         File? bamout_index = MergeBamOuts.merged_bam_out_index
         File? maf_segments = CalculateContamination.maf_segments
+        File? read_orientation_model_params = LearnReadOrientationModel.artifact_prior_table
+    }
+}
+
+task CramToBam {
+
+    File ref_fasta
+    File ref_fai
+    File ref_dict
+    File cram
+    File crai
+    String name
+    Int disk_size
+    Int? mem
+
+    Int machine_mem = if defined(mem) then mem * 1000 else 6000
+
+    #Calls samtools view to do the conversion
+    command {
+        #Set -e and -o says if any command I run fails in this script, make sure to return a failure
+        set -e
+        set -o pipefail
+
+        samtools view -h -T ${ref_fasta} ${cram} |
+            samtools view -b -o ${name}.bam -
+        samtools index -b ${name}.bam
+        mv ${name}.bam.bai ${name}.bai
+    }
+
+    runtime {
+        docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
+        memory: machine_mem + " MB"
+        disks: "local-disk " + disk_size + " HDD"
+    }
+
+    output {
+        File output_bam = "${name}.bam"
+        File output_bai = "${name}.bai"
     }
 }
 
@@ -480,7 +552,7 @@ task SplitIntervals {
             -scatter ${scatter_count} \
             -O interval-files \
             ${split_intervals_extra_args}
-        cp interval-files/*.intervals .
+        cp interval-files/*.interval_list .
     }
 
     runtime {
@@ -494,7 +566,7 @@ task SplitIntervals {
     }
 
     output {
-        Array[File] interval_files = glob("*.intervals")
+        Array[File] interval_files = glob("*.interval_list")
     }
 }
 
@@ -509,18 +581,22 @@ task M2 {
     File? normal_bam
     File? normal_bai
     File? pon
-    File? pon_index
+    File? pon_idx
     File? gnomad
-    File? gnomad_index
+    File? gnomad_idx
     String? m2_extra_args
     Boolean? make_bamout
+    Boolean? run_ob_filter
     Boolean compress
     File? gga_vcf
     File? gga_vcf_idx
-    File? artifact_prior_table
+    File? variants_for_contamination
+    File? variants_for_contamination_idx
 
     String output_vcf = "output" + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx"
+
+    String output_stats = output_vcf + ".stats"
 
     File? gatk_override
 
@@ -545,12 +621,13 @@ task M2 {
 
         # We need to create these files regardless, even if they stay empty
         touch bamout.bam
+        touch f1r2.tar.gz
         echo "" > normal_name.txt
 
         gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${tumor_bam} -O tumor_name.txt -encode
         tumor_command_line="-I ${tumor_bam} -tumor `cat tumor_name.txt`"
 
-        if [[ -f "${normal_bam}" ]]; then
+        if [[ ! -z "${normal_bam}" ]]; then
             gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${normal_bam} -O normal_name.txt -encode
             normal_command_line="-I ${normal_bam} -normal `cat normal_name.txt`"
         fi
@@ -562,11 +639,26 @@ task M2 {
             ${"--germline-resource " + gnomad} \
             ${"-pon " + pon} \
             ${"-L " + intervals} \
-            ${"--genotyping-mode GENOTYPE_GIVEN_ALLELES --alleles " + gga_vcf} \
+            ${"--alleles " + gga_vcf} \
             -O "${output_vcf}" \
             ${true='--bam-output bamout.bam' false='' make_bamout} \
-            ${"--orientation-bias-artifact-priors " + artifact_prior_table} \
+            ${true='--f1r2-tar-gz f1r2.tar.gz' false='' run_ob_filter} \
             ${m2_extra_args}
+
+        ### GetPileupSummaries
+        # These must be created, even if they remain empty, as cromwell doesn't support optional output
+        touch tumor-pileups.table
+        touch normal-pileups.table
+
+        if [[ ! -z "${variants_for_contamination}" ]]; then
+            gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
+                -V ${variants_for_contamination} -L ${variants_for_contamination} -O tumor-pileups.table
+
+            if [[ ! -z "${normal_bam}" ]]; then
+                gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
+                    -V ${variants_for_contamination} -L ${variants_for_contamination} -O normal-pileups.table
+            fi
+        fi
     >>>
 
     runtime {
@@ -575,16 +667,20 @@ task M2 {
         memory: machine_mem + " MB"
         disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
     output {
         File unfiltered_vcf = "${output_vcf}"
-        File unfiltered_vcf_index = "${output_vcf_index}"
+        File unfiltered_vcf_idx = "${output_vcf_idx}"
         File output_bamOut = "bamout.bam"
         String tumor_sample = read_string("tumor_name.txt")
         String normal_sample = read_string("normal_name.txt")
+        File stats = "${output_stats}"
+        File f1r2_counts = "f1r2.tar.gz"
+        File tumor_pileups = "tumor-pileups.table"
+        File normal_pileups = "normal-pileups.table"
     }
 }
 
@@ -595,7 +691,7 @@ task MergeVCFs {
     String output_name
     Boolean compress
     String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx"
 
     File? gatk_override
 
@@ -626,13 +722,13 @@ task MergeVCFs {
         memory: machine_mem + " MB"
         disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
     output {
         File merged_vcf = "${output_vcf}"
-        File merged_vcf_index = "${output_vcf_index}"
+        File merged_vcf_idx = "${output_vcf_idx}"
     }
 }
 
@@ -682,7 +778,7 @@ task MergeBamOuts {
         memory: machine_mem + " MB"
         disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
@@ -692,13 +788,10 @@ task MergeBamOuts {
     }
 }
 
-# This task is deprecated and is no longer supported
-task CollectSequencingArtifactMetrics {
+
+task MergeStats {
     # inputs
-    File ref_fasta
-    File ref_fai
-    File tumor_bam
-    File tumor_bai
+    Array[File]+ stats
 
     File? gatk_override
 
@@ -712,70 +805,62 @@ task CollectSequencingArtifactMetrics {
     Boolean use_ssd = false
 
     # Mem is in units of GB but our command and memory runtime values are in MB
-    Int machine_mem = if defined(mem) then mem * 1000 else 7000
+    Int machine_mem = if defined(mem) then mem * 1000 else 2000
     Int command_mem = machine_mem - 1000
 
     command {
         set -e
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
-        gatk --java-options "-Xmx${command_mem}m" CollectSequencingArtifactMetrics \
-            -I ${tumor_bam} -O "gatk" -R ${ref_fasta} -VALIDATION_STRINGENCY LENIENT
+
+
+        gatk --java-options "-Xmx${command_mem}m" MergeMutectStats \
+            -stats ${sep=" -stats " stats} -O merged.stats
+
     }
 
     runtime {
         docker: gatk_docker
         bootDiskSizeGb: 12
         memory: machine_mem + " MB"
-        disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
+        disks: "local-disk " + select_first([disk_space, 10]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
     output {
-        File pre_adapter_metrics = "gatk.pre_adapter_detail_metrics"
+        File merged_stats = "merged.stats"
     }
 }
 
-task CollectF1R2Counts {
-    # input
-    File ref_fasta
-    File ref_fai
-    File ref_dict
-    File tumor_bam
-    File tumor_bai
-
+task MergePileupSummaries {
+    # input_tables needs to be optional because GetPileupSummaries is in an if-block
+    Array[File?] input_tables
+    String output_name
     File? gatk_override
-    File? intervals
+    File ref_dict
 
     # runtime
-    Int? max_retries
     String gatk_docker
     Int? mem
     Int? preemptible_attempts
+    Int? max_retries
     Int? disk_space
     Int? cpu
     Boolean use_ssd = false
 
     # Mem is in units of GB but our command and memory runtime values are in MB
-    Int machine_mem = if defined(mem) then mem * 1000 else 7000
+    Int machine_mem = if defined(mem) then mem * 1000 else 3500
     Int command_mem = machine_mem - 1000
 
     command {
         set -e
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
-        
-        # Get the sample name. The task M2 retrieves this information too, but it must be done separately here
-        # to avoid a cyclic dependency
-        gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${tumor_bam} -O tumor_name.txt -encode
-        tumor_name=$(head -n 1 tumor_name.txt)
-
-        gatk --java-options "-Xmx${command_mem}m" CollectF1R2Counts \
-        -I ${tumor_bam} -R ${ref_fasta} \
-        ${"-L " + intervals} \
-        -alt-table "$tumor_name-alt.tsv" \
-        -ref-hist "$tumor_name-ref.metrics" \
-        -alt-hist "$tumor_name-alt-depth1.metrics"
+
+        gatk --java-options "-Xmx${command_mem}m" GatherPileupSummaries \
+        --sequence-dictionary ${ref_dict} \
+        -I ${sep=' -I ' input_tables} \
+        -O ${output_name}.tsv
     }
 
     runtime {
@@ -789,21 +874,14 @@ task CollectF1R2Counts {
     }
 
     output {
-        File alt_table = glob("*-alt.tsv")[0]
-        File ref_histogram = glob("*-ref.metrics")[0]
-        File alt_histograms = glob("*-alt-depth1.metrics")[0]
-        String tumor_sample = read_string("tumor_name.txt")
+        File merged_table = "${output_name}.tsv"
     }
 }
 
+# Learning step of the orientation bias mixture model, which is the recommended orientation bias filter as of September 2018
 task LearnReadOrientationModel {
-    File alt_table
-    File ref_histogram
-    File? alt_histograms
-
+    Array[File] f1r2_tar_gz
     File? gatk_override
-    File? intervals
-    String tumor_sample
 
     # runtime
     Int? max_retries
@@ -823,10 +901,8 @@ task LearnReadOrientationModel {
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
         gatk --java-options "-Xmx${command_mem}m" LearnReadOrientationModel \
-        -alt-table ${alt_table} \
-        -ref-hist ${ref_histogram} \
-        -alt-hist ${alt_histograms} \
-        -O "${tumor_sample}-artifact-prior-table.tsv"
+            -I ${sep=" -I " f1r2_tar_gz} \
+            -O "artifact-priors.tar.gz"
     }
 
     runtime {
@@ -840,23 +916,16 @@ task LearnReadOrientationModel {
     }
 
     output {
-        File artifact_prior_table = "${tumor_sample}-artifact-prior-table.tsv"
+        File artifact_prior_table = "artifact-priors.tar.gz"
     }
 
 }
 
 task CalculateContamination {
     # inputs
-    File? intervals
-    File ref_fasta
-    File ref_fai
-    File ref_dict
-    File tumor_bam
-    File tumor_bai
-    File? normal_bam
-    File? normal_bai
-    File? variants_for_contamination
-    File? variants_for_contamination_index
+    String? intervals
+    File tumor_pileups
+    File? normal_pileups
 
     File? gatk_override
 
@@ -876,15 +945,8 @@ task CalculateContamination {
 
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
-        if [[ -f "${normal_bam}" ]]; then
-            gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-                -V ${variants_for_contamination} -L ${variants_for_contamination} -O normal_pileups.table
-            NORMAL_CMD="-matched normal_pileups.table"
-        fi
-
-        gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-            -V ${variants_for_contamination} -L ${variants_for_contamination} -O pileups.table
-        gatk --java-options "-Xmx${command_mem}m" CalculateContamination -I pileups.table -O contamination.table --tumor-segmentation segments.table $NORMAL_CMD
+        gatk --java-options "-Xmx${command_mem}m" CalculateContamination -I ${tumor_pileups} \
+        -O contamination.table --tumor-segmentation segments.table ${"-matched " + normal_pileups}
     }
 
     runtime {
@@ -893,11 +955,10 @@ task CalculateContamination {
         memory: command_mem + " MB"
         disks: "local-disk " + select_first([disk_space, 100]) + " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
     }
 
     output {
-        File pileups = "pileups.table"
         File contamination_table = "contamination.table"
         File maf_segments = "segments.table"
     }
@@ -906,12 +967,17 @@ task CalculateContamination {
 task Filter {
     # inputs
     File? intervals
+    File ref_fasta
+    File ref_fai
+    File ref_dict
     File unfiltered_vcf
-    File unfiltered_vcf_index
+    File unfiltered_vcf_idx
     String output_name
     Boolean compress
     String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx"
+    File? mutect_stats
+    File? artifact_priors_tar_gz
     File? contamination_table
     File? maf_segments
     String? m2_extra_filtering_args
@@ -937,9 +1003,13 @@ task Filter {
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
         gatk --java-options "-Xmx${command_mem}m" FilterMutectCalls -V ${unfiltered_vcf} \
+            -R ${ref_fasta} \
       	    -O ${output_vcf} \
       	    ${"--contamination-table " + contamination_table} \
       	    ${"--tumor-segmentation " + maf_segments} \
+      	    ${"--ob-priors " + artifact_priors_tar_gz} \
+      	    ${"-stats " + mutect_stats} \
+      	    --filtering-stats filtering.stats \
       	    ${m2_extra_filtering_args}
     }
 
@@ -949,69 +1019,14 @@ task Filter {
         memory: machine_mem + " MB"
         disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
-        cpu: select_first([cpu, 1])
-    }
-
-    output {
-        File filtered_vcf = "${output_vcf}"
-        File filtered_vcf_index = "${output_vcf_index}"
-    }
-}
-
-task FilterByOrientationBias {
-    # input
-    File? gatk_override
-    File input_vcf
-    File input_vcf_index
-    String output_name
-    Boolean compress
-    String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf +  if compress then ".tbi" else ".idx"
-    File pre_adapter_metrics
-    Array[String]? artifact_modes
-
-    # If artifact modes is passed in to the task as [], this task will fail.
-    Array[String] final_artifact_modes = select_first([artifact_modes, ["G/T", "C/T"]])
-
-    # runtime
-    Int? preemptible_attempts
-    Int? max_retries
-    String gatk_docker
-    Int? disk_space
-    Int? mem
-    Int? cpu
-    Boolean use_ssd = false
-
-    # Mem is in units of GB but our command and memory runtime values are in MB
-    Int machine_mem = if defined(mem) then mem * 1000 else 7000
-    Int command_mem = machine_mem - 500
-
-    command {
-        set -e
-
-        export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
-
-        gatk --java-options "-Xmx${command_mem}m" FilterByOrientationBias \
-            -V ${input_vcf} \
-            -AM ${sep=" -AM " final_artifact_modes} \
-            -P ${pre_adapter_metrics} \
-            -O ${output_vcf}
-    }
-
-    runtime {
-        docker: gatk_docker
-        bootDiskSizeGb: 12
-        memory: command_mem + " MB"
-        disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
-        preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
     output {
         File filtered_vcf = "${output_vcf}"
-        File filtered_vcf_index = "${output_vcf_index}"
+        File filtered_vcf_idx = "${output_vcf_idx}"
+        File filtering_stats = "filtering.stats"
     }
 }
 
@@ -1025,7 +1040,7 @@ task FilterAlignmentArtifacts {
     String output_name
     Boolean compress
     String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf +  if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf +  if compress then ".tbi" else ".idx"
     File realignment_index_bundle
     String? realignment_extra_args
 
@@ -1061,13 +1076,13 @@ task FilterAlignmentArtifacts {
         memory: command_mem + " MB"
         disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
     output {
         File filtered_vcf = "${output_vcf}"
-        File filtered_vcf_index = "${output_vcf_index}"
+        File filtered_vcf_idx = "${output_vcf_idx}"
     }
 }
 
@@ -1139,7 +1154,7 @@ task oncotate_m2 {
         bootDiskSizeGb: 12
         disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
@@ -1168,44 +1183,58 @@ task SumFloats {
         docker: "python:2.7"
         disks: "local-disk " + 10 + " HDD"
         preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 3])
+        maxRetries: select_first([max_retries, 0])
     }
 }
 
-task FuncotateMaf {
-     # inputs
+task Funcotate {
+     # ==============
+     # Inputs
      File ref_fasta
-     File ref_fasta_index
+     File ref_fai
      File ref_dict
      File input_vcf
      File input_vcf_idx
      String reference_version
-     String output_format = "MAF"
+     String output_file_base_name
+     String output_format
+     Boolean compress
+     Boolean use_gnomad
+     # This should be updated when a new version of the data sources is released
+     # TODO: Make this dynamically chosen in the command.
+     File? data_sources_tar_gz = "gs://broad-public-datasets/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz"
+     String? control_id
+     String? case_id
      String? sequencing_center
      String? sequence_source
-     String case_id
-     String? control_id
-
-     File? data_sources_tar_gz
      String? transcript_selection_mode
      File? transcript_selection_list
      Array[String]? annotation_defaults
      Array[String]? annotation_overrides
-     Boolean filter_funcotations
+     Array[String]? funcotator_excluded_fields
+     Boolean? filter_funcotations
      File? interval_list
 
      String? extra_args
 
      # ==============
      # Process input args:
+     String output_maf = output_file_base_name + ".maf"
+     String output_maf_index = output_maf + ".idx"
+     String output_vcf = output_file_base_name + if compress then ".vcf.gz" else ".vcf"
+     String output_vcf_idx = output_vcf +  if compress then ".tbi" else ".idx"
+     String output_file = if output_format == "MAF" then output_maf else output_vcf
+     String output_file_index = if output_format == "MAF" then output_maf_index else output_vcf_idx
+     String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else ""
      String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else ""
      String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else ""
-     String filter_funcotations_args = if (filter_funcotations) then " --remove-filtered-variants " else ""
-     String final_output_filename = basename(input_vcf, ".vcf") + ".maf.annotated"
-     # ==============
-
-     # runtime
+     String filter_funcotations_args = if defined(filter_funcotations) && (filter_funcotations) then " --remove-filtered-variants " else ""
+     String excluded_fields_args = if defined(funcotator_excluded_fields) then " --exclude-field " else ""
+     String interval_list_arg = if defined(interval_list) then " -L " else ""
+     String extra_args_arg = select_first([extra_args, ""])
 
+     # ==============
+     # Runtime options:
      String gatk_docker
      File? gatk_override
      Int? mem
@@ -1216,55 +1245,66 @@ task FuncotateMaf {
 
      Boolean use_ssd = false
 
-     # This should be updated when a new version of the data sources is released
-     String default_datasources_version = "funcotator_dataSources.v1.4.20180615"
-
      # You may have to change the following two parameter values depending on the task requirements
      Int default_ram_mb = 3000
-     # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).
+     # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).  Please see [TODO: Link from Jose] for examples.
      Int default_disk_space_gb = 100
 
      # Mem is in units of GB but our command and memory runtime values are in MB
      Int machine_mem = if defined(mem) then mem *1000 else default_ram_mb
      Int command_mem = machine_mem - 1000
 
+     String dollar = "$"
+
      command <<<
          set -e
          export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
-         DATA_SOURCES_TAR_GZ=${data_sources_tar_gz}
-         if [[ ! -e $DATA_SOURCES_TAR_GZ ]] ; then
-             # We have to download the data sources:
-             echo "Data sources gzip does not exist: $DATA_SOURCES_TAR_GZ"
-             echo "Downloading default data sources..."
-             wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/funcotator/${default_datasources_version}.tar.gz
-             tar -zxf ${default_datasources_version}.tar.gz
-             DATA_SOURCES_FOLDER=${default_datasources_version}
-         else
-             # Extract the tar.gz:
-             mkdir datasources_dir
-             tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
-             DATA_SOURCES_FOLDER="$PWD/datasources_dir"
+         # Extract our data sources:
+         echo "Extracting data sources zip file..."
+         mkdir datasources_dir
+         tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
+         DATA_SOURCES_FOLDER="$PWD/datasources_dir"
+
+         # Handle gnomAD:
+         if ${use_gnomad} ; then
+             echo "Enabling gnomAD..."
+             for potential_gnomad_gz in gnomAD_exome.tar.gz gnomAD_genome.tar.gz ; do
+                 if [[ -f ${dollar}{DATA_SOURCES_FOLDER}/${dollar}{potential_gnomad_gz} ]] ; then
+                     cd ${dollar}{DATA_SOURCES_FOLDER}
+                     tar -zvxf ${dollar}{potential_gnomad_gz}
+                     cd -
+                 else
+                     echo "ERROR: Cannot find gnomAD folder: ${dollar}{potential_gnomad_gz}" 1>&2
+                     false
+                 fi
+             done
          fi
 
+         # Run Funcotator:
          gatk --java-options "-Xmx${command_mem}m" Funcotator \
              --data-sources-path $DATA_SOURCES_FOLDER \
              --ref-version ${reference_version} \
              --output-file-format ${output_format} \
              -R ${ref_fasta} \
              -V ${input_vcf} \
-             -O ${final_output_filename} \
-             ${"-L " + interval_list} \
+             -O ${output_file} \
+             ${interval_list_arg} ${default="" interval_list} \
+             --annotation-default normal_barcode:${default="Unknown" control_id} \
+             --annotation-default tumor_barcode:${default="Unknown" case_id} \
+             --annotation-default Center:${default="Unknown" sequencing_center} \
+             --annotation-default source:${default="Unknown" sequence_source} \
              ${"--transcript-selection-mode " + transcript_selection_mode} \
-             ${"--transcript-list " + transcript_selection_list} \
-            --annotation-default normal_barcode:${control_id} \
-            --annotation-default tumor_barcode:${case_id} \
-            --annotation-default Center:${default="Unknown" sequencing_center} \
-            --annotation-default source:${default="Unknown" sequence_source} \
+             ${transcript_selection_arg}${default="" sep=" --transcript-list " transcript_selection_list} \
              ${annotation_def_arg}${default="" sep=" --annotation-default " annotation_defaults} \
              ${annotation_over_arg}${default="" sep=" --annotation-override " annotation_overrides} \
+             ${excluded_fields_args}${default="" sep=" --exclude-field " funcotator_excluded_fields} \
              ${filter_funcotations_args} \
-             ${extra_args}
+             ${extra_args_arg}
+         # Make sure we have a placeholder index for MAF files so this workflow doesn't fail:
+         if [[ "${output_format}" == "MAF" ]] ; then
+            touch ${output_maf_index}
+         fi
      >>>
 
      runtime {
@@ -1273,11 +1313,12 @@ task FuncotateMaf {
          memory: machine_mem + " MB"
          disks: "local-disk " + select_first([disk_space_gb, default_disk_space_gb]) + if use_ssd then " SSD" else " HDD"
          preemptible: select_first([preemptible_attempts, 3])
-         maxRetries: select_first([max_retries, 3])
+         maxRetries: select_first([max_retries, 0])
          cpu: select_first([cpu, 1])
      }
 
      output {
-         File funcotated_output = "${final_output_filename}"
+         File funcotated_output_file = "${output_file}"
+         File funcotated_output_file_index = "${output_file_index}"
      }
  }
diff --git a/mutect2_nio.wdl b/mutect2_nio.wdl
index a7c7e75..ff9f3c6 100644
--- a/mutect2_nio.wdl
+++ b/mutect2_nio.wdl
@@ -23,16 +23,9 @@
 ## ** Workflow options **
 ## intervals: genomic intervals (will be used for scatter)
 ## scatter_count: number of parallel jobs to generate when scattering over intervals
-## artifact_modes: types of artifacts to consider in the orientation bias filter (optional)
 ## m2_extra_args, m2_extra_filtering_args: additional arguments for Mutect2 calling and filtering (optional)
 ## split_intervals_extra_args: additional arguments for splitting intervals before scattering (optional)
-## run_orientation_bias_filter: (optional) if true, run the orientation bias filter, which is the GATK implementation of
-##         D-ToxoG with modifications to allow multiple artifact modes.
-##         For more information on D-ToxoG, see https://software.broadinstitute.org/cancer/cga/dtoxog
 ## run_orientation_bias_mixture_model_filter: (optional) if true, filter orientation bias sites with the read orientation artifact mixture model.
-##         This is the recommended orientation bias filter, particularly for data sequenced on Illumina NovaSeq.
-##         If set to true, artifact_mode will be ignored, as the model learns the artifact modes on its own.
-##         While we offer both options, there's no need to run both the mixture model filter and the one based on D-ToxoG.
 ## run_oncotator: if true, annotate the M2 VCFs using oncotator (to produce a TCGA MAF).  Important:  This requires a
 ##                   docker image and should  not be run in environments where docker is unavailable (e.g. SGE cluster on
 ##                   a Broad on-prem VM).  Access to docker hub is also required, since the task downloads a public docker image.
@@ -56,12 +49,17 @@
 ##
 ## Funcotator parameters (see Funcotator help for more details).
 ## funco_reference_version: "hg19" for hg19 or b37.  "hg38" for hg38.  Default: "hg19"
-## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
+## funco_output_format: "MAF" to produce a MAF file, "VCF" to procude a VCF file.  Default: "MAF"
+## funco_compress: (Only valid if funco_output_format == "VCF" )  If true, will compress the output of Funcotator.  If false, produces an uncompressed output file.  Default: false
+## funco_use_gnomad_AF: If true, will include gnomAD allele frequency annotations in output by connecting to the internet to query gnomAD (this impacts performance).  If false, will not annotate with gnomAD.  Default: false
 ## funco_transcript_selection_mode: How to select transcripts in Funcotator.  ALL, CANONICAL, or BEST_EFFECT
+## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
 ## funco_data_sources_tar_gz:  Funcotator datasources tar gz file.  Bucket location is recommended when running on the cloud.
 ## funco_annotation_defaults:  Default values for annotations, when values are unspecified.  Specified as  <ANNOTATION>:<VALUE>.  For example:  "Center:Broad"
 ## funco_annotation_overrides:  Values for annotations, even when values are unspecified.  Specified as  <ANNOTATION>:<VALUE>.  For example:  "Center:Broad"
 ## funcotator_excluded_fields:  Annotations that should not appear in the output (VCF or MAF).  Specified as  <ANNOTATION>.  For example:  "ClinVar_ALLELEID"
+## funco_filter_funcotations: If true, will only annotate variants that have passed filtering (. or PASS value in the FILTER column).  If false, will annotate all variants in the input file.  Default: true
+## funcotator_extra_args: Any additional arguments to pass to Funcotator.  Default: ""
 ##
 ## Outputs :
 ## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam
@@ -83,23 +81,18 @@ workflow Mutect2 {
     File ref_fasta
     File ref_fai
     File ref_dict
-    File tumor_bam
-    File tumor_bai
-    File? normal_bam
-    File? normal_bai
+    File tumor_reads
+    File tumor_reads_index
+    File? normal_reads
+    File? normal_reads_index
     File? pon
     Int scatter_count
     File? gnomad
     File? variants_for_contamination
     File? realignment_index_bundle
     String? realignment_extra_args
-    Boolean? run_orientation_bias_filter
-    Array[String]? artifact_modes
-    Boolean run_ob_filter = select_first([run_orientation_bias_filter, true]) && (length(select_first([artifact_modes, ["G/T", "C/T"]])) > 0)
     Boolean? run_orientation_bias_mixture_model_filter
-    Boolean run_ob_mm_filter = select_first([run_orientation_bias_mixture_model_filter, false])
-    File? ob_mm_filter_training_intervals
-    File? tumor_sequencing_artifact_metrics
+    Boolean run_ob_filter = select_first([run_orientation_bias_mixture_model_filter, false])
     String? m2_extra_args
     String? m2_extra_filtering_args
     String? split_intervals_extra_args
@@ -119,22 +112,28 @@ workflow Mutect2 {
     File? default_config_file
     String? oncotator_extra_args
 
-    # funcotator inputs
+    # Funcotator inputs
     Boolean? run_funcotator
     Boolean run_funcotator_or_default = select_first([run_funcotator, false])
     String? funco_reference_version
+    String? funco_output_format
+    Boolean? funco_compress
+    Boolean? funco_use_gnomad_AF
     File? funco_data_sources_tar_gz
     String? funco_transcript_selection_mode
     File? funco_transcript_selection_list
     Array[String]? funco_annotation_defaults
     Array[String]? funco_annotation_overrides
     Array[String]? funcotator_excluded_fields
+    Boolean? funco_filter_funcotations
     String? funcotator_extra_args
 
-    File? gatk_override
+    String funco_default_output_format = "MAF"
+
 
     # runtime
     String gatk_docker
+    File? gatk_override
     String basic_bash_docker = "ubuntu:16.04"
     String? oncotator_docker
     String oncotator_docker_or_default = select_first([oncotator_docker, "broadinstitute/oncotator:1.9.9.0"])
@@ -151,9 +150,9 @@ workflow Mutect2 {
 
     # Disk sizes used for dynamic sizing
     Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_dict, "GB") + size(ref_fai, "GB"))
-    Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bai, "GB"))
+    Int tumor_reads_size = ceil(size(tumor_reads, "GB") + size(tumor_reads_index, "GB"))
     Int gnomad_vcf_size = if defined(gnomad) then ceil(size(gnomad, "GB")) else 0
-    Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bai, "GB")) else 0
+    Int normal_reads_size = if defined(normal_reads) then ceil(size(normal_reads, "GB") + size(normal_reads_index, "GB")) else 0
 
     # If no tar is provided, the task downloads one from broads ftp server
     Int onco_tar_size = if defined(onco_ds_tar_gz) then ceil(size(onco_ds_tar_gz, "GB") * 3) else 100
@@ -168,16 +167,56 @@ workflow Mutect2 {
     # Small is for metrics/other vcfs
     Float large_input_to_output_multiplier = 2.25
     Float small_input_to_output_multiplier = 2.0
+    Float cram_to_bam_multiplier = 6.0
 
     # logic about output file names -- these are the names *without* .vcf extensions
-    String output_basename = basename(tumor_bam, ".bam")
+    String output_basename = basename(basename(tumor_reads, ".bam"),".cram")  #hacky way to strip either .bam or .cram
     String unfiltered_name = output_basename + "-unfiltered"
     String filtered_name = output_basename + "-filtered"
     String funcotated_name = output_basename + "-funcotated"
 
-    String output_vcf_name = basename(tumor_bam, ".bam") + ".vcf"
+    String output_vcf_name = output_basename + ".vcf"
 
     # Size M2 differently based on if we are using NIO or not
+    Int tumor_cram_to_bam_disk = ceil(tumor_reads_size * cram_to_bam_multiplier)
+    Int normal_cram_to_bam_disk = ceil(normal_reads_size * cram_to_bam_multiplier)
+
+    if (basename(tumor_reads) != basename(tumor_reads, ".cram")) {
+        call CramToBam as TumorCramToBam {
+            input:
+                ref_fasta = ref_fasta,
+                ref_fai = ref_fai,
+                ref_dict = ref_dict,
+                cram = tumor_reads,
+                crai = tumor_reads_index,
+                name = output_basename,
+                disk_size = tumor_cram_to_bam_disk
+        }
+    }
+
+    String normal_or_empty = select_first([normal_reads, ""])
+    if (basename(normal_or_empty) != basename(normal_or_empty, ".cram")) {
+        String normal_basename = basename(basename(normal_or_empty, ".bam"),".cram")
+        call CramToBam as NormalCramToBam {
+            input:
+                ref_fasta = ref_fasta,
+                ref_fai = ref_fai,
+                ref_dict = ref_dict,
+                cram = normal_reads,
+                crai = normal_reads_index,
+                name = normal_basename,
+                disk_size = normal_cram_to_bam_disk
+        }
+    }
+
+    File tumor_bam = select_first([TumorCramToBam.output_bam, tumor_reads])
+    File tumor_bai = select_first([TumorCramToBam.output_bai, tumor_reads_index])
+    File? normal_bam = if defined(normal_reads) then select_first([NormalCramToBam.output_bam, normal_reads]) else normal_reads
+    File? normal_bai = if defined(normal_reads) then select_first([NormalCramToBam.output_bai, normal_reads_index]) else normal_reads_index
+
+    Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bai, "GB"))
+    Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bai, "GB")) else 0
+
     Int m2_output_size = tumor_bam_size / scatter_count
     Int m2_per_scatter_size = ((tumor_bam_size + normal_bam_size) / scatter_count) + ref_size + (gnomad_vcf_size / scatter_count) + m2_output_size + disk_pad
 
@@ -208,8 +247,9 @@ workflow Mutect2 {
                 preemptible_attempts = preemptible_attempts,
                 max_retries = max_retries,
                 m2_extra_args = m2_extra_args,
-                artifact_prior_table = LearnReadOrientationModel.artifact_prior_table,
+                variants_for_contamination = variants_for_contamination,
                 make_bamout = make_bamout_or_default,
+                run_ob_filter = run_ob_filter,
                 compress = compress,
                 gga_vcf = gga_vcf,
                 gatk_override = gatk_override,
@@ -228,10 +268,21 @@ workflow Mutect2 {
             max_retries = max_retries
     }
 
+    if (run_ob_filter) {
+        call LearnReadOrientationModel {
+            input:
+                f1r2_tar_gz = M2.f1r2_counts,
+                gatk_override = gatk_override,
+                gatk_docker = gatk_docker,
+                preemptible_attempts = preemptible_attempts,
+                max_retries = max_retries
+        }
+    }
+
     call MergeVCFs {
         input:
             input_vcfs = M2.unfiltered_vcf,
-            input_vcf_indices = M2.unfiltered_vcf_index,
+            input_vcf_indices = M2.unfiltered_vcf_idx,
             output_name = unfiltered_name,
             compress = compress,
             gatk_override = gatk_override,
@@ -263,68 +314,55 @@ workflow Mutect2 {
         }
     }
 
-    if (run_ob_filter && !defined(tumor_sequencing_artifact_metrics)) {
-        call CollectSequencingArtifactMetrics {
+    call MergeStats {
+        input:
+             stats = M2.stats,
+             gatk_override = gatk_override,
+             gatk_docker = gatk_docker
+    }
+
+    if (defined(variants_for_contamination)) {
+        call MergePileupSummaries as MergeTumorPileups {
             input:
+                input_tables = M2.tumor_pileups,
+                output_name = output_basename,
+                ref_dict = ref_dict,
+                gatk_override = gatk_override,
                 gatk_docker = gatk_docker,
-                ref_fasta = ref_fasta,
-                ref_fai = ref_fai,
                 preemptible_attempts = preemptible_attempts,
                 max_retries = max_retries,
-                tumor_bam = tumor_bam,
-                tumor_bai = tumor_bai,
-                gatk_override = gatk_override,
-                disk_space = tumor_bam_size + ref_size + disk_pad
+                disk_space = ceil(SumSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad
         }
-    }
 
-        if (run_ob_mm_filter) {
-            call CollectF1R2Counts {
+        if (defined(normal_bam)){
+            call MergePileupSummaries as MergeNormalPileups {
                 input:
-                    gatk_docker = gatk_docker,
-                    ref_fasta = ref_fasta,
-                    ref_fai = ref_fai,
+                    input_tables = M2.normal_pileups,
+                    output_name = output_basename,
                     ref_dict = ref_dict,
-                    preemptible_attempts = preemptible_attempts,
-                    tumor_bam = tumor_bam,
-                    tumor_bai = tumor_bai,
-                    gatk_override = gatk_override,
-                    disk_space = tumor_bam_size + ref_size + disk_pad,
-                    intervals = if defined(ob_mm_filter_training_intervals) then ob_mm_filter_training_intervals else intervals,
-                    max_retries = max_retries
-            }
-
-            call LearnReadOrientationModel {
-                input:
-                    alt_table = CollectF1R2Counts.alt_table,
-                    ref_histogram = CollectF1R2Counts.ref_histogram,
-                    alt_histograms = CollectF1R2Counts.alt_histograms,
-                    tumor_sample = CollectF1R2Counts.tumor_sample,
                     gatk_override = gatk_override,
                     gatk_docker = gatk_docker,
                     preemptible_attempts = preemptible_attempts,
-                    max_retries = max_retries
+                    max_retries = max_retries,
+                    disk_space = ceil(SumSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad
             }
         }
 
-    if (defined(variants_for_contamination)) {
         call CalculateContamination {
             input:
                 gatk_override = gatk_override,
-                intervals = intervals,
-                ref_fasta = ref_fasta,
                 preemptible_attempts = preemptible_attempts,
                 max_retries = max_retries,
                 gatk_docker = gatk_docker,
-                tumor_bam = tumor_bam,
-                normal_bam = normal_bam,
-                variants_for_contamination = variants_for_contamination,
+                tumor_pileups = MergeTumorPileups.merged_table,
+                normal_pileups = MergeNormalPileups.merged_table,
                 disk_space = tumor_bam_size + normal_bam_size + ceil(size(variants_for_contamination, "GB") * small_input_to_output_multiplier) + disk_pad
         }
     }
 
     call Filter {
         input:
+            ref_fasta = ref_fasta,
             gatk_override = gatk_override,
             gatk_docker = gatk_docker,
             intervals = intervals,
@@ -332,34 +370,17 @@ workflow Mutect2 {
             output_name = filtered_name,
             compress = compress,
             preemptible_attempts = preemptible_attempts,
+            mutect_stats = MergeStats.merged_stats,
             max_retries = max_retries,
             contamination_table = CalculateContamination.contamination_table,
             maf_segments = CalculateContamination.maf_segments,
+            artifact_priors_tar_gz = LearnReadOrientationModel.artifact_prior_table,
             m2_extra_filtering_args = m2_extra_filtering_args,
             disk_space = ceil(size(MergeVCFs.merged_vcf, "GB") * small_input_to_output_multiplier) + disk_pad
     }
 
-    if (run_ob_filter) {
-        # Get the metrics either from the workflow input or CollectSequencingArtifactMetrics if no workflow input is provided
-        File input_artifact_metrics = select_first([tumor_sequencing_artifact_metrics, CollectSequencingArtifactMetrics.pre_adapter_metrics])
-
-        call FilterByOrientationBias {
-            input:
-                gatk_override = gatk_override,
-                input_vcf = Filter.filtered_vcf,
-                output_name = filtered_name,
-                compress = compress,
-                gatk_docker = gatk_docker,
-                preemptible_attempts = preemptible_attempts,
-                max_retries = max_retries,
-                pre_adapter_metrics = input_artifact_metrics,
-                artifact_modes = artifact_modes,
-                disk_space = ceil(size(Filter.filtered_vcf, "GB") * small_input_to_output_multiplier) + ceil(size(input_artifact_metrics, "GB")) + disk_pad
-        }
-    }
-
     if (defined(realignment_index_bundle)) {
-        File realignment_filter_input = select_first([FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
+        File realignment_filter_input = Filter.filtered_vcf
         call FilterAlignmentArtifacts {
             input:
                 gatk_override = gatk_override,
@@ -375,7 +396,7 @@ workflow Mutect2 {
     }
 
     if (run_oncotator_or_default) {
-        File oncotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
+        File oncotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf])
         call oncotate_m2 {
             input:
                 m2_vcf = oncotate_vcf_input,
@@ -396,44 +417,89 @@ workflow Mutect2 {
     }
 
     if (run_funcotator_or_default) {
-        File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
-        File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
-        call FuncotateMaf {
+        File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf])
+        File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_idx, Filter.filtered_vcf_idx])
+        call Funcotate {
             input:
+                ref_fasta = ref_fasta,
                 input_vcf = funcotate_vcf_input,
                 input_vcf_idx = funcotate_vcf_input_index,
-                ref_fasta = ref_fasta,
                 reference_version = select_first([funco_reference_version, "hg19"]),
+                output_file_base_name = basename(funcotate_vcf_input, ".vcf") + ".annotated",
+                output_format = if defined(funco_output_format) then "" + funco_output_format else funco_default_output_format,
+                compress = if defined(funco_compress) then funco_compress else false,
+                use_gnomad = if defined(funco_use_gnomad_AF) then funco_use_gnomad_AF else false,
                 data_sources_tar_gz = funco_data_sources_tar_gz,
                 case_id = M2.tumor_sample[0],
                 control_id = M2.normal_sample[0],
+                sequencing_center = sequencing_center,
+                sequence_source = sequence_source,
                 transcript_selection_mode = funco_transcript_selection_mode,
                 transcript_selection_list = funco_transcript_selection_list,
                 annotation_defaults = funco_annotation_defaults,
                 annotation_overrides = funco_annotation_overrides,
+                funcotator_excluded_fields = funcotator_excluded_fields,
+                filter_funcotations = filter_funcotations_or_default,
+                extra_args = funcotator_extra_args,
                 gatk_docker = gatk_docker,
                 gatk_override = gatk_override,
-                filter_funcotations = filter_funcotations_or_default,
-                funcotator_excluded_fields = funcotator_excluded_fields,
-                sequencing_center = sequencing_center,
-                sequence_source = sequence_source,
-                disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + funco_tar_size + disk_pad,
+                preemptible_attempts = preemptible_attempts,
                 max_retries = max_retries,
-                extra_args = funcotator_extra_args
+                disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad
         }
     }
 
     output {
-        File filtered_vcf = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
-        File filtered_vcf_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
+        File filtered_vcf = select_first([FilterAlignmentArtifacts.filtered_vcf, Filter.filtered_vcf])
+        File filtered_vcf_idx = select_first([FilterAlignmentArtifacts.filtered_vcf_idx, Filter.filtered_vcf_idx])
+        File filtering_stats = Filter.filtering_stats
+        File mutect_stats = MergeStats.merged_stats
         File? contamination_table = CalculateContamination.contamination_table
 
         File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf
-        File? funcotated_maf = FuncotateMaf.funcotated_output
-        File? preadapter_detail_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics
+        File? funcotated_file = Funcotate.funcotated_output_file
+        File? funcotated_file_index = Funcotate.funcotated_output_file_index
         File? bamout = MergeBamOuts.merged_bam_out
         File? bamout_index = MergeBamOuts.merged_bam_out_index
         File? maf_segments = CalculateContamination.maf_segments
+        File? read_orientation_model_params = LearnReadOrientationModel.artifact_prior_table
+    }
+}
+
+task CramToBam {
+
+    File ref_fasta
+    File ref_fai
+    File ref_dict
+    File cram
+    File crai
+    String name
+    Int disk_size
+    Int? mem
+
+    Int machine_mem = if defined(mem) then mem * 1000 else 6000
+
+    #Calls samtools view to do the conversion
+    command {
+        #Set -e and -o says if any command I run fails in this script, make sure to return a failure
+        set -e
+        set -o pipefail
+
+        samtools view -h -T ${ref_fasta} ${cram} |
+            samtools view -b -o ${name}.bam -
+        samtools index -b ${name}.bam
+        mv ${name}.bam.bai ${name}.bai
+    }
+
+    runtime {
+        docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.3-1513176735"
+        memory: machine_mem + " MB"
+        disks: "local-disk " + disk_size + " HDD"
+    }
+
+    output {
+        File output_bam = "${name}.bam"
+        File output_bai = "${name}.bai"
     }
 }
 
@@ -500,13 +566,16 @@ task M2 {
     String? gnomad
     String? m2_extra_args
     Boolean? make_bamout
+    Boolean? run_ob_filter
     Boolean compress
     String? gga_vcf
     String? gga_vcf_idx
-    File? artifact_prior_table
+    String? variants_for_contamination
 
     String output_vcf = "output" + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx"
+
+    String output_stats = output_vcf + ".stats"
 
     File? gatk_override
 
@@ -531,6 +600,7 @@ task M2 {
 
         # We need to create these files regardless, even if they stay empty
         touch bamout.bam
+        touch f1r2.tar.gz
         echo "" > normal_name.txt
 
         gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${tumor_bam} -O tumor_name.txt -encode
@@ -548,11 +618,26 @@ task M2 {
             ${"--germline-resource " + gnomad} \
             ${"-pon " + pon} \
             ${"-L " + intervals} \
-            ${"--genotyping-mode GENOTYPE_GIVEN_ALLELES --alleles " + gga_vcf} \
+            ${"--alleles " + gga_vcf} \
             -O "${output_vcf}" \
             ${true='--bam-output bamout.bam' false='' make_bamout} \
-            ${"--orientation-bias-artifact-priors " + artifact_prior_table} \
+            ${true='--f1r2-tar-gz f1r2.tar.gz' false='' run_ob_filter} \
             ${m2_extra_args}
+
+        ### GetPileupSummaries
+        # These must be created, even if they remain empty, as cromwell doesn't support optional output 
+        touch tumor-pileups.table
+        touch normal-pileups.table
+
+        if [[ ! -z "${variants_for_contamination}" ]]; then
+            gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
+                -V ${variants_for_contamination} -L ${variants_for_contamination} -O tumor-pileups.table
+
+            if [[ ! -z "${normal_bam}" ]]; then
+                gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
+                    -V ${variants_for_contamination} -L ${variants_for_contamination} -O normal-pileups.table
+            fi
+        fi
     >>>
 
     runtime {
@@ -567,10 +652,14 @@ task M2 {
 
     output {
         File unfiltered_vcf = "${output_vcf}"
-        File unfiltered_vcf_index = "${output_vcf_index}"
+        File unfiltered_vcf_idx = "${output_vcf_idx}"
         File output_bamOut = "bamout.bam"
         String tumor_sample = read_string("tumor_name.txt")
         String normal_sample = read_string("normal_name.txt")
+        File stats = "${output_stats}"
+        File f1r2_counts = "f1r2.tar.gz"
+        File tumor_pileups = "tumor-pileups.table"
+        File normal_pileups = "normal-pileups.table"
     }
 }
 
@@ -581,7 +670,7 @@ task MergeVCFs {
     String output_name
     Boolean compress
     String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx"
 
     File? gatk_override
 
@@ -618,7 +707,7 @@ task MergeVCFs {
 
     output {
         File merged_vcf = "${output_vcf}"
-        File merged_vcf_index = "${output_vcf_index}"
+        File merged_vcf_idx = "${output_vcf_idx}"
     }
 }
 
@@ -678,12 +767,10 @@ task MergeBamOuts {
     }
 }
 
-task CollectSequencingArtifactMetrics {
+
+task MergeStats {
     # inputs
-    File ref_fasta
-    File ref_fai
-    File tumor_bam
-    File tumor_bai
+    Array[File]+ stats
 
     File? gatk_override
 
@@ -697,72 +784,62 @@ task CollectSequencingArtifactMetrics {
     Boolean use_ssd = false
 
     # Mem is in units of GB but our command and memory runtime values are in MB
-    Int machine_mem = if defined(mem) then mem * 1000 else 7000
+    Int machine_mem = if defined(mem) then mem * 1000 else 2000
     Int command_mem = machine_mem - 1000
 
     command {
         set -e
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
-        gatk --java-options "-Xmx${command_mem}m" CollectSequencingArtifactMetrics \
-            -I ${tumor_bam} -O "gatk" -R ${ref_fasta} -VALIDATION_STRINGENCY LENIENT
+
+
+        gatk --java-options "-Xmx${command_mem}m" MergeMutectStats \
+            -stats ${sep=" -stats " stats} -O merged.stats
+
     }
 
     runtime {
         docker: gatk_docker
         bootDiskSizeGb: 12
         memory: machine_mem + " MB"
-        disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
+        disks: "local-disk " + select_first([disk_space, 10]) + if use_ssd then " SSD" else " HDD"
         preemptible: select_first([preemptible_attempts, 10])
         maxRetries: select_first([max_retries, 0])
         cpu: select_first([cpu, 1])
     }
 
     output {
-        File pre_adapter_metrics = "gatk.pre_adapter_detail_metrics"
+        File merged_stats = "merged.stats"
     }
 }
 
-
-# Data collection step of the orientation bias mixture model, which is the recommended orientation bias filter as of September 2018
-task CollectF1R2Counts {
-    # input
-    File ref_fasta
-    File ref_fai
-    File ref_dict
-    File tumor_bam
-    File tumor_bai
-
+task MergePileupSummaries {
+    # input_tables needs to be optional because GetPileupSummaries is in an if-block
+    Array[File?] input_tables
+    String output_name
     File? gatk_override
-    File? intervals
+    File ref_dict
 
     # runtime
-    Int? max_retries
     String gatk_docker
     Int? mem
     Int? preemptible_attempts
+    Int? max_retries
     Int? disk_space
     Int? cpu
     Boolean use_ssd = false
 
     # Mem is in units of GB but our command and memory runtime values are in MB
-    Int machine_mem = if defined(mem) then mem * 1000 else 7000
+    Int machine_mem = if defined(mem) then mem * 1000 else 3500
     Int command_mem = machine_mem - 1000
 
     command {
         set -e
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
-        # Get the sample name. The task M2 retrieves this information too, but it must be done separately here
-        # to avoid a cyclic dependency
-        gatk --java-options "-Xmx${command_mem}m" GetSampleName -R ${ref_fasta} -I ${tumor_bam} -O tumor_name.txt -encode
-        tumor_name=$(head -n 1 tumor_name.txt)
-
-        gatk --java-options "-Xmx${command_mem}m" CollectF1R2Counts \
-        -I ${tumor_bam} -R ${ref_fasta} \
-        ${"-L " + intervals} \
-        -alt-table "$tumor_name-alt.tsv" \
-        -ref-hist "$tumor_name-ref.metrics" \
-        -alt-hist "$tumor_name-alt-depth1.metrics"
+        gatk --java-options "-Xmx${command_mem}m" GatherPileupSummaries \
+        --sequence-dictionary ${ref_dict} \
+        -I ${sep=' -I ' input_tables} \
+        -O ${output_name}.tsv
     }
 
     runtime {
@@ -776,22 +853,14 @@ task CollectF1R2Counts {
     }
 
     output {
-        File alt_table = glob("*-alt.tsv")[0]
-        File ref_histogram = glob("*-ref.metrics")[0]
-        File alt_histograms = glob("*-alt-depth1.metrics")[0]
-        String tumor_sample = read_string("tumor_name.txt")
-    }
+        File merged_table = "${output_name}.tsv"
+    }    
 }
 
 # Learning step of the orientation bias mixture model, which is the recommended orientation bias filter as of September 2018
 task LearnReadOrientationModel {
-    File alt_table
-    File ref_histogram
-    File? alt_histograms
-
+    Array[File] f1r2_tar_gz
     File? gatk_override
-    File? intervals
-    String tumor_sample
 
     # runtime
     Int? max_retries
@@ -811,10 +880,8 @@ task LearnReadOrientationModel {
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
         gatk --java-options "-Xmx${command_mem}m" LearnReadOrientationModel \
-        -alt-table ${alt_table} \
-        -ref-hist ${ref_histogram} \
-        -alt-hist ${alt_histograms} \
-        -O "${tumor_sample}-artifact-prior-table.tsv"
+            -I ${sep=" -I " f1r2_tar_gz} \
+            -O "artifact-priors.tar.gz"
     }
 
     runtime {
@@ -828,7 +895,7 @@ task LearnReadOrientationModel {
     }
 
     output {
-        File artifact_prior_table = "${tumor_sample}-artifact-prior-table.tsv"
+        File artifact_prior_table = "artifact-priors.tar.gz"
     }
 
 }
@@ -836,10 +903,8 @@ task LearnReadOrientationModel {
 task CalculateContamination {
     # inputs
     String? intervals
-    String ref_fasta
-    String tumor_bam
-    String? normal_bam
-    String? variants_for_contamination
+    File tumor_pileups
+    File? normal_pileups
 
     File? gatk_override
 
@@ -859,15 +924,8 @@ task CalculateContamination {
 
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
-        if [[ -f "${normal_bam}" ]]; then
-            gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-                -V ${variants_for_contamination} -L ${variants_for_contamination} -O normal_pileups.table
-            NORMAL_CMD="-matched normal_pileups.table"
-        fi
-
-        gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-            -V ${variants_for_contamination} -L ${variants_for_contamination} -O pileups.table
-        gatk --java-options "-Xmx${command_mem}m" CalculateContamination -I pileups.table -O contamination.table --tumor-segmentation segments.table $NORMAL_CMD
+        gatk --java-options "-Xmx${command_mem}m" CalculateContamination -I ${tumor_pileups} \
+        -O contamination.table --tumor-segmentation segments.table ${"-matched " + normal_pileups}
     }
 
     runtime {
@@ -880,7 +938,6 @@ task CalculateContamination {
     }
 
     output {
-        File pileups = "pileups.table"
         File contamination_table = "contamination.table"
         File maf_segments = "segments.table"
     }
@@ -889,11 +946,14 @@ task CalculateContamination {
 task Filter {
     # inputs
     String? intervals
+    String ref_fasta
     String unfiltered_vcf
     String output_name
     Boolean compress
     String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf + if compress then ".tbi" else ".idx"
+    File? mutect_stats
+    File? artifact_priors_tar_gz
     File? contamination_table
     File? maf_segments
     String? m2_extra_filtering_args
@@ -919,9 +979,13 @@ task Filter {
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
         gatk --java-options "-Xmx${command_mem}m" FilterMutectCalls -V ${unfiltered_vcf} \
+            -R ${ref_fasta} \
       	    -O ${output_vcf} \
       	    ${"--contamination-table " + contamination_table} \
       	    ${"--tumor-segmentation " + maf_segments} \
+      	    ${"--ob-priors " + artifact_priors_tar_gz} \
+      	    ${"-stats " + mutect_stats} \
+      	    --filtering-stats filtering.stats \
       	    ${m2_extra_filtering_args}
     }
 
@@ -937,62 +1001,8 @@ task Filter {
 
     output {
         File filtered_vcf = "${output_vcf}"
-        File filtered_vcf_index = "${output_vcf_index}"
-    }
-}
-
-task FilterByOrientationBias {
-    # input
-    File? gatk_override
-    String input_vcf
-    String output_name
-    Boolean compress
-    String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf +  if compress then ".tbi" else ".idx"
-    File pre_adapter_metrics
-    Array[String]? artifact_modes
-
-    # If artifact modes is passed in to the task as [], this task will fail.
-    Array[String] final_artifact_modes = select_first([artifact_modes, ["G/T", "C/T"]])
-
-    # runtime
-    Int? preemptible_attempts
-    Int? max_retries
-    String gatk_docker
-    Int? disk_space
-    Int? mem
-    Int? cpu
-    Boolean use_ssd = false
-
-    # Mem is in units of GB but our command and memory runtime values are in MB
-    Int machine_mem = if defined(mem) then mem * 1000 else 7000
-    Int command_mem = machine_mem - 500
-
-    command {
-        set -e
-
-        export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
-
-        gatk --java-options "-Xmx${command_mem}m" FilterByOrientationBias \
-            -V ${input_vcf} \
-            -AM ${sep=" -AM " final_artifact_modes} \
-            -P ${pre_adapter_metrics} \
-            -O ${output_vcf}
-    }
-
-    runtime {
-        docker: gatk_docker
-        bootDiskSizeGb: 12
-        memory: command_mem + " MB"
-        disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
-        preemptible: select_first([preemptible_attempts, 10])
-        maxRetries: select_first([max_retries, 0])
-        cpu: select_first([cpu, 1])
-    }
-
-    output {
-        File filtered_vcf = "${output_vcf}"
-        File filtered_vcf_index = "${output_vcf_index}"
+        File filtered_vcf_idx = "${output_vcf_idx}"
+        File filtering_stats = "filtering.stats"
     }
 }
 
@@ -1004,7 +1014,7 @@ task FilterAlignmentArtifacts {
     String output_name
     Boolean compress
     String output_vcf = output_name + if compress then ".vcf.gz" else ".vcf"
-    String output_vcf_index = output_vcf +  if compress then ".tbi" else ".idx"
+    String output_vcf_idx = output_vcf +  if compress then ".tbi" else ".idx"
     File realignment_index_bundle
     String? realignment_extra_args
 
@@ -1046,7 +1056,7 @@ task FilterAlignmentArtifacts {
 
     output {
         File filtered_vcf = "${output_vcf}"
-        File filtered_vcf_index = "${output_vcf_index}"
+        File filtered_vcf_idx = "${output_vcf_idx}"
     }
 }
 
@@ -1151,40 +1161,52 @@ task SumFloats {
     }
 }
 
-task FuncotateMaf {
-     # inputs
+task Funcotate {
+     # ==============
+     # Inputs
      String ref_fasta
      String input_vcf
      String input_vcf_idx
      String reference_version
-     String output_format = "MAF"
+     String output_file_base_name
+     String output_format
+     Boolean compress
+     Boolean use_gnomad
+     # This should be updated when a new version of the data sources is released
+     # TODO: Make this dynamically chosen in the command.
+     File? data_sources_tar_gz = "gs://broad-public-datasets/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz"
+     String? control_id
+     String? case_id
      String? sequencing_center
      String? sequence_source
-     String case_id
-     String? control_id
-
-     File? data_sources_tar_gz
      String? transcript_selection_mode
      File? transcript_selection_list
      Array[String]? annotation_defaults
      Array[String]? annotation_overrides
      Array[String]? funcotator_excluded_fields
-     Boolean filter_funcotations
+     Boolean? filter_funcotations
      File? interval_list
 
      String? extra_args
 
      # ==============
      # Process input args:
+     String output_maf = output_file_base_name + ".maf"
+     String output_maf_index = output_maf + ".idx"
+     String output_vcf = output_file_base_name + if compress then ".vcf.gz" else ".vcf"
+     String output_vcf_idx = output_vcf +  if compress then ".tbi" else ".idx"
+     String output_file = if output_format == "MAF" then output_maf else output_vcf
+     String output_file_index = if output_format == "MAF" then output_maf_index else output_vcf_idx
+     String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else ""
      String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else ""
      String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else ""
-     String filter_funcotations_args = if (filter_funcotations) then " --remove-filtered-variants " else ""
+     String filter_funcotations_args = if defined(filter_funcotations) && (filter_funcotations) then " --remove-filtered-variants " else ""
      String excluded_fields_args = if defined(funcotator_excluded_fields) then " --exclude-field " else ""
-     String final_output_filename = basename(input_vcf, ".vcf") + ".maf.annotated"
-     # ==============
-
-     # runtime
+     String interval_list_arg = if defined(interval_list) then " -L " else ""
+     String extra_args_arg = select_first([extra_args, ""])
 
+     # ==============
+     # Runtime options:
      String gatk_docker
      File? gatk_override
      Int? mem
@@ -1195,56 +1217,66 @@ task FuncotateMaf {
 
      Boolean use_ssd = false
 
-     # This should be updated when a new version of the data sources is released
-     String default_datasources_version = "funcotator_dataSources.v1.4.20180615"
-
      # You may have to change the following two parameter values depending on the task requirements
      Int default_ram_mb = 3000
-     # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).
+     # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).  Please see [TODO: Link from Jose] for examples.
      Int default_disk_space_gb = 100
 
      # Mem is in units of GB but our command and memory runtime values are in MB
      Int machine_mem = if defined(mem) then mem *1000 else default_ram_mb
      Int command_mem = machine_mem - 1000
 
+     String dollar = "$"
+
      command <<<
          set -e
          export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
 
-         DATA_SOURCES_TAR_GZ=${data_sources_tar_gz}
-         if [[ ! -e $DATA_SOURCES_TAR_GZ ]] ; then
-             # We have to download the data sources:
-             echo "Data sources gzip does not exist: $DATA_SOURCES_TAR_GZ"
-             echo "Downloading default data sources..."
-             wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/funcotator/${default_datasources_version}.tar.gz
-             tar -zxf ${default_datasources_version}.tar.gz
-             DATA_SOURCES_FOLDER=${default_datasources_version}
-         else
-             # Extract the tar.gz:
-             mkdir datasources_dir
-             tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
-             DATA_SOURCES_FOLDER="$PWD/datasources_dir"
+         # Extract our data sources:
+         echo "Extracting data sources zip file..."
+         mkdir datasources_dir
+         tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
+         DATA_SOURCES_FOLDER="$PWD/datasources_dir"
+
+         # Handle gnomAD:
+         if ${use_gnomad} ; then
+             echo "Enabling gnomAD..."
+             for potential_gnomad_gz in gnomAD_exome.tar.gz gnomAD_genome.tar.gz ; do
+                 if [[ -f ${dollar}{DATA_SOURCES_FOLDER}/${dollar}{potential_gnomad_gz} ]] ; then
+                     cd ${dollar}{DATA_SOURCES_FOLDER}
+                     tar -zvxf ${dollar}{potential_gnomad_gz}
+                     cd -
+                 else
+                     echo "ERROR: Cannot find gnomAD folder: ${dollar}{potential_gnomad_gz}" 1>&2
+                     false
+                 fi
+             done
          fi
 
+         # Run Funcotator:
          gatk --java-options "-Xmx${command_mem}m" Funcotator \
              --data-sources-path $DATA_SOURCES_FOLDER \
              --ref-version ${reference_version} \
              --output-file-format ${output_format} \
              -R ${ref_fasta} \
              -V ${input_vcf} \
-             -O ${final_output_filename} \
-             ${"-L " + interval_list} \
+             -O ${output_file} \
+             ${interval_list_arg} ${default="" interval_list} \
+             --annotation-default normal_barcode:${default="Unknown" control_id} \
+             --annotation-default tumor_barcode:${default="Unknown" case_id} \
+             --annotation-default Center:${default="Unknown" sequencing_center} \
+             --annotation-default source:${default="Unknown" sequence_source} \
              ${"--transcript-selection-mode " + transcript_selection_mode} \
-             ${"--transcript-list " + transcript_selection_list} \
-            --annotation-default normal_barcode:${control_id} \
-            --annotation-default tumor_barcode:${case_id} \
-            --annotation-default Center:${default="Unknown" sequencing_center} \
-            --annotation-default source:${default="Unknown" sequence_source} \
+             ${transcript_selection_arg}${default="" sep=" --transcript-list " transcript_selection_list} \
              ${annotation_def_arg}${default="" sep=" --annotation-default " annotation_defaults} \
              ${annotation_over_arg}${default="" sep=" --annotation-override " annotation_overrides} \
              ${excluded_fields_args}${default="" sep=" --exclude-field " funcotator_excluded_fields} \
              ${filter_funcotations_args} \
-             ${extra_args}
+             ${extra_args_arg}
+         # Make sure we have a placeholder index for MAF files so this workflow doesn't fail:
+         if [[ "${output_format}" == "MAF" ]] ; then
+            touch ${output_maf_index}
+         fi
      >>>
 
      runtime {
@@ -1258,6 +1290,7 @@ task FuncotateMaf {
      }
 
      output {
-         File funcotated_output = "${final_output_filename}"
+         File funcotated_output_file = "${output_file}"
+         File funcotated_output_file_index = "${output_file_index}"
      }
  }
diff --git a/mutect2_pon.inputs.json b/mutect2_pon.inputs.json
index b560c2d..26524bc 100644
--- a/mutect2_pon.inputs.json
+++ b/mutect2_pon.inputs.json
@@ -1,7 +1,7 @@
 {
   "##_COMMENT1": "Inputs",
-  "Mutect2_Panel.normal_bams": "Array[File]",
-  "Mutect2_Panel.normal_bais": "Array[File]",
+  "Mutect2_Panel.normal_reads": "Array[File]",
+  "Mutect2_Panel.normal_reads_index": "Array[File]",
   "Mutect2_Panel.pon_name": "String",
   "#Mutect2_Panel.Mutect2.normal_bam": "(optional) File?",
   "#Mutect2_Panel.Mutect2.normal_bai": "(optional) File?",
@@ -49,7 +49,7 @@
   "#Mutect2_Panel.Mutect2.run_orientation_bias_filter": "(optional) Boolean?",
 
   "##_COMMENT6": "Docker",
-  "Mutect2_Panel.gatk_docker": "broadinstitute/gatk:4.0.8.1",
+  "Mutect2_Panel.gatk_docker": "broadinstitute/gatk:4.1.1.0",
   "#Mutect2_Panel.Mutect2.oncotator_docker": "(optional) String?",
 
   "##_COMMENT7": "Disk space",
diff --git a/mutect2_pon.wdl b/mutect2_pon.wdl
index 552f2c8..de91ca4 100644
--- a/mutect2_pon.wdl
+++ b/mutect2_pon.wdl
@@ -1,20 +1,15 @@
 #  Create a Mutect2 panel of normals
 #
 #  Description of inputs
-#  gatk: java jar file containing gatk 4
 #  intervals: genomic intervals
 #  ref_fasta, ref_fai, ref_dict: reference genome, index, and dictionary
-#  normal_bams, normal_bais: arrays of normal bams and bam indices
+#  normal_bams: arrays of normal bams
 #  scatter_count: number of parallel jobs when scattering over intervals
 #  pon_name: the resulting panel of normals is {pon_name}.vcf
-#  duplicate_sample_strategy: THROW_ERROR (default if left empty) to fail if mulitple bams have the same sample name,
-#                               CHOOSE_FIRST to use only the first bam with a given sample name, ALLOW_ALL to use all bams
-#
-# This WDL needs to decide whether to use the ``gatk_jar`` or ``gatk_jar_override`` for the jar location.  As of cromwell-0.24,
-#   this logic *must* go into each task.  Therefore, there is a lot of duplicated code.  This allows users to specify a jar file
-#   independent of what is in the docker file.  See the README.md for more info.
-#
-import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-snvs-indels/2.3.0/mutect2_nio.wdl" as m2
+#  m2_extra_args: additional command line parameters for Mutect2.  This should not involve --max-mnp-distance,
+#  which the wdl hard-codes to 0 because GenpmicsDBImport can't handle MNPs
+
+import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-snvs-indels/2.4.0/mutect2_nio.wdl" as m2
 
 workflow Mutect2_Panel {
     # inputs
@@ -23,12 +18,18 @@ workflow Mutect2_Panel {
 	File ref_fai
 	File ref_dict
 	Int scatter_count
-	Array[File] normal_bams
-	Array[File] normal_bais
+	Array[String] normal_bams
+	Array[String] normal_bais
+	String gnomad
     String? m2_extra_args
-    String? duplicate_sample_strategy
+    String? create_pon_extra_args
+    Boolean? compress
     String pon_name
 
+    Int? min_contig_size
+    Int? num_contigs
+    Int contig_size = select_first([min_contig_size, 1000000])
+
     File? gatk_override
 
     # runtime
@@ -36,55 +37,84 @@ workflow Mutect2_Panel {
     Int? preemptible_attempts
     Int? max_retries
 
-    Array[Pair[File,File]] normal_bam_pairs = zip(normal_bams, normal_bais)
-
-    scatter (normal_bam_pair in normal_bam_pairs) {
-        File normal_bam = normal_bam_pair.left
-        File normal_bai = normal_bam_pair.right
-
+    scatter (normal_bam in zip(normal_bams, normal_bais)) {
         call m2.Mutect2 {
             input:
                 intervals = intervals,
                 ref_fasta = ref_fasta,
                 ref_fai = ref_fai,
                 ref_dict = ref_dict,
-                tumor_bam = normal_bam,
-                tumor_bai = normal_bai,
+                tumor_reads = normal_bam.left,
+                tumor_reads_index = normal_bam.right,
                 scatter_count = scatter_count,
-                m2_extra_args = m2_extra_args,
+                m2_extra_args = select_first([m2_extra_args, ""]) + "--max-mnp-distance 0",
                 gatk_override = gatk_override,
                 gatk_docker = gatk_docker,
                 preemptible_attempts = preemptible_attempts,
-                max_retries = max_retries
+                max_retries = max_retries,
+                run_orientation_bias_mixture_model_filter = false
         }
     }
 
-    call CreatePanel {
+    call m2.SplitIntervals {
         input:
-            input_vcfs = Mutect2.filtered_vcf,
-            input_vcfs_idx = Mutect2.filtered_vcf_index,
-            duplicate_sample_strategy = duplicate_sample_strategy,
-            output_vcf_name = pon_name,
+            ref_fasta = ref_fasta,
+            ref_fai = ref_fai,
+            ref_dict = ref_dict,
+            scatter_count = select_first([num_contigs, 24]),
+            split_intervals_extra_args = "--subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION --min-contig-size " + contig_size,
             gatk_override = gatk_override,
-            preemptible_attempts = preemptible_attempts,
-            max_retries = max_retries,
             gatk_docker = gatk_docker
     }
 
+    scatter (subintervals in SplitIntervals.interval_files ) {
+            call CreatePanel {
+                input:
+                    input_vcfs = Mutect2.filtered_vcf,
+                    intervals = subintervals,
+                    ref_fasta = ref_fasta,
+                    ref_fai = ref_fai,
+                    ref_dict = ref_dict,
+                    gnomad = gnomad,
+                    output_vcf_name = pon_name,
+                    create_pon_extra_args = create_pon_extra_args,
+                    gatk_override = gatk_override,
+                    preemptible_attempts = preemptible_attempts,
+                    max_retries = max_retries,
+                    gatk_docker = gatk_docker
+            }
+    }
+
+    call m2.MergeVCFs {
+        input:
+            input_vcfs = CreatePanel.output_vcf,
+            input_vcf_indices = CreatePanel.output_vcf_index,
+            output_name = pon_name,
+            compress = select_first([compress, false]),
+            gatk_override = gatk_override,
+            gatk_docker = gatk_docker,
+            preemptible_attempts = preemptible_attempts,
+            max_retries = max_retries
+    }
+
     output {
-        File pon = CreatePanel.output_vcf
-        File pon_idx = CreatePanel.output_vcf_index
+        File pon = MergeVCFs.merged_vcf
+        File pon_idx = MergeVCFs.merged_vcf_idx
         Array[File] normal_calls = Mutect2.filtered_vcf
-        Array[File] normal_calls_idx = Mutect2.filtered_vcf_index
+        Array[File] normal_calls_idx = Mutect2.filtered_vcf_idx
     }
 }
 
 task CreatePanel {
     # inputs
-    Array[File] input_vcfs
-    Array[File] input_vcfs_idx
-    String? duplicate_sample_strategy
+    File intervals
+    Array[String] input_vcfs
+    File ref_fasta
+    File ref_fai
+    File ref_dict
     String output_vcf_name
+    String gnomad
+    String? create_pon_extra_args
 
     File? gatk_override
 
@@ -94,26 +124,27 @@ task CreatePanel {
     Int? preemptible_attempts
     Int? max_retries
     Int? disk_space
-    Int? cpu
-    Boolean use_ssd = false
 
-    Int machine_mem = select_first([mem, 3])
+    Int machine_mem = select_first([mem, 8])
     Int command_mem = machine_mem - 1
 
     command {
         set -e
         export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}
-        gatk --java-options "-Xmx${command_mem}g"  CreateSomaticPanelOfNormals -vcfs ${sep=' -vcfs ' input_vcfs} ${"-duplicate-sample-strategy " + duplicate_sample_strategy} -O ${output_vcf_name}.vcf
+
+        gatk GenomicsDBImport --genomicsdb-workspace-path pon_db -R ${ref_fasta} -V ${sep=' -V ' input_vcfs} -L ${intervals}
+
+        gatk --java-options "-Xmx${command_mem}g"  CreateSomaticPanelOfNormals -R ${ref_fasta} --germline-resource ${gnomad} \
+            -V gendb://pon_db -O ${output_vcf_name}.vcf ${create_pon_extra_args}
     }
 
     runtime {
         docker: gatk_docker
         bootDiskSizeGb: 12
         memory: machine_mem + " GB"
-        disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
+        disks: "local-disk " + select_first([disk_space, 100]) + " HDD"
         preemptible: select_first([preemptible_attempts, 3])
         maxRetries: select_first([max_retries, 0])
-        cpu: select_first([cpu, 1])
     }
 
     output {