From 132415d249c900c39e0a7587b594a12639cb2faa Mon Sep 17 00:00:00 2001
From: Jin wook Lee <leepc12@gmail.com>
Date: Mon, 15 Nov 2021 22:58:35 -0800
Subject: [PATCH 1/5] fix dockstore-dx issue

---
 chip.wdl | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/chip.wdl b/chip.wdl
index 5214f44..910b1a4 100644
--- a/chip.wdl
+++ b/chip.wdl
@@ -1257,11 +1257,11 @@ workflow chip {
             else select_first([paired_end])
 
         Boolean has_input_of_align = i<length(fastqs_R1) && length(fastqs_R1[i])>0
-        Boolean has_output_of_align = i<length(bams) && defined(bams[i])
+        Boolean has_output_of_align = i<length(bams)
         if ( has_input_of_align && !has_output_of_align ) {
             call align { input :
                 fastqs_R1 = fastqs_R1[i],
-                fastqs_R2 = fastqs_R2[i],
+                fastqs_R2 = if paired_end_ then fastqs_R2[i] else [],
                 crop_length = crop_length,
                 crop_length_tol = crop_length_tol,
                 trimmomatic_phred_score_format = trimmomatic_phred_score_format,
@@ -1289,7 +1289,7 @@ workflow chip {
         File? bam_ = if has_output_of_align then bams[i] else align.bam
 
         Boolean has_input_of_filter = has_output_of_align || defined(align.bam)
-        Boolean has_output_of_filter = i<length(nodup_bams) && defined(nodup_bams[i])
+        Boolean has_output_of_filter = i<length(nodup_bams)
         # skip if we already have output of this step
         if ( has_input_of_filter && !has_output_of_filter ) {
             call filter { input :
@@ -1315,7 +1315,7 @@ workflow chip {
         File? nodup_bam_ = if has_output_of_filter then nodup_bams[i] else filter.nodup_bam
 
         Boolean has_input_of_bam2ta = has_output_of_filter || defined(filter.nodup_bam)
-        Boolean has_output_of_bam2ta = i<length(tas) && defined(tas[i])
+        Boolean has_output_of_bam2ta = i<length(tas)
         if ( has_input_of_bam2ta && !has_output_of_bam2ta ) {
             call bam2ta { input :
                 bam = nodup_bam_,
@@ -1490,7 +1490,7 @@ workflow chip {
 
         # before peak calling, get fragment length from xcor analysis or given input
         # if fraglen [] is defined in the input JSON, fraglen from xcor will be ignored
-        Int? fraglen_ = if i<length(fraglen) && defined(fraglen[i]) then fraglen[i]
+        Int? fraglen_ = if i<length(fraglen) then fraglen[i]
             else xcor.fraglen
     }
 
@@ -1502,11 +1502,11 @@ workflow chip {
             else select_first([ctl_paired_end, paired_end])
 
         Boolean has_input_of_align_ctl = i<length(ctl_fastqs_R1) && length(ctl_fastqs_R1[i])>0
-        Boolean has_output_of_align_ctl = i<length(ctl_bams) && defined(ctl_bams[i])
+        Boolean has_output_of_align_ctl = i<length(ctl_bams)
         if ( has_input_of_align_ctl && !has_output_of_align_ctl ) {
             call align as align_ctl { input :
                 fastqs_R1 = ctl_fastqs_R1[i],
-                fastqs_R2 = ctl_fastqs_R2[i],
+                fastqs_R2 = if ctl_paired_end_ then ctl_fastqs_R2[i] else [],
                 crop_length = crop_length,
                 crop_length_tol = crop_length_tol,
                 trimmomatic_phred_score_format = trimmomatic_phred_score_format,
@@ -1534,7 +1534,7 @@ workflow chip {
         File? ctl_bam_ = if has_output_of_align_ctl then ctl_bams[i] else align_ctl.bam
 
         Boolean has_input_of_filter_ctl = has_output_of_align_ctl || defined(align_ctl.bam)
-        Boolean has_output_of_filter_ctl = i<length(ctl_nodup_bams) && defined(ctl_nodup_bams[i])
+        Boolean has_output_of_filter_ctl = i<length(ctl_nodup_bams)
         # skip if we already have output of this step
         if ( has_input_of_filter_ctl && !has_output_of_filter_ctl ) {
             call filter as filter_ctl { input :
@@ -1560,7 +1560,7 @@ workflow chip {
         File? ctl_nodup_bam_ = if has_output_of_filter_ctl then ctl_nodup_bams[i] else filter_ctl.nodup_bam
 
         Boolean has_input_of_bam2ta_ctl = has_output_of_filter_ctl || defined(filter_ctl.nodup_bam)
-        Boolean has_output_of_bam2ta_ctl = i<length(ctl_tas) && defined(ctl_tas[i])
+        Boolean has_output_of_bam2ta_ctl = i<length(ctl_tas)
         if ( has_input_of_bam2ta_ctl && !has_output_of_bam2ta_ctl ) {
             call bam2ta as bam2ta_ctl { input :
                 bam = ctl_nodup_bam_,
@@ -3268,13 +3268,11 @@ task rounded_mean {
 task raise_exception {
     input {
         String msg
-        Array[String]? vals
 
         RuntimeEnvironment runtime_environment
     }
     command {
         echo -e "\n* Error: ${msg}\n" >&2
-        echo -e "* Vals: ${sep=',' vals}\n" >&2
         exit 2
     }
     output {

From a989f62f401d8f3ce770ab4a205792e067924f80 Mon Sep 17 00:00:00 2001
From: Jin wook Lee <leepc12@gmail.com>
Date: Mon, 15 Nov 2021 22:59:04 -0800
Subject: [PATCH 2/5] bump ver

---
 chip.wdl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/chip.wdl b/chip.wdl
index 910b1a4..5cc7a45 100644
--- a/chip.wdl
+++ b/chip.wdl
@@ -7,10 +7,10 @@ struct RuntimeEnvironment {
 }
 
 workflow chip {
-    String pipeline_ver = 'v2.1.0'
+    String pipeline_ver = 'v2.1.1'
 
     meta {
-        version: 'v2.1.0'
+        version: 'v2.1.1'
 
         author: 'Jin wook Lee'
         email: 'leepc12@gmail.com'
@@ -19,8 +19,8 @@ workflow chip {
 
         specification_document: 'https://docs.google.com/document/d/1lG_Rd7fnYgRpSIqrIfuVlAz2dW1VaSQThzk836Db99c/edit?usp=sharing'
 
-        default_docker: 'encodedcc/chip-seq-pipeline:v2.1.0'
-        default_singularity: 'library://leepc12/default/chip-seq-pipeline:v2.1.0'
+        default_docker: 'encodedcc/chip-seq-pipeline:v2.1.1'
+        default_singularity: 'library://leepc12/default/chip-seq-pipeline:v2.1.1'
         croo_out_def: 'https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v5.json'
 
         parameter_group: {
@@ -71,8 +71,8 @@ workflow chip {
     }
     input {
         # group: runtime_environment
-        String docker = 'encodedcc/chip-seq-pipeline:v2.1.0'
-        String singularity = 'library://leepc12/default/chip-seq-pipeline:v2.1.0'
+        String docker = 'encodedcc/chip-seq-pipeline:v2.1.1'
+        String singularity = 'library://leepc12/default/chip-seq-pipeline:v2.1.1'
         String conda = 'encode-chip-seq-pipeline'
         String conda_macs2 = 'encode-chip-seq-pipeline-macs2'
         String conda_spp = 'encode-chip-seq-pipeline-spp'

From 56e51f7bbfa4a6f615cda6ab2b64d33db51f781b Mon Sep 17 00:00:00 2001
From: Jin wook Lee <leepc12@gmail.com>
Date: Tue, 16 Nov 2021 08:30:13 -0800
Subject: [PATCH 3/5] fix test

---
 dev/test/test_task/test_choose_ctl.wdl | 31 +++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/dev/test/test_task/test_choose_ctl.wdl b/dev/test/test_task/test_choose_ctl.wdl
index 96262b4..f8088fc 100644
--- a/dev/test/test_task/test_choose_ctl.wdl
+++ b/dev/test/test_task/test_choose_ctl.wdl
@@ -191,7 +191,7 @@ workflow test_choose_ctl {
         String k = test.left
         Pair[Int, Int] v = test.right
         if ( v.left != v.right ) {
-            call chip.raise_exception { input:
+            call raise_exception_and_print { input:
                 msg = k,
                 vals = [v.left, v.right],
                 runtime_environment = runtime_environment,
@@ -199,3 +199,32 @@ workflow test_choose_ctl {
         }
     }
 }
+
+
+task raise_exception_and_print {
+    input {
+        String msg
+        Array[String]? vals
+
+        RuntimeEnvironment runtime_environment
+    }
+    command {
+        echo -e "\n* Error: ${msg}\n" >&2
+        echo -e "* Vals: ${sep=',' vals}\n" >&2
+        exit 2
+    }
+    output {
+        String error_msg = '${msg}'
+    }
+    runtime {
+        maxRetries : 0
+        cpu : 1
+        memory : '2 GB'
+        time : 4
+        disks : 'local-disk 10 SSD'
+
+        docker : runtime_environment.docker
+        singularity : runtime_environment.singularity
+        conda : runtime_environment.conda
+    }
+}

From 6d4750e929a8e1924fad9a21592c9f4841457ac0 Mon Sep 17 00:00:00 2001
From: Jin wook Lee <leepc12@gmail.com>
Date: Tue, 16 Nov 2021 10:29:17 -0800
Subject: [PATCH 4/5] update doc

---
 README.md | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 9db41d6..ee18af7 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,14 @@
 [![CircleCI](https://circleci.com/gh/ENCODE-DCC/chip-seq-pipeline2/tree/master.svg?style=svg)](https://circleci.com/gh/ENCODE-DCC/chip-seq-pipeline2/tree/master)
 
 
-## Download new Caper>=2.0
+## Download new Caper>=2.1
 
 New Caper is out. You need to update your Caper to work with the latest ENCODE ChIP-seq pipeline.
 ```bash
 $ pip install caper --upgrade
 ```
 
-## Local/HPC users and new Caper>=2.0
+## Local/HPC users and new Caper>=2.1
 
 There are tons of changes for local/HPC backends: `local`, `slurm`, `sge`, `pbs` and `lsf`(added). Make a backup of your current Caper configuration file `~/.caper/default.conf` and run `caper init`. Local/HPC users need to reset/initialize Caper's configuration file according to your chosen backend. Edit the configuration file and follow instructions in there.
 ```bash
@@ -72,10 +72,19 @@ This ChIP-Seq pipeline is based off the ENCODE (phase-3) transcription factor an
 	$ bash scripts/install_conda_env.sh
 	```
 
+## Input JSON file
+
+> **IMPORTANT**: DO NOT BLINDLY USE A TEMPLATE/EXAMPLE INPUT JSON. READ THROUGH THE FOLLOWING GUIDE TO MAKE A CORRECT INPUT JSON FILE.
+
+An input JSON file specifies all the input parameters and files that are necessary for successfully running this pipeline. This includes a specification of the path to the genome reference files and the raw data fastq file. Please make sure to specify absolute paths rather than relative paths in your input JSON files.
+
+1) [Input JSON file specification (short)](docs/input_short.md)
+2) [Input JSON file specification (long)](docs/input.md)
 
-## Test run
 
-You can use URIs(`s3://`, `gs://` and `http(s)://`) in Caper's command lines and input JSON file then Caper will automatically download/localize such files. Input JSON file URL: https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI_subsampled_chr19_only.json
+## Running on local computer/HPCs
+
+You can use URIs (`s3://`, `gs://` and `http(s)://`) in Caper's command lines and input JSON files; Caper will automatically download/localize such files. Input JSON file example: https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI_subsampled_chr19_only.json
 
 According to your chosen platform of Caper, run Caper or submit Caper command line to the cluster. You can choose other environments like `--singularity` or `--docker` instead of `--conda`. But you must define one of the environments.
 
@@ -87,10 +96,16 @@ The followings are just examples. Please read [Caper's README](https://github.co
     # Or submit it as a leader job (with long/enough resources) to SLURM (Stanford Sherlock) with Singularity
     # It will fail if you directly run the leader job on login nodes
     $ sbatch -p [SLURM_PARTITON] -J [WORKFLOW_NAME] --export=ALL --mem 4G -t 4-0 --wrap "caper chip chip.wdl -i https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI_subsampled_chr19_only.json --singularity"
+
+    # Check status of your leader job
+    $ squeue -u $USER | grep [WORKFLOW_NAME]
+
+    # Cancel the leader job to close all of its child jobs
+    $ scancel [JOB_ID]
 	```
 
 
-## Running a pipeline on Terra/Anvil (using Dockstore)
+## Running on Terra/Anvil (using Dockstore)
 
 Visit our pipeline repo on [Dockstore](https://dockstore.org/workflows/github.com/ENCODE-DCC/chip-seq-pipeline2). Click on `Terra` or `Anvil`. Follow Terra's instruction to create a workspace on Terra and add Terra's billing bot to your Google Cloud account.
 
@@ -99,7 +114,7 @@ Download this [test input JSON for Terra](https://storage.googleapis.com/encode-
 If you want to use your own input JSON file, then make sure that all files in the input JSON are on a Google Cloud Storage bucket (`gs://`). URLs will not work.
 
 
-## Running a pipeline on DNAnexus (using Dockstore)
+## Running on DNAnexus (using Dockstore)
 
 Sign up for a new account on [DNAnexus](https://platform.dnanexus.com/) and create a new project on either AWS or Azure. Visit our pipeline repo on [Dockstore](https://dockstore.org/workflows/github.com/ENCODE-DCC/chip-seq-pipeline2). Click on `DNAnexus`. Choose a destination directory on your DNAnexus project. Click on `Submit` and visit DNAnexus. This will submit a conversion job so that you can check status of it on `Monitor` on DNAnexus UI.
 
@@ -107,23 +122,23 @@ Once conversion is done download one of the following input JSON files according
 - AWS: https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI_subsampled_chr19_only_dx.json
 - Azure: https://storage.googleapis.com/encode-pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI_subsampled_chr19_only_dx_azure.json
 
-You cannot use these input JSON files directly. Go to the destination directory on DNAnexus and click on the converted workflow `chip`. You will see input file boxes in the left-hand side of the task graph. Expand it and define FASTQs (`fastq_repX_R1`) and `genome_tsv` as in the downloaded input JSON file. Click on the `common` task box and define other non-file pipeline parameters.
-
+You cannot use these input JSON files directly. Go to the destination directory on DNAnexus and click on the converted workflow `chip`. You will see input file boxes on the left-hand side of the task graph. Expand them and define FASTQs (`fastq_repX_R1` and `fastq_repX_R2`) and `genome_tsv` as in the downloaded input JSON file. Click on the `common` task box and define other non-file pipeline parameters, e.g. `pipeline_type`, `paired_end` and `ctl_paired_end`.
 
-## Running a pipeline on DNAnexus (using our pre-built workflows)
+We have a separate project on DNAnexus that provides example FASTQs and `genome_tsv` files for `hg38` and `mm10` (plus chr19-only versions of both; use the chr19-only versions for testing). We recommend making copies of these directories in your own project.
 
-See [this](docs/tutorial_dx_web.md) for details.
+`genome_tsv`
+- AWS: https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-genome-data/genome_tsv/v3
+- Azure: https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-genome-data/genome_tsv/v3
 
+Example FASTQs
+- AWS: https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled
+- Azure: https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled
 
 
-## Input JSON file
-
-> **IMPORTANT**: DO NOT BLINDLY USE A TEMPLATE/EXAMPLE INPUT JSON. READ THROUGH THE FOLLOWING GUIDE TO MAKE A CORRECT INPUT JSON FILE.
+## Running on DNAnexus (using our pre-built workflows)
 
-An input JSON file specifies all the input parameters and files that are necessary for successfully running this pipeline. This includes a specification of the path to the genome reference files and the raw data fastq file. Please make sure to specify absolute paths rather than relative paths in your input JSON files.
+See [this](docs/tutorial_dx_web.md) for details.
 
-1) [Input JSON file specification (short)](docs/input_short.md)
-2) [Input JSON file specification (long)](docs/input.md)
 
 ## Running and sharing on Truwl
 You can run this pipeline on [truwl.com](https://truwl.com/). This provides a web interface that allows you to define inputs and parameters, run the job on GCP, and monitor progress. To run it you will need to create an account on the platform then request early access by emailing [info@truwl.com](mailto:info@truwl.com) to get the right permissions. You can see the example cases from this repo at [https://truwl.com/workflows/instance/WF_dd6938.8f.340f/command](https://truwl.com/workflows/instance/WF_dd6938.8f.340f/command) and [https://truwl.com/workflows/instance/WF_dd6938.8f.8aa3/command](https://truwl.com/workflows/instance/WF_dd6938.8f.8aa3/command). The example jobs (or other jobs) can be forked to pre-populate the inputs for your own job.

From d8bdd2aebd85721150730d3459a1a56097598e04 Mon Sep 17 00:00:00 2001
From: Jin wook Lee <leepc12@gmail.com>
Date: Tue, 16 Nov 2021 10:44:22 -0800
Subject: [PATCH 5/5] upgrade genome_tsv in test input json: v1->v3

---
 example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_dx.json | 2 +-
 .../dx/ENCSR000DYI_subsampled_chr19_only_rep1_dx.json           | 2 +-
 .../dx_azure/ENCSR000DYI_subsampled_chr19_only_dx_azure.json    | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_dx.json b/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_dx.json
index 61fd9ee..e3221eb 100644
--- a/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_dx.json
+++ b/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_dx.json
@@ -1,6 +1,6 @@
 {
     "chip.pipeline_type" : "tf",
-    "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v1/hg38_chr19_chrM_dx.tsv",
+    "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.dx.tsv",
     "chip.fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz"
     ],
     "chip.fastqs_rep2_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.20.fastq.gz"
diff --git a/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_rep1_dx.json b/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_rep1_dx.json
index 35428fa..fb3cfad 100644
--- a/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_rep1_dx.json
+++ b/example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_rep1_dx.json
@@ -1,6 +1,6 @@
 {
     "chip.pipeline_type" : "tf",
-    "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v1/hg38_chr19_chrM_dx.tsv",
+    "chip.genome_tsv" : "dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.dx.tsv",
     "chip.fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz"
     ],
     "chip.ctl_fastqs_rep1_R1" : ["dx://project-BKpvFg00VBPV975PgJ6Q03v6:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/ctl1.subsampled.25.fastq.gz"
diff --git a/example_input_json/dx_azure/ENCSR000DYI_subsampled_chr19_only_dx_azure.json b/example_input_json/dx_azure/ENCSR000DYI_subsampled_chr19_only_dx_azure.json
index 6fc7d93..d593256 100644
--- a/example_input_json/dx_azure/ENCSR000DYI_subsampled_chr19_only_dx_azure.json
+++ b/example_input_json/dx_azure/ENCSR000DYI_subsampled_chr19_only_dx_azure.json
@@ -1,6 +1,6 @@
 {
     "chip.pipeline_type" : "tf",
-    "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v1/hg38_chr19_chrM_dx_azure.tsv",
+    "chip.genome_tsv" : "dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-genome-data/genome_tsv/v3/hg38_chr19_chrM.dx_azure.tsv",
     "chip.fastqs_rep1_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep1.subsampled.25.fastq.gz"
     ],
     "chip.fastqs_rep2_R1" : ["dx://project-F6K911Q9xyfgJ36JFzv03Z5J:/pipeline-test-samples/encode-chip-seq-pipeline/ENCSR000DYI/fastq_subsampled/rep2.subsampled.15.fastq.gz"