nextflow-io · adamrtalbot · Nov 6, 2024
diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf
@@ -8,62 +8,97 @@
 params.reads_bam = "${projectDir}/data/sample_bams.txt"
 
 // Accessory files
-params.reference        = "${projectDir}/data/ref/ref.fasta"
-params.reference_index  = "${projectDir}/data/ref/ref.fasta.fai"
-params.reference_dict   = "${projectDir}/data/ref/ref.dict"
-params.intervals        = "${projectDir}/data/ref/intervals.bed"
+params.reference = "${projectDir}/data/ref/ref.fasta"
+params.reference_index = "${projectDir}/data/ref/ref.fasta.fai"
+params.reference_dict = "${projectDir}/data/ref/ref.dict"
+params.intervals = "${projectDir}/data/ref/intervals.bed"
 
 // Base name for final output file
 params.cohort_name = "family_trio"
 
+workflow {
+
+    // Create input channel of BAM paths, one per line of the listing file
+    reads_ch = Channel.fromPath(params.reads_bam).splitText()
+
+    // Load the file paths for the accessory files (reference and intervals)
+    ref_file = file(params.reference)
+    ref_index_file = file(params.reference_index)
+    ref_dict_file = file(params.reference_dict)
+    intervals_file = file(params.intervals)
+
+    // Create index file for each input BAM file
+    SAMTOOLS_INDEX(reads_ch)
+
+    // Call variants from each (BAM, index) pair emitted by SAMTOOLS_INDEX
+    GATK_HAPLOTYPECALLER(
+        SAMTOOLS_INDEX.out,
+        ref_file,
+        ref_index_file,
+        ref_dict_file,
+        intervals_file
+    )
+
+    // Collect per-sample outputs by their declared names (emit: vcf / emit: idx)
+    all_gvcfs_ch = GATK_HAPLOTYPECALLER.out.vcf.collect()
+    all_idxs_ch = GATK_HAPLOTYPECALLER.out.idx.collect()
+
+    // Combine GVCFs into a GenomicsDB data store and apply joint genotyping
+    GATK_JOINTGENOTYPING(
+        all_gvcfs_ch,
+        all_idxs_ch,
+        intervals_file,
+        params.cohort_name,
+        ref_file,
+        ref_index_file,
+        ref_dict_file
+    )
+}
+
 /*
- * Generate BAM index file
+ * Generate a BAM index (.bai) with samtools and emit it paired with the input BAM
  */
 process SAMTOOLS_INDEX {
-
     container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464'
-
     publishDir 'results_genomics', mode: 'symlink'
 
     input:
-        path input_bam
+    path input_bam
 
     output:
-        tuple path(input_bam), path("${input_bam}.bai")
+    tuple path(input_bam), path("${input_bam}.bai")
 
     script:
     """
-    samtools index '$input_bam'
+    samtools index '${input_bam}'
     """
 }
 
 /*
- * Call variants with GATK HaplotypeCaller
+ * Call variants for one indexed BAM with GATK HaplotypeCaller in GVCF mode (-ERC GVCF), limited to the given intervals
  */
 process GATK_HAPLOTYPECALLER {
-
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
-
     publishDir 'results_genomics', mode: 'symlink'
 
     input:
-        tuple path(input_bam), path(input_bam_index)
-        path ref_fasta
-        path ref_index
-        path ref_dict
-        path interval_list
+    tuple path(input_bam), path(input_bam_index)
+    path ref_fasta
+    path ref_index
+    path ref_dict
+    path interval_list
 
     output:
-        path "${input_bam}.g.vcf"     , emit: vcf
-        path "${input_bam}.g.vcf.idx" , emit: idx
+    path "${input_bam}.g.vcf", emit: vcf
+    path "${input_bam}.g.vcf.idx", emit: idx
 
     script:
     """
-    gatk HaplotypeCaller \
-        -R ${ref_fasta} \
-        -I ${input_bam} \
-        -O ${input_bam}.g.vcf \
-        -L ${interval_list} \
+    gatk HaplotypeCaller \\
+        -R ${ref_fasta} \\
+        -I ${input_bam} \\
+        -O ${input_bam}.g.vcf \\
+        -L ${interval_list} \\
         -ERC GVCF
     """
 }
@@ -72,75 +107,34 @@ process GATK_HAPLOTYPECALLER {
- * Combine GVCFs into GenomicsDB datastore and run joint genotyping to produce cohort-level calls
+ * Import all GVCFs into a GenomicsDB workspace (GenomicsDBImport), then joint-genotype it (GenotypeGVCFs) into <cohort_name>.joint.vcf
  */
 process GATK_JOINTGENOTYPING {
-
     container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867"
-
     publishDir 'results_genomics', mode: 'symlink'
 
     input:
-        path all_gvcfs
-        path all_idxs
-        path interval_list
-        val cohort_name
-        path ref_fasta
-        path ref_index
-        path ref_dict
+    path all_gvcfs
+    path all_idxs
+    path interval_list
+    val cohort_name
+    path ref_fasta
+    path ref_index
+    path ref_dict
 
     output:
-        path "${cohort_name}.joint.vcf"
-        path "${cohort_name}.joint.vcf.idx"
+    path "${cohort_name}.joint.vcf"
+    path "${cohort_name}.joint.vcf.idx"
 
     script:
-        def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ')
+    def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ')
     """
-    gatk GenomicsDBImport \
-        ${gvcfs_line} \
-        -L ${interval_list} \
+    gatk GenomicsDBImport \\
+        ${gvcfs_line} \\
+        -L ${interval_list} \\
         --genomicsdb-workspace-path ${cohort_name}_gdb
 
-    gatk GenotypeGVCFs \
-        -R ${ref_fasta} \
-        -V gendb://${cohort_name}_gdb \
-        -L ${interval_list} \
+    gatk GenotypeGVCFs \\
+        -R ${ref_fasta} \\
+        -V gendb://${cohort_name}_gdb \\
+        -L ${interval_list} \\
         -O ${cohort_name}.joint.vcf
     """
 }
-
-workflow {
-
-    // Create input channel from a text file listing input file paths
-    reads_ch = Channel.fromPath(params.reads_bam).splitText()
-
-    // Load the file paths for the accessory files (reference and intervals)
-    ref_file        = file(params.reference)
-    ref_index_file  = file(params.reference_index)
-    ref_dict_file   = file(params.reference_dict)
-    intervals_file  = file(params.intervals)
-
-    // Create index file for input BAM file
-    SAMTOOLS_INDEX(reads_ch)
-
-    // Call variants from the indexed BAM file
-    GATK_HAPLOTYPECALLER(
-        SAMTOOLS_INDEX.out,
-        ref_file,
-        ref_index_file,
-        ref_dict_file,
-        intervals_file
-    )
-
-    // Collect variant calling outputs across samples
-    all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect()
-    all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect()
-
-    // Combine GVCFs into a GenomicsDB data store and apply joint genotyping
-    GATK_JOINTGENOTYPING(
-        all_gvcfs_ch,
-        all_idxs_ch,
-        intervals_file,
-        params.cohort_name,
-        ref_file,
-        ref_index_file,
-        ref_dict_file
-    )
-}
diff --git a/hello-nextflow/hello-containers.nf b/hello-nextflow/hello-containers.nf
@@ -8,55 +8,54 @@ params.input_file = "containers/data/greetings.csv"
 // 'meow', 'miki', 'milk', 'octopus', 'pig', 'stegosaurus', 'stimpy', 'trex', 'turkey', 'turtle', 'tux'
 params.character = "cow"
 
+workflow {
+
+    // Create a channel of individual values read from the input CSV (rows split, then flattened)
+    input_ch = Channel
+        .fromPath(params.input_file)
+        .splitCsv()
+        .flatten()
+
+    sayHello(input_ch)
+
+    // Pass each file produced by sayHello to cowSay
+    cowSay(sayHello.out)
+}
+
 /*
- * Use echo to print 'Hello World!' to standard out
+ * Use echo to write the greeting to a file named output-<greeting>.txt
  */
 process sayHello {
-
     publishDir 'containers/results', mode: 'copy'
 
     input:
-        val greeting
+    val greeting
 
     output:
-        path "output-*.txt"
+    path "output-*.txt"
 
     script:
-        // Replace the spaces in the greeting with hyphens for the output filename
-        def safe_greeting = greeting.tokenize(' ').join('-')
-        """
-        echo '$greeting' > 'output-${safe_greeting}.txt'
-        """
+    // Replace the spaces in the greeting with hyphens for the output filename
+    def safe_greeting = greeting.tokenize(' ').join('-')
+    """
+    echo '${greeting}' > 'output-${safe_greeting}.txt'
+    """
 }
 
 /*
  * Use a cow (or other character) to say some text
  */
 process cowSay {
-
     publishDir 'containers/results', mode: 'copy'
 
     input:
-        path input_file
+    path input_file
 
     output:
-        path "cowsay-*"
+    path "cowsay-*"
 
     script:
     """
-    cowsay -c "$params.character" -t "\$(cat $input_file)" > cowsay-${input_file}
+    cowsay -c "${params.character}" -t "\$(cat ${input_file})" > cowsay-${input_file}
     """
 }
-
-workflow {
-
-    // create a channel for inputs from a CSV file
-    input_ch = Channel.fromPath(params.input_file)
-        .splitCsv()
-        .flatten()
-
-    sayHello(input_ch)
-
-    // cowSay the text
-    cowSay(sayHello.out)
-}