Added subworkflows and config (#2)

* updated nextflow.config with pipeline rename to omicsgenetraitassociation
* Template update for nf-core/tools version 2.11.1
* added modules CMA, MMAP, and PASCAL
* fixed versions emit values for cma/main.nf and pascal/main.nf
* added when directives to cma/main.nf
* added MEA modules
* pipeline works from CMA to MEA
* added config for CMA and MEA in modules.config
* updated .gitignore to exclude downloaded singularity images and added new config file for testing cma + mea modules
* added temporary subworkflow CMA_SUBWORKFLOW which will later be modified to run CMA and MEA
* commented out workflow initialization and validation steps for testing
* edited pascal/main.nf to extract the tarball, since Nextflow does not support S3 glob operations
* fixed versions emit name
* added test_local.config, modules.config, and updated subworkflows
nf-core · Jan 23, 2024 · b25be9b · b25be9b
1 parent 7692913
commit b25be9b
Show file tree

Hide file tree

Showing 6 changed files with 204 additions and 20 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -18,6 +18,22 @@ process {
         saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
     ]
 
+    withName: MMAP {
+      publishDir = [
+        mode: params.publish_dir_mode,
+        path: { "${params.outdir}/MMAP/mmap" },
+        saveAs: { out -> out.endsWith('.poly.cov.csv') ? out : null }  // null = do not publish this file
+      ]
+    }
+
+    withName: MMAP_PARSE {
+      publishDir = [
+        mode: params.publish_dir_mode,
+        path: { "${params.outdir}/MMAP" },
+        saveAs: { out -> out.startsWith('parsed_output_') ? out : null }  // null = do not publish this file
+      ]
+    }
+
     withName: PREPROCESS_PASCAL {
       publishDir = [
         path: { "${params.outdir}/MEA" },
@@ -86,25 +102,24 @@ process {
         ]
     }
 
-    withName: FASTQC {
-        ext.args = '--quiet'
-    }
-
-    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
-        publishDir = [
-            path: { "${params.outdir}/pipeline_info" },
-            mode: params.publish_dir_mode,
-            pattern: '*_versions.yml'
-        ]
-    }
+    //withName: FASTQC {
+    //    ext.args = '--quiet'
+    //}
 
-    withName: 'MULTIQC' {
-        ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
-        publishDir = [
-            path: { "${params.outdir}/multiqc" },
-            mode: params.publish_dir_mode,
-            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
-        ]
-    }
+    //withName: CUSTOM_DUMPSOFTWAREVERSIONS {
+    //    publishDir = [
+    //        path: { "${params.outdir}/pipeline_info" },
+    //        mode: params.publish_dir_mode,
+    //        pattern: '*_versions.yml'
+    //    ]
+    //}
 
+    //withName: 'MULTIQC' {
+    //    ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
+    //    publishDir = [
+    //        path: { "${params.outdir}/multiqc" },
+    //        mode: params.publish_dir_mode,
+    //        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+    //    ]
+    //}
 }
diff --git a/conf/test_local.config b/conf/test_local.config
@@ -0,0 +1,68 @@
+params {
+
+    // pipeline parameters
+    trait                      = 'fhshdl'
+
+    // PASCAL inputs; pascal_header and pascal_pval_col are forwarded to FORMAT_CMA_INPUT by PASCAL_SUBWORKFLOW
+    pascal_gwas_file           = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gwasA.csv.gz'
+    pascal_gene_annotation     = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gene_annotation.tsv'
+    // nextflow does not support s3 glob operations, so the reference panel is provided as a single tarball
+    pascal_ref_panel           = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/EUR_simulated.tar.gz'
+    pascal_header              = 0
+    pascal_pval_col            = 1
+
+    // MMAP inputs; the header/column settings below are forwarded to FORMAT_CMA_INPUT by MMAP_SUBWORKFLOW
+    mmap_gene_list             = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/gene_list_gwasA_twas.txt'
+    mmap_pheno_file            = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap_pheno_adjusted_fhshdl_HGNC_gwasA_genes.csv'
+    mmap_pedigree_file         = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap.ped.v5.csv'
+    mmap_cov_matrix_file       = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/llfs.kinship.bin'
+    mmap_header                = 1
+    mmap_pval_col              = 'p_vals'
+    mmap_beta_col              = 'betas_genes'
+    mmap_se_genes              = 'se_genes'  // NOTE(review): siblings are named *_col; renaming to mmap_se_col would also require updating subworkflows/local/mmap.nf
+
+
+    // CMA module test files from S3
+    cma_two_traits             = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/two_traits/fhshdl/'
+    cma_three_complete_corr    = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_complete_correlation/'
+    cma_three_missing_obs      = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_missing_observations/'
+
+    // MEA preprocess
+    pipeline                   = 'cma'
+    module_file_dir            = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mea/preprocess/cherryPickModules/'
+    gene_col_name              = 'markname'
+    pval_col_name              = 'meta_p'
+
+    // MEA postprocess
+    numtests                   = 17551
+    alpha                      = 0.05
+
+    // Boilerplate options
+    publish_dir_mode           = 'copy'
+    email                      = '[email protected]'  // NOTE(review): looks like a scrubbed placeholder, not a real address — confirm before relying on email notifications
+    //email_on_fail              = null
+    outdir                     = 'results'
+    //monochrome_logs            = null
+    //hook_url                   = null
+}
+
+includeConfig 'modules.config'
+
+process {
+    executor = 'local'
+    publishDir = [  // default rule: <outdir>/<first '_'-separated token of the process simple name, lower-cased>
+        mode: params.publish_dir_mode,  // honour the publish_dir_mode parameter (defined above) instead of hard-coding "copy"
+        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
+    ]
+}
+
+singularity {
+  enabled = true
+  autoMounts = true
+  cacheDir = "singularity_images"  // explicit image cache; the old NXF_SINGULARITY_CACHEDIR line below never took effect
+}
+
+// NXF_*_CACHEDIR are environment variables read by the Nextflow launcher; assigning them inside a
+// config file sets nothing, so the conda cache location is configured through the conda scope instead.
+conda.enabled = true
+conda.cacheDir = "/scratch/mblab/jungw/conda_cache/"
diff --git a/modules/local/cma/format_cma_input/main.nf b/modules/local/cma/format_cma_input/main.nf
@@ -2,6 +2,7 @@ process FORMAT_CMA_INPUT {
 
     label 'process_medium'
 
+    // TODO: add a docker/singularity container image from biocontainers for this process
     conda "${moduleDir}/environment.yml"
 
     input:

diff --git a/modules/local/pascal/main.nf b/modules/local/pascal/main.nf
@@ -3,7 +3,7 @@ process PASCAL {
 
     container 'docker://jungwooseok/pascal:1.0.3'
 
-    publishDir "results/pascal", mode:'copy', saveAs: { filename  -> filename.endsWith(".csv") ? "PASCAL.csv" : filename}
+    // publishDir "results/pascal", mode:'copy', saveAs: { filename  -> filename.endsWith(".csv") ? "PASCAL.csv" : filename}
 
     input:
     path gwas_file

diff --git a/subworkflows/local/mmap.nf b/subworkflows/local/mmap.nf
@@ -0,0 +1,62 @@
+//
+// workflow for running MMAP, parsing the output, and formatting it for CMA
+//
+include { MMAP } from '../../modules/local/mmap/mmap'
+include { MMAP_PARSE } from '../../modules/local/mmap/mmap_parse'
+include { FORMAT_CMA_INPUT } from '../../modules/local/cma/format_cma_input'
+
+workflow MMAP_SUBWORKFLOW {
+    // Runs MMAP for every gene in the list, merges the per-gene CSVs, parses them and formats the result for CMA.
+    take:
+    gene_list_file          // text file with one gene per line; handed to Channel.fromPath below
+    trait
+    phenotype_file
+    pedigree_file
+    covariance_matrix_file
+
+    main:
+    ch_versions             = Channel.empty()
+    ch_mmap_genes           = Channel.fromPath(gene_list_file)
+      .splitText()
+      .map { gene -> gene.trim() }
+      .filter { gene -> !gene.isEmpty() }    // skip blank lines so MMAP is never launched with an empty gene name
+
+    //
+    // MODULE: MMAP — one task per gene; assumes the other inputs are values/value channels (TODO confirm against caller)
+    //
+    MMAP (
+      ch_mmap_genes, trait, phenotype_file, pedigree_file, covariance_matrix_file
+    )
+    ch_concatenated_mmap = MMAP.out.csv
+      .collectFile(name: 'mmap_results.csv', cache:false)
+    ch_versions = ch_versions.mix(MMAP.out.versions)
+
+    //
+    // MODULE: MMAP PARSE — consumes the single concatenated 'mmap_results.csv'
+    //
+    MMAP_PARSE (
+      ch_concatenated_mmap
+    )
+    ch_mmap_parsed = MMAP_PARSE.out.mmap_parsed_output
+    ch_versions = ch_versions.mix(MMAP_PARSE.out.versions)
+
+    //
+    // MODULE: FORMAT_CMA_INPUT — header flag and column names come from the mmap_* params
+    //
+    FORMAT_CMA_INPUT (
+      ch_mmap_parsed,
+      "MMAP",
+      params.mmap_header,
+      params.mmap_pval_col,
+      params.mmap_beta_col,
+      params.mmap_se_genes
+    )
+    ch_mmap_cma_format = FORMAT_CMA_INPUT.out.csv
+    ch_versions = ch_versions.mix(FORMAT_CMA_INPUT.out.versions)
+
+
+    emit:
+    parsed_mmap_output      = ch_mmap_parsed
+    cma_format_output       = ch_mmap_cma_format
+    versions                = ch_versions
+}
diff --git a/subworkflows/local/pascal.nf b/subworkflows/local/pascal.nf
@@ -0,0 +1,38 @@
+//
+// workflow for running PASCAL and formatting the output for CMA
+//
+include { PASCAL } from '../../modules/local/pascal'
+include { FORMAT_CMA_INPUT } from '../../modules/local/cma/format_cma_input'
+
+workflow PASCAL_SUBWORKFLOW {
+    take:
+    gwas_file
+    gene_annotation
+    ref_panel
+
+    main:
+    // Step 1: run PASCAL on the GWAS file, gene annotation and reference panel.
+    PASCAL ( gwas_file, gene_annotation, ref_panel )
+    pascal_scores = PASCAL.out.tsv
+
+    // Step 2: format the PASCAL table for CMA; the two trailing arguments (the beta / SE
+    // column names in the MMAP subworkflow) are passed as empty lists here.
+    FORMAT_CMA_INPUT (
+      pascal_scores,
+      "PASCAL",
+      params.pascal_header,
+      params.pascal_pval_col,
+      [],
+      []
+    )
+    cma_ready = FORMAT_CMA_INPUT.out.csv
+
+    // Gather the versions output of both modules into a single channel.
+    tool_versions = Channel.empty()
+      .mix(PASCAL.out.versions, FORMAT_CMA_INPUT.out.versions)
+
+    emit:
+    pascal_output         = pascal_scores
+    cma_format_output     = cma_ready
+    versions              = tool_versions
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,7 @@ process FORMAT_CMA_INPUT { @@
         label 'process_medium'
+        // TODO: add a docker/singularity container image from biocontainers for this process
         conda "${moduleDir}/environment.yml"
         input:
@@ Expand Down @@