Skip to content

Commit

Permalink
Added subworkflows and config (#2)
Browse files Browse the repository at this point in the history
* updated nextflow.config with pipeline rename to omicsgenetraitassociation

* Template update for nf-core/tools version 2.11.1

* added modules CMA, MMAP, and PASCAL

* fixed versions emit values for cma/main.nf and pascal/main.nf

* added when directives to cma/main.nf

* added MEA modules

* pipeline works from CMA to MEA

* added CMA and MEA module configuration to modules.config

* updated .gitignore to exclude downloaded singularity images and added new config file for testing cma + mea modules

* added temporary subworkflow CMA_SUBWORKFLOW which will later be modified to run CMA and MEA

* commented out workflow initialization and validation steps for testing

* pascal/main.nf edited to extract a tarball, as Nextflow does not support S3 glob operations

* fixed versions emit name

* added test_local.config, modules.config, and updated subworkflows
  • Loading branch information
wsjung authored Jan 23, 2024
1 parent 7692913 commit b25be9b
Show file tree
Hide file tree
Showing 6 changed files with 204 additions and 20 deletions.
53 changes: 34 additions & 19 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,22 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]

// MMAP: publish only the '*.poly.cov.csv' outputs under <outdir>/MMAP/mmap.
withName: MMAP {
    publishDir = [
        path: { "${params.outdir}/MMAP/mmap" },
        mode: params.publish_dir_mode,
        saveAs: { published ->
            // Returning null from saveAs skips publishing that file.
            published.endsWith('.poly.cov.csv') ? published : null
        }
    ]
}

// MMAP_PARSE: publish only files named 'parsed_output_*' under <outdir>/MMAP.
withName: MMAP_PARSE {
    publishDir = [
        path: { "${params.outdir}/MMAP" },
        mode: params.publish_dir_mode,
        saveAs: { published ->
            // Returning null from saveAs skips publishing that file.
            published.startsWith('parsed_output_') ? published : null
        }
    ]
}

withName: PREPROCESS_PASCAL {
publishDir = [
path: { "${params.outdir}/MEA" },
Expand Down Expand Up @@ -86,25 +102,24 @@ process {
]
}

withName: FASTQC {
ext.args = '--quiet'
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
pattern: '*_versions.yml'
]
}
//withName: FASTQC {
// ext.args = '--quiet'
//}

withName: 'MULTIQC' {
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
path: { "${params.outdir}/multiqc" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
//withName: CUSTOM_DUMPSOFTWAREVERSIONS {
// publishDir = [
// path: { "${params.outdir}/pipeline_info" },
// mode: params.publish_dir_mode,
// pattern: '*_versions.yml'
// ]
//}

//withName: 'MULTIQC' {
// ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
// publishDir = [
// path: { "${params.outdir}/multiqc" },
// mode: params.publish_dir_mode,
// saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
// ]
//}
}
68 changes: 68 additions & 0 deletions conf/test_local.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Parameters for a local test run. All input files are read from the
// brentlab-nextflow-testdata S3 bucket.
params {

// pipeline parameters
trait = 'fhshdl'

// PASCAL inputs
pascal_gwas_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gwasA.csv.gz'
pascal_gene_annotation = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/gene_annotation.tsv'
// The reference panel is supplied as a single tarball because Nextflow does
// not support S3 glob operations.
pascal_ref_panel = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/pascal/EUR_simulated.tar.gz'
// Header and p-value column settings handed to FORMAT_CMA_INPUT for PASCAL output
pascal_header = 0
pascal_pval_col = 1

// MMAP inputs
mmap_gene_list = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/gene_list_gwasA_twas.txt'
mmap_pheno_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap_pheno_adjusted_fhshdl_HGNC_gwasA_genes.csv'
mmap_pedigree_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/mmap.ped.v5.csv'
mmap_cov_matrix_file = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mmap/llfs.kinship.bin'
// Header, p-value, beta and standard-error column settings handed to
// FORMAT_CMA_INPUT for parsed MMAP output
mmap_header = 1
mmap_pval_col = 'p_vals'
mmap_beta_col = 'betas_genes'
mmap_se_genes = 'se_genes'


// CMA module test files from S3
cma_two_traits = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/two_traits/fhshdl/'
cma_three_complete_corr = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_complete_correlation/'
cma_three_missing_obs = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/cma/three-traits/test_category_missing_observations/'

// MEA preprocess
// NOTE(review): 'pipeline' presumably selects which upstream result MEA consumes - confirm against the MEA modules
pipeline = 'cma'
module_file_dir = 's3://brentlab-nextflow-testdata/omicsgenetraitassociation/mea/preprocess/cherryPickModules/'
gene_col_name = 'markname'
pval_col_name = 'meta_p'

// MEA postprocess
// NOTE(review): numtests / alpha look like multiple-testing settings - confirm against the MEA postprocess module
numtests = 17551
alpha = 0.05

// Boilerplate options (the commented-out entries are left unset here)
publish_dir_mode = 'copy'
email = '[email protected]'
//email_on_fail = null
outdir = 'results'
//monochrome_logs = null
//hook_url = null
}

includeConfig 'modules.config'

// Process defaults for local test runs.
process {
    executor   = 'local'

    // Default publish location: take the last ':'-separated component of the
    // process name, keep the part before the first '_', and lowercase it
    // (e.g. 'A:B:MMAP_PARSE' -> '<outdir>/mmap').
    publishDir = [
        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
        mode: "copy"
    ]
}

// Container / environment settings for local test runs.
singularity {
    enabled    = true
    autoMounts = true
    // Relative path: pulled images are cached in ./singularity_images
    // under the launch directory.
    cacheDir   = "singularity_images"
}

// The original file assigned NXF_SINGULARITY_CACHEDIR and NXF_CONDA_CACHEDIR
// as top-level config entries. A config assignment does not export an
// environment variable, so those lines only created unused config keys.
// The Singularity cache is already configured by `singularity.cacheDir`
// above; the conda cache is configured through the `conda` scope below.
// To use the environment variables instead, export them in the shell
// before launching Nextflow.
conda {
    enabled  = true
    cacheDir = "/scratch/mblab/jungw/conda_cache/"
}
1 change: 1 addition & 0 deletions modules/local/cma/format_cma_input/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ process FORMAT_CMA_INPUT {

label 'process_medium'

// include an image docker/singularity from biocontainers
conda "${moduleDir}/environment.yml"

input:
Expand Down
2 changes: 1 addition & 1 deletion modules/local/pascal/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process PASCAL {

container 'docker://jungwooseok/pascal:1.0.3'

publishDir "results/pascal", mode:'copy', saveAs: { filename -> filename.endsWith(".csv") ? "PASCAL.csv" : filename}
// publishDir "results/pascal", mode:'copy', saveAs: { filename -> filename.endsWith(".csv") ? "PASCAL.csv" : filename}

input:
path gwas_file
Expand Down
62 changes: 62 additions & 0 deletions subworkflows/local/mmap.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//
// workflow for running MMAP, parsing the output, and formatting it for CMA
//
include { MMAP } from '../../modules/local/mmap/mmap'
include { MMAP_PARSE } from '../../modules/local/mmap/mmap_parse'
include { FORMAT_CMA_INPUT } from '../../modules/local/cma/format_cma_input'

//
// Runs MMAP once per gene in `gene_list_file`, concatenates the per-gene CSV
// outputs into a single file, parses it with MMAP_PARSE, and reformats the
// parsed table with FORMAT_CMA_INPUT.
//
// take:
//   gene_list_file          path read with Channel.fromPath and split into one item per line
//   trait, phenotype_file,
//   pedigree_file,
//   covariance_matrix_file  forwarded unchanged to the MMAP process
//
// emit:
//   parsed_mmap_output      MMAP_PARSE.out.mmap_parsed_output
//   cma_format_output       FORMAT_CMA_INPUT.out.csv
//   versions                versions files collected from the three processes
//
workflow MMAP_SUBWORKFLOW {
    take:
    gene_list_file
    trait
    phenotype_file
    pedigree_file
    covariance_matrix_file

    main:
    ch_versions = Channel.empty()

    // One channel item per gene. Each line is trimmed, and blank lines are
    // dropped so that MMAP is never launched with an empty gene name.
    ch_mmap_genes = Channel.fromPath(gene_list_file)
        .splitText()
        .map { gene -> gene.trim() }
        .filter { gene -> !gene.isEmpty() }

    //
    // MODULE: MMAP
    //
    MMAP (
        ch_mmap_genes, trait, phenotype_file, pedigree_file, covariance_matrix_file
    )
    // Merge the per-gene CSV outputs into one file for parsing.
    ch_concatenated_mmap = MMAP.out.csv
        .collectFile(name: 'mmap_results.csv', cache: false)
    // MMAP runs once per gene; a single versions file is enough.
    ch_versions = ch_versions.mix(MMAP.out.versions.first())

    //
    // MODULE: MMAP PARSE
    //
    MMAP_PARSE (
        ch_concatenated_mmap
    )
    ch_mmap_parsed = MMAP_PARSE.out.mmap_parsed_output
    ch_versions = ch_versions.mix(MMAP_PARSE.out.versions)

    //
    // MODULE: FORMAT_CMA_INPUT
    //
    FORMAT_CMA_INPUT (
        ch_mmap_parsed,
        "MMAP",
        params.mmap_header,
        params.mmap_pval_col,
        params.mmap_beta_col,
        params.mmap_se_genes
    )
    ch_mmap_cma_format = FORMAT_CMA_INPUT.out.csv
    ch_versions = ch_versions.mix(FORMAT_CMA_INPUT.out.versions)

    emit:
    parsed_mmap_output = ch_mmap_parsed
    cma_format_output  = ch_mmap_cma_format
    versions           = ch_versions
}
38 changes: 38 additions & 0 deletions subworkflows/local/pascal.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
//
// workflow for running PASCAL and formatting the output for CMA
//
include { PASCAL } from '../../modules/local/pascal'
include { FORMAT_CMA_INPUT } from '../../modules/local/cma/format_cma_input'

//
// Runs PASCAL on the given GWAS file, gene annotation and reference panel,
// then reformats its TSV output with FORMAT_CMA_INPUT.
//
// emit:
//   pascal_output       PASCAL.out.tsv
//   cma_format_output   FORMAT_CMA_INPUT.out.csv
//   versions            versions files from both processes
//
workflow PASCAL_SUBWORKFLOW {
    take:
    gwas_file
    gene_annotation
    ref_panel

    main:
    //
    // MODULE: PASCAL
    //
    PASCAL ( gwas_file, gene_annotation, ref_panel )
    ch_pascal_out = PASCAL.out.tsv

    //
    // MODULE: FORMAT_CMA_INPUT
    //
    // The last two arguments are passed as empty lists.
    // NOTE(review): presumably these are the beta / SE column slots, which
    // PASCAL output does not provide - confirm against format_cma_input.
    FORMAT_CMA_INPUT (
        ch_pascal_out,
        "PASCAL",
        params.pascal_header,
        params.pascal_pval_col,
        [],
        []
    )
    ch_pascal_cma_format = FORMAT_CMA_INPUT.out.csv

    // Gather the versions files from both processes.
    ch_versions = Channel.empty()
        .mix(PASCAL.out.versions)
        .mix(FORMAT_CMA_INPUT.out.versions)

    emit:
    pascal_output     = ch_pascal_out
    cma_format_output = ch_pascal_cma_format
    versions          = ch_versions
}

0 comments on commit b25be9b

Please sign in to comment.