diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2e226b4f..c379c507 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/treeval') }}" - runs-on: ubuntu2204-8c + runs-on: [ubuntu-latest] # Let's see if Pretext errors are an architecture thing strategy: matrix: NXF_VER: diff --git a/CHANGELOG.md b/CHANGELOG.md index 0170b9f2..7be3b8c6 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,11 +40,20 @@ This builds on the initial release by adding subworkflows which generate kmer ba - Fix a bug in build_alignment_blocks.py to avoid indexing errors happening in large genomes. - Change output BEDGRAPH from EXTRACT_TELO module. +#### Hot Fix 1 + +- Add support for multi-library CRAM input. + +#### Hot Fix 2 + +- Add support to select which subworkflows to use in a pipeline run. + ### Parameters | Old Parameter | New Parameter | | ------------- | ------------- | | - | --juicer | +| - | --steps | ### Software dependencies diff --git a/CITATIONS.md b/CITATIONS.md index d179e501..7db8de50 100755 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -34,6 +34,10 @@ > Durand, N.C. et al. 2016. ‘Juicer provides a one-click system for analyzing loop-resolution hi-C experiments’, Cell Systems, 3(1), pp. 95–98. doi:10.1016/j.cels.2016.07.002. +- [Merqury_FK](https://github.com/thegenemyers/MERQURY.FK) + + > Myers, G., Rhie, A. (2024). MerquryFK & KatFK. [online]. https://github.com/thegenemyers/MERQURY.FK. (Accessed on 20 September 2024). + - [Minimap2](https://pubmed.ncbi.nlm.nih.gov/34623391/) > Li, H. 2021. ‘New strategies to improve MINIMAP2 alignment accuracy’, Bioinformatics, 37(23), pp. 4572–4574. doi:10.1093/bioinformatics/btab705. @@ -72,7 +76,7 @@ - [Samtools](https://pubmed.ncbi.nlm.nih.gov/33590861/) - > Di Tommaso, Paolo, et al. 2017. “Nextflow Enables Reproducible Computational Workflows.” Nature Biotechnology, 35(4), pp. 316–19, https://doi.org/10.1038/nbt.3820. + > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819.
- [SeqTK](https://github.com/lh3/seqtk) diff --git a/main.nf b/main.nf index 2a9fc377..6931d2bd 100755 --- a/main.nf +++ b/main.nf @@ -25,6 +25,7 @@ WorkflowMain.initialise( workflow, params, log ) include { TREEVAL } from './workflows/treeval' include { TREEVAL_RAPID } from './workflows/treeval_rapid' include { TREEVAL_RAPID_TOL } from './workflows/treeval_rapid_tol' +include { TREEVAL_JBROWSE } from './workflows/treeval_jbrowse' // // WORKFLOW: RUN MAIN PIPELINE GENERATING ALL OUTPUT @@ -47,6 +48,15 @@ workflow SANGERTOL_TREEVAL_RAPID_TOL { TREEVAL_RAPID_TOL () } +// +// WORKFLOW: RUN ONLY THE SUBWORKFLOWS REQUIRED FOR JBROWSE UPLOAD +// - THIS IS TO COMPLEMENT A NEW PROCESS WHERE MAJORITY OF TICKETS WILL BE RC +// AND GET REQUESTED FOR FULL +// +workflow SANGERTOL_TREEVAL_JBROWSE { + TREEVAL_JBROWSE () +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS @@ -68,6 +78,10 @@ workflow RAPID_TOL { SANGERTOL_TREEVAL_RAPID_TOL () } +workflow JBROWSE { + SANGERTOL_TREEVAL_JBROWSE () +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules/local/avgcov.nf b/modules/local/avgcov.nf index 7e1e477e..0077f2bd 100755 --- a/modules/local/avgcov.nf +++ b/modules/local/avgcov.nf @@ -4,8 +4,8 @@ process AVGCOV { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(bedfile) diff --git a/modules/local/bamtobed_sort.nf b/modules/local/bamtobed_sort.nf index c9d73306..bd1f0cfc 100755 --- a/modules/local/bamtobed_sort.nf +++ b/modules/local/bamtobed_sort.nf @@ -6,6 +6,11 @@ process BAMTOBED_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' : 'biocontainers/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "BAMTOBED_SORT module does not support Conda. Please use Docker / Singularity instead." + } + input: tuple val(meta), path(bam) diff --git a/modules/local/chunkfasta.nf b/modules/local/chunkfasta.nf index afb5050e..0400df24 100755 --- a/modules/local/chunkfasta.nf +++ b/modules/local/chunkfasta.nf @@ -4,8 +4,8 @@ process CHUNKFASTA { conda "conda-forge::pyfasta=0.5.2-1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pyfasta:0.5.2--py_1' : - 'biocontainers/pyfasta:0.5.2--py_1' }" + 'https://depot.galaxyproject.org/singularity/pyfasta:0.5.2--py_1' : + 'biocontainers/pyfasta:0.5.2--py_1' }" input: tuple val(meta), path('input.fasta') diff --git a/modules/local/concatblocks.nf b/modules/local/concatblocks.nf index 5c01459d..f58641de 100755 --- a/modules/local/concatblocks.nf +++ b/modules/local/concatblocks.nf @@ -4,8 +4,8 @@ process CONCATBLOCKS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(mergeblocks) diff --git a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf index ca706e28..06624c52 100755 --- a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf +++ b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf @@ -6,6 +6,11 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT module does not support Conda. Please use Docker / Singularity instead." + } + input: tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix), path(reference) diff --git a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf index 8d8d69e4..a9d2b977 100755 --- a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf +++ b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf @@ -6,6 +6,11 @@ process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT module does not support Conda. Please use Docker / Singularity instead." + } + input: tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref), path(reference) diff --git a/modules/local/extract_buscogene.nf b/modules/local/extract_buscogene.nf index 44149d74..a5cd3a5e 100755 --- a/modules/local/extract_buscogene.nf +++ b/modules/local/extract_buscogene.nf @@ -4,8 +4,8 @@ process EXTRACT_BUSCOGENE { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: diff --git a/modules/local/extract_cov_iden.nf b/modules/local/extract_cov_iden.nf index d50fd39c..bddcbab7 100755 --- a/modules/local/extract_cov_iden.nf +++ b/modules/local/extract_cov_iden.nf @@ -4,15 +4,15 @@ process EXTRACT_COV_IDEN { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*.bed" ) , emit: punchlist - path "versions.yml" , emit: versions + tuple val(meta), file("*.bed") , emit: punchlist + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" diff --git a/modules/local/extract_repeat.nf b/modules/local/extract_repeat.nf index 85fe9c93..39f7ee23 100755 --- a/modules/local/extract_repeat.nf +++ b/modules/local/extract_repeat.nf @@ -8,11 +8,11 @@ process EXTRACT_REPEAT { 'biocontainers/perl:5.26.2' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), path( "*.bed" ) , emit: bed - path "versions.yml" , emit: versions + tuple val(meta), path("*.bed") , emit: bed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf index c39e665c..cfd25908 100755 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -4,16 +4,16 @@ process EXTRACT_TELO { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*bed" ) , emit: bed - tuple val( meta ), file("*bedgraph"), emit: bedgraph - path "versions.yml" , emit: versions + tuple val(meta), file("*bed") , emit: bed + tuple val(meta), file("*bedgraph"), emit: bedgraph + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/find_telomere_regions.nf b/modules/local/find_telomere_regions.nf index c9f0e6a3..f2b78f8a 100755 --- a/modules/local/find_telomere_regions.nf +++ b/modules/local/find_telomere_regions.nf @@ -4,13 +4,18 @@ process FIND_TELOMERE_REGIONS { container 'docker.io/library/gcc:10.4.0' + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "FIND_TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." + } + input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) val (telomereseq) output: - tuple val( meta ), file( "*.telomere" ) , emit: telomere - path "versions.yml" , emit: versions + tuple val(meta), file("*.telomere") , emit: telomere + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/find_telomere_windows.nf b/modules/local/find_telomere_windows.nf index 2fcd0022..675fd544 100755 --- a/modules/local/find_telomere_windows.nf +++ b/modules/local/find_telomere_windows.nf @@ -3,17 +3,16 @@ process FIND_TELOMERE_WINDOWS { label 'process_low' conda "bioconda::java-jdk=8.0.112" - container "${ workflow.containerEngine == 'singularity' && - !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : 'biocontainers/java-jdk:8.0.112--1' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*.windows" ) , emit: windows - path "versions.yml" , emit: versions + tuple val(meta), file("*.windows") , emit: windows + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/fkutils/fkprof/main.nf b/modules/local/fkutils/fkprof/main.nf index 8562e2c8..399c2d5a 100644 --- a/modules/local/fkutils/fkprof/main.nf +++ b/modules/local/fkutils/fkprof/main.nf @@ -6,13 +6,18 @@ process FKUTILS_FKPROF { 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'docker.io/ubuntu:20.04' }" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "FKUTILS_FKPROF module does not support Conda. Please use Docker / Singularity instead." + } + input: - tuple val( meta ), path( reference ) - tuple val( meta2 ), path( ktab ) + tuple val(meta), path(reference) + tuple val(meta2), path(ktab) output: - tuple val( meta ), file( "*bed" ), emit: bed - path "versions.yml", emit: versions + tuple val(meta), file("*bed"), emit: bed + path "versions.yml", emit: versions script: def args = task.ext.args ?: "" diff --git a/modules/local/gap_length.nf b/modules/local/gap_length.nf index b5bf0733..dd1a5878 100755 --- a/modules/local/gap_length.nf +++ b/modules/local/gap_length.nf @@ -4,15 +4,15 @@ process GAP_LENGTH { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*bedgraph" ) , emit: bedgraph - path "versions.yml" , emit: versions + tuple val(meta), file("*bedgraph") , emit: bedgraph + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf index 6a06bb87..4ddf0162 100755 --- a/modules/local/generate_cram_csv.nf +++ b/modules/local/generate_cram_csv.nf @@ -2,7 +2,14 @@ process GENERATE_CRAM_CSV { tag "${meta.id}" label 'process_tiny' - container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : + 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GENERATE_CRAM_CSV module does not support Conda. Please use Docker / Singularity instead." 
+ } input: tuple val(meta), path(crampath) diff --git a/modules/local/get_largest_scaff.nf b/modules/local/get_largest_scaff.nf index 2296958c..091abc3e 100755 --- a/modules/local/get_largest_scaff.nf +++ b/modules/local/get_largest_scaff.nf @@ -5,11 +5,11 @@ process GET_LARGEST_SCAFF { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: env largest_scaff , emit: scaff_size diff --git a/modules/local/get_paired_contact_bed.nf b/modules/local/get_paired_contact_bed.nf index e6d3a135..b3db6d1f 100755 --- a/modules/local/get_paired_contact_bed.nf +++ b/modules/local/get_paired_contact_bed.nf @@ -4,15 +4,15 @@ process GET_PAIRED_CONTACT_BED { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*bed" ) , emit: bed - path "versions.yml" , emit: versions + tuple val(meta), file("*bed") , emit: bed + path "versions.yml" , emit: versions script: def pulled = '-T sort_tmp' diff --git a/modules/local/getminmaxpunches.nf b/modules/local/getminmaxpunches.nf index 6e828bb5..0a095b29 100755 --- a/modules/local/getminmaxpunches.nf +++ b/modules/local/getminmaxpunches.nf @@ -4,16 +4,16 @@ process GETMINMAXPUNCHES{ conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(bedfile) output: - tuple val(meta), path ( '*zero.bed' ) , optional: true , emit: min - tuple val(meta), path ( '*max.bed' ) , optional: true , emit: max - path "versions.yml" , emit: versions + tuple val(meta), path ('*zero.bed') , optional: true , emit: min + tuple val(meta), path ('*max.bed') , optional: true , emit: max + path "versions.yml" , emit: versions shell: def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/local/graphoverallcoverage.nf b/modules/local/graphoverallcoverage.nf index b8cc8777..572e793d 100755 --- a/modules/local/graphoverallcoverage.nf +++ b/modules/local/graphoverallcoverage.nf @@ -2,7 +2,14 @@ process GRAPHOVERALLCOVERAGE { tag "$meta.id" label "process_single" - container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : + 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" + + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "GRAPHOVERALLCOVERAGE module does not support Conda. Please use Docker / Singularity instead." + } input: tuple val(meta), path(bed) diff --git a/modules/local/juicer_tools_pre.nf b/modules/local/juicer_tools_pre.nf index 12b46ce8..d12ec28c 100755 --- a/modules/local/juicer_tools_pre.nf +++ b/modules/local/juicer_tools_pre.nf @@ -5,8 +5,7 @@ process JUICER_TOOLS_PRE { label 'process_medium' conda "bioconda::java-jdk=8.0.112" - container "${ workflow.containerEngine == 'singularity' && - !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : 'biocontainers/java-jdk:8.0.112--1' }" diff --git a/modules/local/paf_to_bed.nf b/modules/local/paf_to_bed.nf index c50f0373..445d3b2f 100755 --- a/modules/local/paf_to_bed.nf +++ b/modules/local/paf_to_bed.nf @@ -4,15 +4,15 @@ process PAF2BED { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*_punchlist.bed" ), emit: punchlist - path "versions.yml" , emit: versions + tuple val(meta), file("*_punchlist.bed"), emit: punchlist + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" diff --git a/modules/local/pretext_graph.nf b/modules/local/pretext_graph.nf index 3f600441..9a1d3ff2 100644 --- a/modules/local/pretext_graph.nf +++ b/modules/local/pretext_graph.nf @@ -4,6 +4,11 @@ process PRETEXT_GRAPH { container "quay.io/sanger-tol/pretext:0.0.2-yy5-c3" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "PRETEXT_GRAPH module does not support Conda. Please use Docker / Singularity instead." + } + input: tuple val(meta), path(pretext_file) tuple val(gap), path(gap_file) @@ -13,8 +18,8 @@ process PRETEXT_GRAPH { tuple val(rep), path(repeat_density) output: - tuple val(meta), path("*.pretext") , emit: pretext - path "versions.yml" , emit: versions + tuple val(meta), path("*.pretext") , emit: pretext + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/reformat_intersect.nf b/modules/local/reformat_intersect.nf index bcc0be77..3c4cdb61 100755 --- a/modules/local/reformat_intersect.nf +++ b/modules/local/reformat_intersect.nf @@ -4,14 +4,15 @@ process REFORMAT_INTERSECT { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*.bed" ), emit: bed + tuple val(meta), file("*.bed"), emit: bed + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/rename_ids.nf b/modules/local/rename_ids.nf index f69f518d..b6b12920 100755 --- a/modules/local/rename_ids.nf +++ b/modules/local/rename_ids.nf @@ -4,15 +4,15 @@ process RENAME_IDS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*bed" ) , emit: bed - path "versions.yml" , emit: versions + tuple val(meta), file("*bed") , emit: bed + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/replace_dots.nf b/modules/local/replace_dots.nf index 4d12f5cd..bb0f051e 100755 --- a/modules/local/replace_dots.nf +++ b/modules/local/replace_dots.nf @@ -4,15 +4,15 @@ process REPLACE_DOTS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val( meta ), path( file ) + tuple val(meta), path(file) output: - tuple val( meta ), file( "*bed" ), emit: bed - path "versions.yml" , emit: versions + tuple val(meta), file("*bed"), emit: bed + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/nextflow.config b/nextflow.config index af9a14b9..b9439082 100755 --- a/nextflow.config +++ b/nextflow.config @@ -14,6 +14,7 @@ params { input = null outdir = "./results" juicer = false + steps = "NONE" tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 00e2ce44..f8921bc3 100755 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -35,6 +35,11 @@ "default": false, "fa_icon": "fas fa-check" }, + "steps": { + "type": "string", + "description": "A csv list of steps to skip", + "fa_icon": "fas fa-folder-open" + }, "email": { "type": "string", "description": "Email address for completion summary.", diff --git a/subworkflows/local/ancestral_gene.nf b/subworkflows/local/ancestral_gene.nf index cfee2061..6ac72ab9 100755 --- a/subworkflows/local/ancestral_gene.nf +++ b/subworkflows/local/ancestral_gene.nf @@ -33,10 +33,10 @@ workflow ANCESTRAL_GENE { // LOGIC: STRIP OUT METADATA // ch_grab - .map { meta, fulltable + .map {meta, fulltable -> fulltable } - .set { assignanc_input } + .set {assignanc_input} // // MODULE: ASSIGN EXTRACTED GENES TO ANCESTRAL GROUPS @@ -61,7 +61,7 @@ workflow ANCESTRAL_GENE { // UCSC_BEDTOBIGBED( BEDTOOLS_SORT.out.sorted, - dot_genome.map{ it[1] }, // Pull file from tuple(meta, file) + dot_genome.map{it[1]}, // Pull file from tuple(meta, file) buscogene_as ) ch_versions = 
ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) diff --git a/subworkflows/local/busco_annotation.nf b/subworkflows/local/busco_annotation.nf index 974738f6..40c85166 100755 --- a/subworkflows/local/busco_annotation.nf +++ b/subworkflows/local/busco_annotation.nf @@ -31,7 +31,7 @@ workflow BUSCO_ANNOTATION { ch_versions = Channel.empty() // COMMENT: Set BUSCO mode to 'genome' - ch_busco_mode = Channel.of( "genome" ) + ch_busco_mode = Channel.of("genome") // @@ -45,9 +45,9 @@ workflow BUSCO_ANNOTATION { lineagespath, [] ) - ch_versions = ch_versions.mix( BUSCO.out.versions.first() ) + ch_versions = ch_versions.mix(BUSCO.out.versions.first()) - ch_grab = GrabFiles( BUSCO.out.busco_dir ) + ch_grab = GrabFiles(BUSCO.out.busco_dir) // // MODULE: EXTRACT THE BUSCO GENES FOUND IN REFERENCE @@ -55,7 +55,7 @@ workflow BUSCO_ANNOTATION { EXTRACT_BUSCOGENE ( ch_grab ) - ch_versions = ch_versions.mix( EXTRACT_BUSCOGENE.out.versions ) + ch_versions = ch_versions.mix(EXTRACT_BUSCOGENE.out.versions) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE @@ -68,7 +68,7 @@ workflow BUSCO_ANNOTATION { file ) } - .set { bedtools_input } + .set {bedtools_input} // // MODULE: SORT THE EXTRACTED BUSCO GENE // @@ -76,7 +76,7 @@ workflow BUSCO_ANNOTATION { bedtools_input, [] ) - ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) + ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) // // MODULE: CONVERT THE BED TO BIGBED @@ -86,30 +86,30 @@ workflow BUSCO_ANNOTATION { dot_genome.map{it[1]}, // Gets file from tuple (meta, file) buscogene_as ) - ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) + ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) // // LOGIC: AGGREGATE DATA AND SORT BRANCH ON CLASS // lineageinfo - .combine( BUSCO.out.busco_dir ) - .combine( ancestral_table ) + .combine(BUSCO.out.busco_dir) + .combine(ancestral_table) .branch { lep: it[0].split('_')[0] == "lepidoptera" general: it[0].split('_')[0] != "lepidoptera" } - .set{ ch_busco_data } + .set{ch_busco_data} // // LOGIC: BUILD NEW INPUT CHANNEL FOR ANCESTRAL ID // ch_busco_data .lep - .multiMap { lineage, meta, busco_dir, ancestral_table -> + .multiMap {lineage, meta, busco_dir, ancestral_table -> busco_dir: tuple( meta, busco_dir ) atable: ancestral_table } - .set{ ch_busco_lep_data } + .set{ch_busco_lep_data} // // SUBWORKFLOW: RUN ANCESTRAL BUSCO ID (ONLY AVAILABLE FOR LEPIDOPTERA) @@ -120,7 +120,7 @@ workflow BUSCO_ANNOTATION { buscogene_as, ch_busco_lep_data.atable ) - ch_versions = ch_versions.mix( ANCESTRAL_GENE.out.versions ) + ch_versions = ch_versions.mix(ANCESTRAL_GENE.out.versions) emit: ch_buscogene_bigbed = UCSC_BEDTOBIGBED.out.bigbed diff --git a/subworkflows/local/gap_finder.nf b/subworkflows/local/gap_finder.nf index 5b53d908..89feaf49 100755 --- a/subworkflows/local/gap_finder.nf +++ b/subworkflows/local/gap_finder.nf @@ -20,7 +20,7 @@ workflow GAP_FINDER { SEQTK_CUTN ( reference_tuple ) - ch_versions = ch_versions.mix( SEQTK_CUTN.out.versions ) + ch_versions = ch_versions.mix(SEQTK_CUTN.out.versions) // // MODULE: ADD THE LENGTH OF GAP TO BED FILE - INPUT FOR PRETEXT MODULE @@ -28,7 +28,7 @@ workflow GAP_FINDER { GAP_LENGTH ( SEQTK_CUTN.out.bed ) - ch_versions = ch_versions.mix( GAP_LENGTH.out.versions ) + ch_versions = ch_versions.mix(GAP_LENGTH.out.versions) // // MODULE: BGZIP AND TABIX THE GAP FILE @@ -36,7 +36,7 @@ workflow GAP_FINDER { TABIX_BGZIPTABIX ( SEQTK_CUTN.out.bed ) - ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) + ch_versions = 
ch_versions.mix(TABIX_BGZIPTABIX.out.versions) emit: gap_file = GAP_LENGTH.out.bedgraph diff --git a/subworkflows/local/gene_alignment.nf b/subworkflows/local/gene_alignment.nf index 3269c201..243c2f43 100755 --- a/subworkflows/local/gene_alignment.nf +++ b/subworkflows/local/gene_alignment.nf @@ -31,7 +31,7 @@ workflow GENE_ALIGNMENT { .map{ meta, file -> "${meta.class}" } - .set { assembly_class } + .set {assembly_class} // @@ -50,14 +50,14 @@ workflow GENE_ALIGNMENT { // SUBWORKFLOW // ch_data - .combine( alignment_datadir ) - .combine( assembly_class ) + .combine(alignment_datadir) + .combine(assembly_class) .map { ch_org, data_dir, classT -> file("${data_dir}${classT}/csv_data/${ch_org}-data.csv") } - .splitCsv( header: true, sep:',') - .map( row -> + .splitCsv(header: true, sep:',') + .map(row -> tuple([ org: row.org, type: row.type, id: row.data_file.split('/')[-1].split('.MOD.')[0] @@ -94,7 +94,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix( GEN_ALIGNMENTS.out.versions ) + ch_versions = ch_versions.mix(GEN_ALIGNMENTS.out.versions) CDS_ALIGNMENTS ( reference_tuple, reference_index, @@ -102,7 +102,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix( CDS_ALIGNMENTS.out.versions ) + ch_versions = ch_versions.mix(CDS_ALIGNMENTS.out.versions) RNA_ALIGNMENTS ( reference_tuple, reference_index, @@ -110,7 +110,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix( RNA_ALIGNMENTS.out.versions ) + ch_versions = ch_versions.mix(RNA_ALIGNMENTS.out.versions) emit: pep_gff = PEP_ALIGNMENTS.out.tbi_gff diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf index 5c4d06a5..166475e3 100755 --- a/subworkflows/local/generate_genome.nf +++ b/subworkflows/local/generate_genome.nf @@ -3,7 +3,6 @@ // // MODULE IMPORT BLOCK // -include { GET_LARGEST_SCAFF } from '../../modules/local/get_largest_scaff' include { GENERATE_UNSORTED_GENOME } from '../../subworkflows/local/generate_unsorted_genome' include { GENERATE_SORTED_GENOME } from '../../subworkflows/local/generate_sorted_genome' @@ -24,7 +23,7 @@ workflow GENERATE_GENOME { reference_file .combine(map_order) - .map{ ref_meta, ref, map_order -> + .map{ref_meta, ref, map_order -> tuple( [ id: ref_meta.id, map_order :map_order @@ -44,7 +43,7 @@ workflow GENERATE_GENOME { GENERATE_SORTED_GENOME ( ch_genomesize_input.sorted ) - ch_versions = ch_versions.mix( GENERATE_SORTED_GENOME.out.versions ) + ch_versions = ch_versions.mix(GENERATE_SORTED_GENOME.out.versions) ch_genomesize = GENERATE_SORTED_GENOME.out.genomesize ch_genome_fai = GENERATE_SORTED_GENOME.out.ref_index ch_versions = GENERATE_SORTED_GENOME.out.versions @@ -55,22 +54,12 @@ workflow GENERATE_GENOME { GENERATE_UNSORTED_GENOME ( ch_genomesize_input.unsorted ) - ch_versions = ch_versions.mix( GENERATE_UNSORTED_GENOME.out.versions ) - ch_genomesize = ch_genomesize.mix( GENERATE_UNSORTED_GENOME.out.genomesize ) - ch_genome_fai = ch_genome_fai.mix( GENERATE_UNSORTED_GENOME.out.ref_index ) + ch_versions = ch_versions.mix(GENERATE_UNSORTED_GENOME.out.versions) + ch_genomesize = ch_genomesize.mix(GENERATE_UNSORTED_GENOME.out.genomesize) + ch_genome_fai = ch_genome_fai.mix(GENERATE_UNSORTED_GENOME.out.ref_index) ch_versions = GENERATE_UNSORTED_GENOME.out.versions - // - // MODULE: Cut out the largest scaffold size and use as comparator against 512MB - // This is the cut off for TABIX using tbi indexes - // - GET_LARGEST_SCAFF ( - ch_genomesize - ) - 
ch_versions = ch_versions.mix( GET_LARGEST_SCAFF.out.versions ) - emit: - max_scaff_size = GET_LARGEST_SCAFF.out.scaff_size.toInteger() dot_genome = ch_genomesize ref_index = ch_genome_fai ref = reference_file diff --git a/subworkflows/local/generate_sorted_genome.nf b/subworkflows/local/generate_sorted_genome.nf index bc38e2dd..71ec98b7 100755 --- a/subworkflows/local/generate_sorted_genome.nf +++ b/subworkflows/local/generate_sorted_genome.nf @@ -18,13 +18,13 @@ workflow GENERATE_SORTED_GENOME { reference_file, "unsorted.genome" ) - ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions ) + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) genome_size = CUSTOM_GETCHROMSIZES.out.sizes GNU_SORT ( CUSTOM_GETCHROMSIZES.out.sizes ) - ch_versions = ch_versions.mix( GNU_SORT.out.versions ) + ch_versions = ch_versions.mix(GNU_SORT.out.versions) emit: genomesize = GNU_SORT.out.sorted diff --git a/subworkflows/local/generate_unsorted_genome.nf b/subworkflows/local/generate_unsorted_genome.nf index 93bf8e66..de5e6f0c 100755 --- a/subworkflows/local/generate_unsorted_genome.nf +++ b/subworkflows/local/generate_unsorted_genome.nf @@ -17,7 +17,7 @@ workflow GENERATE_UNSORTED_GENOME { reference_file, "unsorted.genome" ) - ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions ) + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) emit: diff --git a/subworkflows/local/hic_bamtobed.nf b/subworkflows/local/hic_bamtobed.nf index 432ae1b7..70ab1ba8 100755 --- a/subworkflows/local/hic_bamtobed.nf +++ b/subworkflows/local/hic_bamtobed.nf @@ -1,6 +1,6 @@ #!/usr/bin/env nextflow -// This subworkflow takes converts .bam to .bed for the hic_mapping subworkflow. +// This subworkflow converts .bam to .bed for the hic_mapping subworkflow. // It runs markdup, sort and get paired contacts.
// Input - Assembled genomic fasta file, .bam file // Output - sorted .bed and paired contact .bed @@ -25,12 +25,12 @@ workflow HIC_BAMTOBED { // LOGIC: PREPARE MARKDUP INPUT // bam_file - .combine( reference_tuple ) - .multiMap { meta_bam, bam_file, meta_ref, ref -> - bam : tuple(meta_bam, bam_file ) - reference : ref + .combine(reference_tuple) + .multiMap {meta_bam, bam_file, meta_ref, ref -> + bam : tuple(meta_bam, bam_file) + reference : ref } - .set { markdup_input } + .set {markdup_input} // // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -39,7 +39,7 @@ workflow HIC_BAMTOBED { markdup_input.bam, markdup_input.reference ) - ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions ) + ch_versions = ch_versions.mix (SAMTOOLS_MARKDUP.out.versions) // // MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE @@ -47,15 +47,15 @@ workflow HIC_BAMTOBED { BAMTOBED_SORT( SAMTOOLS_MARKDUP.out.bam ) - ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions ) + ch_versions = ch_versions.mix(BAMTOBED_SORT.out.versions) // // MODULE: GENERATE CONTACT PAIRS // - GET_PAIRED_CONTACT_BED( - BAMTOBED_SORT.out.sorted_bed + GET_PAIRED_CONTACT_BED( + BAMTOBED_SORT.out.sorted_bed ) - ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions ) + ch_versions = ch_versions.mix(GET_PAIRED_CONTACT_BED.out.versions) emit: paired_contacts_bed = GET_PAIRED_CONTACT_BED.out.bed diff --git a/subworkflows/local/hic_bwamem2.nf b/subworkflows/local/hic_bwamem2.nf index 9409cf7a..67d861cb 100755 --- a/subworkflows/local/hic_bwamem2.nf +++ b/subworkflows/local/hic_bwamem2.nf @@ -24,14 +24,14 @@ workflow HIC_BWAMEM2 { BWAMEM2_INDEX ( reference_tuple - ) - ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) + ) + ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) csv_ch .splitCsv() - .combine ( reference_tuple ) - .combine ( BWAMEM2_INDEX.out.index ) - .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> + .combine (reference_tuple) + .combine (BWAMEM2_INDEX.out.index) + .map{cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> tuple([ id: cram_id.id ], @@ -46,7 +46,7 @@ workflow HIC_BWAMEM2 { ref_dir ) } - .set { ch_filtering_input } + .set {ch_filtering_input} // // MODULE: map hic reads by 10,000 container per time using bwamem2 @@ -55,18 +55,18 @@ workflow HIC_BWAMEM2 { ch_filtering_input ) - ch_versions = ch_versions.mix( CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions ) + ch_versions = ch_versions.mix(CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions) mappedbam_ch = CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.mappedbam // // LOGIC: PREPARING BAMS FOR MERGE // mappedbam_ch - .map{ meta, file -> + .map{meta, file -> tuple( file ) } .collect() - .map { file -> + .map {file -> tuple ( [ id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] @@ -74,7 +74,7 @@ workflow HIC_BWAMEM2 { file ) } - .set { collected_files_for_merge } + .set {collected_files_for_merge} // // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -84,7 +84,7 @@ workflow HIC_BWAMEM2 { reference_tuple, reference_index ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + ch_versions = ch_versions.mix (SAMTOOLS_MERGE.out.versions.first()) emit: diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index e379f49a..aed1f9bb 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -41,21 +41,21 @@ workflow HIC_MAPPING { 
ch_versions = Channel.empty() // COMMENT: 1000bp BIN SIZE INTERVALS FOR CLOAD - ch_cool_bin = Channel.of( 1000 ) + ch_cool_bin = Channel.of(1000) // // LOGIC: make channel of hic reads as input for GENERATE_CRAM_CSV // reference_tuple - .combine( hic_reads_path ) - .map { meta, ref, hic_meta, hic_reads_path -> + .combine(hic_reads_path) + .map {meta, ref, hic_meta, hic_reads_path -> tuple( [ id: meta.id, single_end: true], hic_reads_path ) } - .set { get_reads_input } + .set {get_reads_input} // // MODULE: generate a cram csv file containing the required parametres for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT @@ -63,21 +63,21 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV ( get_reads_input ) - ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) + ch_versions = ch_versions.mix(GENERATE_CRAM_CSV.out.versions) // // LOGIC: make branches for different hic aligner. // hic_reads_path .combine(reference_tuple) - .map{ meta, hic_read_path, ref_meta, ref-> + .map{meta, hic_read_path, ref_meta, ref-> tuple( [ id : ref_meta, aligner : meta.aligner ], ref ) - } + } .branch{ minimap2 : it[0].aligner == "minimap2" bwamem2 : it[0].aligner == "bwamem2" @@ -92,7 +92,7 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV.out.csv, reference_index ) - ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) + ch_versions = ch_versions.mix(HIC_MINIMAP2.out.versions) mergedbam = HIC_MINIMAP2.out.mergedbam // @@ -103,18 +103,18 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV.out.csv, reference_index ) - ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) + ch_versions = ch_versions.mix(HIC_BWAMEM2.out.versions) mergedbam = mergedbam.mix(HIC_BWAMEM2.out.mergedbam) // // LOGIC: PREPARING PRETEXT MAP INPUT // mergedbam - .combine( reference_tuple ) - .combine ( dot_genome ) + .combine(reference_tuple) + .combine (dot_genome) .multiMap { bam_meta, bam, ref_meta, ref_fa, genome_meta, genome_file -> input_bam: tuple( [ id: bam_meta.id, - sz: file( bam ).size() ], + sz: file(bam).size() ], bam ) // NOTE: Inject the genome file into the channel to speed up PretextMap @@ -123,7 +123,7 @@ workflow HIC_MAPPING { genome_file ) } - .set { pretext_input } + .set {pretext_input} // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR LOW RES @@ -132,7 +132,7 @@ workflow HIC_MAPPING { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions ) + ch_versions = ch_versions.mix(PRETEXTMAP_STANDRD.out.versions) // // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT @@ -145,7 +145,7 @@ workflow HIC_MAPPING { telo_file, repeat_density_file ) - ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) + ch_versions = ch_versions.mix(PRETEXT_INGEST_SNDRD.out.versions) // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES @@ -154,7 +154,7 @@ workflow HIC_MAPPING { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions ) + ch_versions = ch_versions.mix(PRETEXTMAP_HIGHRES.out.versions) // // NOTICE: This could fail on LARGE hires maps due to some memory parameter in the C code @@ -169,7 +169,7 @@ workflow HIC_MAPPING { telo_file, repeat_density_file ) - ch_versions = ch_versions.mix( PRETEXT_INGEST_HIRES.out.versions ) + ch_versions = ch_versions.mix(PRETEXT_INGEST_HIRES.out.versions) // // MODULE: GENERATE PNG FROM STANDARD PRETEXT @@ -177,7 +177,7 @@ workflow HIC_MAPPING { SNAPSHOT_SRES ( PRETEXTMAP_STANDRD.out.pretext ) - ch_versions = ch_versions.mix ( SNAPSHOT_SRES.out.versions ) + 
ch_versions = ch_versions.mix (SNAPSHOT_SRES.out.versions) // // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G @@ -195,10 +195,10 @@ workflow HIC_MAPPING { tosubsample : it[0].sz >= 50000000000 unmodified : it[0].sz < 50000000000 } - .set { ch_merged_bam } + .set {ch_merged_bam} // LOGIC: PREPARE BAMTOBED JUICER INPUT. - if ( workflow_setting != "RAPID_TOL" && params.juicer == false ) { + if (workflow_setting != "RAPID_TOL" && params.juicer == false) { // // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G // @@ -215,7 +215,7 @@ workflow HIC_MAPPING { tosubsample : it[0].sz >= 50000000000 unmodified : it[0].sz < 50000000000 } - .set { ch_merged_bam } + .set {ch_merged_bam} // // MODULE: SUBSAMPLE BAM @@ -223,7 +223,7 @@ workflow HIC_MAPPING { SUBSAMPLE_BAM ( ch_merged_bam.tosubsample ) - ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) + ch_versions = ch_versions.mix (SUBSAMPLE_BAM.out.versions) // // LOGIC: COMBINE BRANCHED TO SINGLE OUTPUT @@ -235,12 +235,12 @@ workflow HIC_MAPPING { // LOGIC: PREPARE BAMTOBED JUICER INPUT // ch_subsampled_bam - .combine( reference_tuple ) + .combine(reference_tuple) .multiMap { meta, subsampled_bam, meta_ref, ref -> bam : tuple(meta, subsampled_bam ) reference : tuple(meta_ref, ref) } - .set { ch_bamtobed_juicer_input } + .set {ch_bamtobed_juicer_input} // // SUBWORKFLOW: BAM TO BED FOR JUICER - USES THE SUBSAMPLED MERGED BAM @@ -249,19 +249,19 @@ workflow HIC_MAPPING { ch_bamtobed_juicer_input.bam, ch_bamtobed_juicer_input.reference ) - ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions ) + ch_versions = ch_versions.mix(HIC_BAMTOBED_JUICER.out.versions) // // LOGIC: PREPARE JUICER TOOLS INPUT // HIC_BAMTOBED_JUICER.out.paired_contacts_bed - .combine( dot_genome ) + .combine(dot_genome) .multiMap { meta, paired_contacts, meta_my_genome, my_genome -> - paired : tuple([ id: meta.id, single_end: true], paired_contacts ) + paired : tuple([id: meta.id, single_end: true], paired_contacts) genome : my_genome id : meta.id } - .set { ch_juicer_input } + .set {ch_juicer_input} // // MODULE: GENERATE HIC MAP, ONLY IS PIPELINE IS RUNNING ON ENTRY FULL @@ -271,19 +271,19 @@ workflow HIC_MAPPING { ch_juicer_input.genome, ch_juicer_input.id ) - ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions ) + ch_versions = ch_versions.mix(JUICER_TOOLS_PRE.out.versions) } // // LOGIC: PREPARE BAMTOBED COOLER INPUT // mergedbam - .combine( reference_tuple ) + .combine(reference_tuple) .multiMap { meta, merged_bam, meta_ref, ref -> bam : tuple(meta, merged_bam ) reference : tuple(meta_ref, ref) } - .set { ch_bamtobed_cooler_input } + .set {ch_bamtobed_cooler_input} // // SUBWORKFLOW: BAM TO BED FOR COOLER @@ -292,26 +292,26 @@ workflow HIC_MAPPING { ch_bamtobed_cooler_input.bam, ch_bamtobed_cooler_input.reference ) - ch_versions = ch_versions.mix( HIC_BAMTOBED_COOLER.out.versions ) + ch_versions = ch_versions.mix(HIC_BAMTOBED_COOLER.out.versions) // // LOGIC: BIN CONTACT PAIRS // HIC_BAMTOBED_COOLER.out.paired_contacts_bed - .join( HIC_BAMTOBED_COOLER.out.sorted_bed ) - .combine( ch_cool_bin ) - .set { ch_binned_pairs } + .join(HIC_BAMTOBED_COOLER.out.sorted_bed) + .combine( ch_cool_bin) + .set {ch_binned_pairs} // // LOGIC: PREPARE COOLER INPUT // ch_binned_pairs .combine(dot_genome) - .multiMap { meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> - cooler_in : tuple ( meta, pairs, bed, cool_bin ) + .multiMap {meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> + cooler_in : tuple (meta, pairs, bed, cool_bin) genome_file : 
my_genome } - .set { ch_cooler } + .set {ch_cooler} // // MODULE: GENERATE A MULTI-RESOLUTION COOLER FILE BY COARSENING @@ -326,7 +326,7 @@ workflow HIC_MAPPING { // LOGIC: REFACTOR CHANNEL FOR ZOOMIFY // COOLER_CLOAD.out.cool - .map{ meta, cools, cool_bin -> + .map{meta, cools, cool_bin -> [meta, cools] } .set{ch_cool} @@ -345,14 +345,14 @@ workflow HIC_MAPPING { ch_cram_files .collect() - .map { meta, cram -> + .map {meta, cram -> tuple( [ id: 'cram', sz: cram instanceof ArrayList ? cram.collect { it.size()} : cram.size(), ], cram ) } - .combine( GENERATE_CRAM_CSV.out.csv ) + .combine(GENERATE_CRAM_CSV.out.csv) .map { meta, data, meta2, csv -> tuple( [ id: meta.id, sz: meta.sz, @@ -361,7 +361,7 @@ workflow HIC_MAPPING { data ) } - .set { ch_reporting_cram } + .set {ch_reporting_cram} emit: mcool = COOLER_ZOOMIFY.out.mcool diff --git a/subworkflows/local/hic_minimap2.nf b/subworkflows/local/hic_minimap2.nf index b37ff30d..76b7cf74 100755 --- a/subworkflows/local/hic_minimap2.nf +++ b/subworkflows/local/hic_minimap2.nf @@ -19,27 +19,27 @@ workflow HIC_MINIMAP2 { reference_tuple // Channel: tuple [ val(meta), path( file ) ] csv_ch reference_index - + main: ch_versions = Channel.empty() mappedbam_ch = Channel.empty() // - // MODULE: generate minimap2 mmi file - // + // MODULE: generate minimap2 mmi file + // MINIMAP2_INDEX ( reference_tuple - ) - ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions ) + ) + ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions) // // LOGIC: generate input channel for mapping - // + // csv_ch .splitCsv() - .combine ( reference_tuple ) - .combine ( MINIMAP2_INDEX.out.index ) - .map{ cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path-> + .combine (reference_tuple) + .combine (MINIMAP2_INDEX.out.index) + .map{cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path -> tuple([ id: cram_id.id ], @@ -54,16 +54,16 @@ workflow HIC_MINIMAP2 { ref_dir ) } - .set { ch_filtering_input } + .set {ch_filtering_input} // // MODULE: map hic reads by 10,000 container per time - // + // CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT ( ch_filtering_input ) - ch_versions = ch_versions.mix( CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions ) + ch_versions = ch_versions.mix(CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions) mappedbam_ch = CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.mappedbam @@ -71,19 +71,19 @@ workflow HIC_MINIMAP2 { // LOGIC: PREPARING BAMS FOR MERGE // mappedbam_ch - .map{ meta, file -> - tuple( file ) + .map{meta, file -> + tuple(file) } .collect() - .map { file -> + .map {file -> tuple ( [ - id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] + id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] ], file ) } - .set { collected_files_for_merge } + .set {collected_files_for_merge} // // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -93,10 +93,10 @@ workflow HIC_MINIMAP2 { reference_tuple, reference_index ) - ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + ch_versions = ch_versions.mix (SAMTOOLS_MERGE.out.versions.first()) + - emit: - mergedbam = SAMTOOLS_MERGE.out.bam + mergedbam = SAMTOOLS_MERGE.out.bam versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf index fe6c0f46..314d8970 100755 --- a/subworkflows/local/insilico_digest.nf +++ b/subworkflows/local/insilico_digest.nf @@ -27,24 +27,24 @@ workflow 
INSILICO_DIGEST { // MULTIMAP INTO TWO CHANNELS SO THERE IS REFERENCE * ENZYME CHANNELS // reference - .map { meta, data -> + .map {meta, data -> tuple( [ id : meta.id, single_end : false ], file( data ) ) } - .set { input_fasta } + .set {input_fasta} input_fasta .combine(ch_enzyme) - .multiMap { meta, reference, enzyme_id -> + .multiMap {meta, reference, enzyme_id -> fasta : tuple( meta, reference ) enzyme : enzyme_id } - .set { fa2c_input } + .set {fa2c_input} // // MODULE: CONVERTS FASTA INTO A COLOUR-AWARE BIONANO CMAP FORMAT @@ -62,7 +62,7 @@ workflow INSILICO_DIGEST { MAKECMAP_FA2CMAPMULTICOLOR.out.cmap .map{ meta, cfile -> tuple( - [ id : cfile.toString().split('_')[-3] ], + [id : cfile.toString().split('_')[-3]], cfile ) } @@ -71,21 +71,21 @@ workflow INSILICO_DIGEST { MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey .map{ kfile -> tuple( - [ id : kfile.toString().split('_')[-4] ], + [id : kfile.toString().split('_')[-4]], kfile ) } - .set { ch_cmapkey_new } + .set {ch_cmapkey_new} ch_cmap_new .join(ch_cmapkey_new) - .multiMap { meta, cfile, kfile -> + .multiMap {meta, cfile, kfile -> cmap : tuple( meta, cfile) key_file : kfile } - .set { ch_join } + .set {ch_join} // // MODULE: RENAME CMAP IDs FROM BIONANO IDX TO ORIGINAL GENOMIC LOCATIONS @@ -98,11 +98,11 @@ workflow INSILICO_DIGEST { ch_versions = ch_versions.mix(MAKECMAP_RENAMECMAPIDS.out.versions) MAKECMAP_RENAMECMAPIDS.out.renamedcmap - .multiMap { meta, file -> + .multiMap {meta, file -> full : tuple ( meta, file ) sample : meta.id } - .set { ch_renamedcmap } + .set {ch_renamedcmap} // // MODULE: CONVERT CMAP FILE INTO BED FILE @@ -117,12 +117,12 @@ workflow INSILICO_DIGEST { MAKECMAP_CMAP2BED.out.bedfile .combine(sizefile) .combine(dot_as) - .multiMap { meta, bed, meta_2, dot_genome, as_file -> - bed_tuple : tuple( meta, bed ) + .multiMap {meta, bed, meta_2, dot_genome, as_file -> + bed_tuple : tuple(meta, bed) genome_file : dot_genome autosql : as_file } - .set { combined_ch } + .set {combined_ch} // // MODULE: CONVERT ABOVE BED INTO BIGBED WITH ADDITIONAL AS FILE diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf index 2c98f955..9ef7278c 100755 --- a/subworkflows/local/kmer.nf +++ b/subworkflows/local/kmer.nf @@ -26,47 +26,47 @@ workflow KMER { // LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT // reads_path - .map { meta, reads_path -> + .map {meta, reads_path -> tuple( [ id : meta.id, single_end : true ], reads_path ) } - .set { get_reads_input } + .set {get_reads_input} // // MODULE: GETS PACBIO READ PATHS FROM READS_PATH // - ch_grabbed_read_paths = GrabFiles( get_reads_input ) + ch_grabbed_read_paths = GrabFiles(get_reads_input) // // MODULE: JOIN PACBIO READ // CAT_CAT( ch_grabbed_read_paths ) - ch_versions = ch_versions.mix( CAT_CAT.out.versions.first() ) + ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) // // MODULE: COUNT KMERS // FASTK_FASTK( CAT_CAT.out.file_out ) - ch_versions = ch_versions.mix( FASTK_FASTK.out.versions.first() ) + ch_versions = ch_versions.mix(FASTK_FASTK.out.versions.first()) // // LOGIC: PREPARE MERQURYFK INPUT // FASTK_FASTK.out.hist - .combine( FASTK_FASTK.out.ktab ) - .combine( reference_tuple ) + .combine(FASTK_FASTK.out.ktab) + .combine(reference_tuple) .map{ meta_hist, hist, meta_ktab, ktab, meta_ref, primary -> tuple( meta_hist, hist, ktab, primary, [] ) } - .set{ ch_merq } + .set{ch_merq} // // MODULE: USE KMER HISTOGRAM TO PRODUCE SPECTRA GRAPH // - MERQURYFK_MERQURYFK ( ch_merq ) + MERQURYFK_MERQURYFK (ch_merq) ch_versions = ch_versions.mix( 
MERQURYFK_MERQURYFK.out.versions.first() ) emit: @@ -82,10 +82,10 @@ process GrabFiles { executor 'local' input: - tuple val( meta ), path( "in" ) + tuple val(meta), path("in") output: - tuple val( meta ), path( "in/*.fasta.gz" ) + tuple val(meta), path("in/*.fasta.gz") "true" } diff --git a/subworkflows/local/nuc_alignments.nf b/subworkflows/local/nuc_alignments.nf index cff5235f..e918b643 100755 --- a/subworkflows/local/nuc_alignments.nf +++ b/subworkflows/local/nuc_alignments.nf @@ -33,16 +33,17 @@ workflow NUC_ALIGNMENTS { // nuc_files .flatten() - .buffer( size: 2 ) - .combine ( reference_tuple ) - .combine( intron_size ) - .map { meta, nuc_file, ref_meta, ref, intron -> - tuple( [id: meta.id, - type: meta.type, - org: meta.org, - intron_size: intron, - split_prefix: nuc_file.toString().split('/')[-1].split('.fasta')[0], - single_end: true + .buffer(size: 2) + .combine(reference_tuple) + .combine(intron_size) + .map {meta, nuc_file, ref_meta, ref, intron -> + tuple( [ + id: meta.id, + type: meta.type, + org: meta.org, + intron_size: intron, + split_prefix: nuc_file.toString().split('/')[-1].split('.fasta')[0], + single_end: true ], nuc_file, ref, @@ -52,15 +53,15 @@ workflow NUC_ALIGNMENTS { false ) } - .multiMap { meta, nuc_file, reference, bool_1, bool_2, bool_3, bool_4 -> - nuc : tuple( meta, nuc_file) + .multiMap {meta, nuc_file, reference, bool_1, bool_2, bool_3, bool_4 -> + nuc : tuple(meta, nuc_file) ref : reference bool_bam_output : bool_1 bool_cigar_paf : bool_2 bool_cigar_bam : bool_3 bool_bedfile : bool_4 } - .set { formatted_input } + .set {formatted_input} // // MODULE: ALIGNS REFERENCE FAIDX TO THE GENE_ALIGNMENT QUERY FILE FROM NUC_FILES @@ -81,13 +82,13 @@ workflow NUC_ALIGNMENTS { // AND DATA TYPE (RNA, CDS, DNA). // MINIMAP2_ALIGN.out.bam - .map { meta, file -> + .map {meta, file -> tuple( [ id: meta.org, type: meta.type ], - file) } - .groupTuple( by: [0] ) // group by meta list - .set { merge_input } + file)} + .groupTuple(by: [0]) // group by meta list + .set {merge_input} // // MODULE: MERGES THE BAM FILES FOUND IN THE GROUPED TUPLE IN REGARDS TO THE REFERENCE @@ -111,7 +112,7 @@ workflow NUC_ALIGNMENTS { // // MODULE: CONVERTS THE ABOVE MERGED BAM INTO BED FORMAT // - BEDTOOLS_BAMTOBED ( SAMTOOLS_MERGE.out.bam ) + BEDTOOLS_BAMTOBED (SAMTOOLS_MERGE.out.bam) ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions) // TODO: try filtering out here too @@ -120,7 +121,7 @@ workflow NUC_ALIGNMENTS { // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // BEDTOOLS_BAMTOBED.out.bed - .map { meta, file -> + .map {meta, file -> tuple ( [ id: meta.id, type: meta.type, lines: file.countLines() @@ -128,7 +129,7 @@ workflow NUC_ALIGNMENTS { file ) } - .set { bedtools_input } + .set {bedtools_input} // // MODULE: SORTS THE ABOVE BED FILE @@ -150,16 +151,17 @@ workflow NUC_ALIGNMENTS { file_size: file.size() ], file ) } - .filter { it[0].file_size >= 141 } // Take the first item in input (meta) and check if size is more than a symlink - .combine( dot_genome ) - .multiMap { meta, ref, genome_meta, genome -> + .filter {it[0].file_size >= 141 } // Take the first item in input (meta) and check if size is more than a symlink + .combine(dot_genome) + .multiMap {meta, ref, genome_meta, genome -> bed_file: tuple( [ id: meta.id, type: meta.type, ], - ref ) + ref + ) dot_genome: genome } - .set { ucsc_input } + .set {ucsc_input} // // MODULE: CONVERTS GENOME FILE AND BED INTO A BIGBED FILE @@ -169,7 +171,7 @@ workflow NUC_ALIGNMENTS { ucsc_input.dot_genome, [] ) - 
ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) + ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) emit: nuc_alignment = UCSC_BEDTOBIGBED.out.bigbed.collect() diff --git a/subworkflows/local/pep_alignments.nf b/subworkflows/local/pep_alignments.nf index 32d6da30..a3b3cae3 100755 --- a/subworkflows/local/pep_alignments.nf +++ b/subworkflows/local/pep_alignments.nf @@ -34,19 +34,23 @@ workflow PEP_ALIGNMENTS { // pep_files .flatten() - .buffer( size: 2 ) - .combine ( MINIPROT_INDEX.out.index ) - .multiMap { pep_meta, pep_file, miniprot_meta, miniprot_index -> - pep_tuple : tuple( [ id: pep_meta.id, - type: pep_meta.type, - org: pep_meta.org - ], - pep_file ) - index_file : tuple( [ id: "Reference", - ], - miniprot_index ) + .buffer(size: 2) + .combine (MINIPROT_INDEX.out.index) + .multiMap {pep_meta, pep_file, miniprot_meta, miniprot_index -> + pep_tuple : tuple( [ + id: pep_meta.id, + type: pep_meta.type, + org: pep_meta.org + ], + pep_file + ) + index_file : tuple( [ + id: "Reference" + ], + miniprot_index + ) } - .set { formatted_input } + .set {formatted_input} // // MODULE: ALIGNS PEP DATA WITH REFERENCE INDEX @@ -56,21 +60,21 @@ workflow PEP_ALIGNMENTS { formatted_input.pep_tuple, formatted_input.index_file ) - ch_versions = ch_versions.mix( MINIPROT_ALIGN.out.versions ) + ch_versions = ch_versions.mix(MINIPROT_ALIGN.out.versions) // // LOGIC: GROUPS OUTPUT GFFS BASED ON QUERY ORGANISMS AND DATA TYPE (PEP) // MINIPROT_ALIGN.out.gff - .map { meta, file -> + .map {meta, file -> tuple( [ id : meta.org + '_pep', type : meta.type ], file ) } - .groupTuple( by: [0] ) - .set { grouped_tuple } + .groupTuple(by: [0]) + .set {grouped_tuple} // // MODULE: AS ABOVE OUTPUT IS BED FORMAT, IT IS MERGED PER ORGANISM + TYPE @@ -78,20 +82,20 @@ workflow PEP_ALIGNMENTS { CAT_CAT ( grouped_tuple ) - ch_versions = ch_versions.mix( CAT_CAT.out.versions ) + ch_versions = ch_versions.mix(CAT_CAT.out.versions) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // CAT_CAT.out.file_out - .map { meta, file -> + .map {meta, file -> tuple ( [ id: meta.id, lines: file.countLines() ], file ) } - .set { bedtools_input } + .set {bedtools_input} // // MODULE: SORTS ABOVE OUTPUT AND RETAINS GFF SUFFIX @@ -101,7 +105,7 @@ workflow PEP_ALIGNMENTS { bedtools_input , [] ) - ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) + ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) // // MODULE: CUTS GFF INTO PUNCHLIST @@ -109,7 +113,7 @@ workflow PEP_ALIGNMENTS { EXTRACT_COV_IDEN ( CAT_CAT.out.file_out ) - ch_versions = ch_versions.mix( EXTRACT_COV_IDEN.out.versions ) + ch_versions = ch_versions.mix(EXTRACT_COV_IDEN.out.versions) // // MODULE: COMPRESS AND INDEX MERGED.GFF @@ -118,7 +122,7 @@ workflow PEP_ALIGNMENTS { TABIX_BGZIPTABIX ( BEDTOOLS_SORT.out.sorted ) - ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) emit: gff_file = BEDTOOLS_SORT.out.sorted diff --git a/subworkflows/local/repeat_density.nf b/subworkflows/local/repeat_density.nf index 8764408f..2e5f70eb 100755 --- a/subworkflows/local/repeat_density.nf +++ b/subworkflows/local/repeat_density.nf @@ -31,7 +31,7 @@ workflow REPEAT_DENSITY { WINDOWMASKER_MKCOUNTS ( reference_tuple ) - ch_versions = ch_versions.mix( WINDOWMASKER_MKCOUNTS.out.versions ) + ch_versions = ch_versions.mix(WINDOWMASKER_MKCOUNTS.out.versions) // // MODULE: CALCULATE THE STATISTICS OF THE MARKED UP REGIONS @@ -40,7 +40,7 @@ workflow REPEAT_DENSITY { 
WINDOWMASKER_MKCOUNTS.out.counts, reference_tuple ) - ch_versions = ch_versions.mix( WINDOWMASKER_USTAT.out.versions ) + ch_versions = ch_versions.mix(WINDOWMASKER_USTAT.out.versions) // // MODULE: USE USTAT OUTPUT TO EXTRACT REPEATS FROM FASTA @@ -48,7 +48,7 @@ workflow REPEAT_DENSITY { EXTRACT_REPEAT( WINDOWMASKER_USTAT.out.intervals ) - ch_versions = ch_versions.mix( EXTRACT_REPEAT.out.versions ) + ch_versions = ch_versions.mix(EXTRACT_REPEAT.out.versions) // // MODULE: CREATE WINDOWS FROM .GENOME FILE @@ -56,7 +56,7 @@ workflow REPEAT_DENSITY { BEDTOOLS_MAKEWINDOWS( dot_genome ) - ch_versions = ch_versions.mix( BEDTOOLS_MAKEWINDOWS.out.versions ) + ch_versions = ch_versions.mix(BEDTOOLS_MAKEWINDOWS.out.versions) // // LOGIC: COMBINE TWO CHANNELS AND OUTPUT tuple(meta, windows_file, repeat_file) @@ -70,7 +70,7 @@ workflow REPEAT_DENSITY { repeat_file ) } - .set { intervals } + .set {intervals} // // MODULE: GENERATES THE REPEAT FILE FROM THE WINDOW FILE AND GENOME FILE @@ -79,7 +79,7 @@ workflow REPEAT_DENSITY { intervals, dot_genome ) - ch_versions = ch_versions.mix( BEDTOOLS_INTERSECT.out.versions ) + ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) // // MODULE: FIXES IDS FOR REPEATS @@ -87,7 +87,7 @@ workflow REPEAT_DENSITY { RENAME_IDS( BEDTOOLS_INTERSECT.out.intersect ) - ch_versions = ch_versions.mix( RENAME_IDS.out.versions ) + ch_versions = ch_versions.mix(RENAME_IDS.out.versions) // // MODULE: SORTS THE ABOVE BED FILES @@ -95,17 +95,17 @@ workflow REPEAT_DENSITY { GNU_SORT_A ( RENAME_IDS.out.bed // Intersect file ) - ch_versions = ch_versions.mix( GNU_SORT_A.out.versions ) + ch_versions = ch_versions.mix(GNU_SORT_A.out.versions) GNU_SORT_B ( dot_genome // Genome file - Will not run unless genome file is sorted to ) - ch_versions = ch_versions.mix( GNU_SORT_B.out.versions ) + ch_versions = ch_versions.mix(GNU_SORT_B.out.versions) GNU_SORT_C ( BEDTOOLS_MAKEWINDOWS.out.bed // Windows file ) - ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) + ch_versions = ch_versions.mix(GNU_SORT_C.out.versions) // // MODULE: ADDS 4TH COLUMN TO BED FILE USED IN THE REPEAT DENSITY GRAPH @@ -113,7 +113,7 @@ workflow REPEAT_DENSITY { REFORMAT_INTERSECT ( GNU_SORT_A.out.sorted ) - ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) + ch_versions = ch_versions.mix(REFORMAT_INTERSECT.out.versions) // // MODULE: TABIX AND GZIP THE REPEAT DENSITY BED FILE FOR JBROWSE @@ -121,7 +121,7 @@ workflow REPEAT_DENSITY { TABIX_BGZIPTABIX ( REFORMAT_INTERSECT.out.bed ) - ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) // // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO @@ -136,7 +136,7 @@ workflow REPEAT_DENSITY { bed ) } - .set { for_mapping } + .set {for_mapping} // // MODULE: MAPS THE REPEATS AGAINST THE REFERENCE GENOME @@ -145,7 +145,7 @@ workflow REPEAT_DENSITY { for_mapping, GNU_SORT_B.out.sorted ) - ch_versions = ch_versions.mix( BEDTOOLS_MAP.out.versions ) + ch_versions = ch_versions.mix(BEDTOOLS_MAP.out.versions) // // MODULE: REPLACES . 
WITH 0 IN MAPPED FILE @@ -153,16 +153,16 @@ workflow REPEAT_DENSITY { REPLACE_DOTS ( BEDTOOLS_MAP.out.mapped ) - ch_versions = ch_versions.mix( REPLACE_DOTS.out.versions ) + ch_versions = ch_versions.mix(REPLACE_DOTS.out.versions) // // MODULE: CONVERTS GENOME FILE AND BED INTO A BIGWIG FILE // UCSC_BEDGRAPHTOBIGWIG( REPLACE_DOTS.out.bed, - GNU_SORT_B.out.sorted.map { it[1] } // Pulls file from tuple of meta and file + GNU_SORT_B.out.sorted.map{it[1]} // Pulls file from tuple of meta and file ) - ch_versions = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions ) + ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions) emit: repeat_density = UCSC_BEDGRAPHTOBIGWIG.out.bigwig diff --git a/subworkflows/local/selfcomp.nf b/subworkflows/local/selfcomp.nf index 25022c77..9d1e0e95 100755 --- a/subworkflows/local/selfcomp.nf +++ b/subworkflows/local/selfcomp.nf @@ -36,7 +36,7 @@ workflow SELFCOMP { SELFCOMP_SPLITFASTA( reference_tuple ) - ch_versions = ch_versions.mix( SELFCOMP_SPLITFASTA.out.versions ) + ch_versions = ch_versions.mix(SELFCOMP_SPLITFASTA.out.versions) // // LOGIC: CALCULATE THE NUMBER OF GB WHICH WILL DICTATE THE NUMBER OF @@ -44,12 +44,12 @@ workflow SELFCOMP { // ALSO CALCULATES THE NUMBER OF TOTAL WINDOWS NEEDED IN THE REFERENCE // reference_tuple - .map{ it, file -> file.size()} - .set { file_size } // Using set as TAP will force the pipeline to not complete successfully in some cases + .map{it, file -> file.size()} + .set{file_size} // Using set as TAP will force the pipeline to not complete successfully in some cases file_size .sum{it / 1e9} - .collect { new java.math.BigDecimal (it).setScale(0, RoundingMode.UP) } + .collect {new java.math.BigDecimal (it).setScale(0, RoundingMode.UP)} .flatten() .set { chunk_number } @@ -61,7 +61,7 @@ workflow SELFCOMP { SELFCOMP_SPLITFASTA.out.fa, chunk_number ) - ch_versions = ch_versions.mix( CHUNKFASTA.out.versions ) + ch_versions = ch_versions.mix(CHUNKFASTA.out.versions) // // LOGIC: STRIP META FROM QUERY, AND COMBINE WITH REFERENCE FILE @@ -69,19 +69,19 @@ workflow SELFCOMP { // OR n=((REFERENCE / 1E9) * (REFENCE / 1E9)) IF GENOME.SIZE() > 1GB // CHUNKFASTA.out.fasta - .map{ meta, query -> + .map{meta, query -> query } .collect() // Collect any output from CHUNKFASTA - .map { it -> + .map {it -> tuple( [ len: it.size() ], // Calc length of list it ) } - .set { len_ch } // tap out to preserve length of CHUNKFASTA list + .set {len_ch} // tap out to preserve length of CHUNKFASTA list len_ch // tap swapped with set as tap stops pipeline completion - .map { meta, files -> + .map {meta, files -> files } .flatten() // flatten list into singles @@ -99,7 +99,7 @@ workflow SELFCOMP { } .transpose() // Transpose the channel so that we have a channel for file in query // allows this to work on list of 1 and beyond - .map { meta, ref, qry -> + .map{meta, ref, qry -> tuple( [ id: meta.id, sz: meta.sz, it: qry.toString().split('/')[-1] // get file name of the new query @@ -108,7 +108,7 @@ workflow SELFCOMP { qry ) } - .set{ mummer_input } + .set{mummer_input} // // MODULE: ALIGNS 1GB CHUNKS TO 500KB CHUNKS @@ -117,25 +117,25 @@ workflow SELFCOMP { MUMMER( mummer_input ) - ch_versions = ch_versions.mix( MUMMER.out.versions ) + ch_versions = ch_versions.mix(MUMMER.out.versions) // // LOGIC: COLLECT COORD FILES AND CONVERT TO LIST OF FILES // ADD REFERENCE META // MUMMER.out.coords - .map{ meta, file -> + .map{meta, file -> file } .collect() .toList() - .combine( reference_tuple ) - .map { files, meta, ref -> + .combine(reference_tuple) + 
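The chunk_number calculation in SELFCOMP above rounds the assembly size in gigabases up to the next whole number and asks CHUNKFASTA for that many pieces. A worked example of the same arithmetic in plain Groovy, with an invented genome size (the division already yields a BigDecimal here, so setScale can be called on it directly):

    import java.math.RoundingMode

    def genome_bytes = 2_300_000_000L                    // hypothetical 2.3 Gb assembly
    def gigabases    = genome_bytes / 1e9                // 2.3 - mirrors .sum { it / 1e9 }
    def chunk_number = gigabases.setScale(0, RoundingMode.UP)
    assert chunk_number == 3                             // so the reference is split into 3 chunks
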
.map{files, meta, ref -> tuple( meta, files ) } - .set { ch_mummer_files } + .set {ch_mummer_files} // // MODULE: MERGES MUMMER ALIGNMENT FILES @@ -143,7 +143,7 @@ workflow SELFCOMP { CAT_CAT( ch_mummer_files ) - ch_versions = ch_versions.mix( CAT_CAT.out.versions ) + ch_versions = ch_versions.mix(CAT_CAT.out.versions) // // MODULE: CONVERT THE MUMMER ALIGNMENTS INTO BED FORMAT @@ -152,7 +152,7 @@ workflow SELFCOMP { CAT_CAT.out.file_out, motif_len ) - ch_versions = ch_versions.mix( SELFCOMP_MUMMER2BED.out.versions ) + ch_versions = ch_versions.mix(SELFCOMP_MUMMER2BED.out.versions) // // MODULE: GENERATE A LIST OF IDs AND GENOMIC POSITIONS OF SELFCOMPLEMENTARY REGIONS @@ -162,20 +162,20 @@ workflow SELFCOMP { SELFCOMP_MUMMER2BED.out.bedfile, SELFCOMP_SPLITFASTA.out.agp ) - ch_versions = ch_versions.mix( SELFCOMP_MAPIDS.out.versions ) + ch_versions = ch_versions.mix(SELFCOMP_MAPIDS.out.versions) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // SELFCOMP_MAPIDS.out.bedfile - .map { meta, file -> + .map{meta, file -> tuple ( [ id: meta.id, lines: file.countLines() ], file ) } - .set { bedtools_input } + .set{bedtools_input} // // MODULE: SORTS ABOVE OUTPUT BED FILE AND RETAINS BED SUFFIX @@ -184,7 +184,7 @@ workflow SELFCOMP { bedtools_input, [] ) - ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) + ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) // // MODULE: BUILD ALIGNMENT BLOCKS @@ -192,7 +192,7 @@ workflow SELFCOMP { SELFCOMP_ALIGNMENTBLOCKS( BEDTOOLS_SORT.out.sorted ) - ch_versions = ch_versions.mix( SELFCOMP_ALIGNMENTBLOCKS.out.versions ) + ch_versions = ch_versions.mix(SELFCOMP_ALIGNMENTBLOCKS.out.versions) // // MODULE: SORT BLOCKS FILES AND FILTER BY MOTIF LENGTH @@ -200,7 +200,7 @@ workflow SELFCOMP { CONCATBLOCKS( SELFCOMP_ALIGNMENTBLOCKS.out.blockfile ) - ch_versions = ch_versions.mix( CONCATBLOCKS.out.versions ) + ch_versions = ch_versions.mix(CONCATBLOCKS.out.versions) // // MODULE: CONVERTS ABOVE OUTPUT INTO BIGBED FORMAT @@ -210,7 +210,7 @@ workflow SELFCOMP { dot_genome.map{it[1]}, // Pulls file from tuple ( meta and file ) selfcomp_as ) - ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) + ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions ) emit: ch_bigbed = UCSC_BEDTOBIGBED.out.bigbed diff --git a/subworkflows/local/synteny.nf b/subworkflows/local/synteny.nf index 33c0875b..04b43cf4 100755 --- a/subworkflows/local/synteny.nf +++ b/subworkflows/local/synteny.nf @@ -18,21 +18,21 @@ workflow SYNTENY { // AND PARSE INTO CHANNEL PER GENOME // reference_tuple - .combine( synteny_path ) - .map { meta, reference, dir_path -> + .combine(synteny_path) + .map{meta, reference, dir_path -> file("${dir_path}${meta.class}/*.fasta") } .flatten() - .combine( reference_tuple ) - .multiMap { syntenic_ref, meta, ref -> - syntenic_tuple : tuple( meta, syntenic_ref ) + .combine(reference_tuple) + .multiMap{syntenic_ref, meta, ref -> + syntenic_tuple : tuple(meta, syntenic_ref) reference_fa : ref bool_bam_output : false bool_cigar_paf : true bool_cigar_bam : false bool_bedfile : false } - .set { mm_input } + .set {mm_input} // // MODULE: ALIGNS THE SUNTENIC GENOMES TO THE REFERENCE GENOME @@ -46,7 +46,7 @@ workflow SYNTENY { mm_input.bool_cigar_bam, mm_input.bool_bedfile, ) - ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions ) + ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) emit: ch_paf = MINIMAP2_ALIGN.out.paf diff --git a/subworkflows/local/telo_finder.nf b/subworkflows/local/telo_finder.nf index 
fe5b704b..aa0b7b80 100755 --- a/subworkflows/local/telo_finder.nf +++ b/subworkflows/local/telo_finder.nf @@ -24,7 +24,7 @@ workflow TELO_FINDER { reference_tuple, teloseq ) - ch_versions = ch_versions.mix( FIND_TELOMERE_REGIONS.out.versions ) + ch_versions = ch_versions.mix(FIND_TELOMERE_REGIONS.out.versions) // // MODULE: GENERATES A WINDOWS FILE FROM THE ABOVE @@ -32,7 +32,7 @@ workflow TELO_FINDER { FIND_TELOMERE_WINDOWS ( FIND_TELOMERE_REGIONS.out.telomere ) - ch_versions = ch_versions.mix( FIND_TELOMERE_WINDOWS.out.versions ) + ch_versions = ch_versions.mix(FIND_TELOMERE_WINDOWS.out.versions) // // MODULE: EXTRACTS THE LOCATION OF TELOMERIC SEQUENCE BASED ON THE WINDOWS @@ -40,7 +40,7 @@ workflow TELO_FINDER { EXTRACT_TELO ( FIND_TELOMERE_WINDOWS.out.windows ) - ch_versions = ch_versions.mix( EXTRACT_TELO.out.versions ) + ch_versions = ch_versions.mix(EXTRACT_TELO.out.versions) // // MODULE: BGZIP AND TABIX THE OUTPUT FILE @@ -48,7 +48,7 @@ workflow TELO_FINDER { TABIX_BGZIPTABIX ( EXTRACT_TELO.out.bed ) - ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) + ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) emit: bed_file = EXTRACT_TELO.out.bed diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index e3ad75da..a734a728 100755 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -24,18 +24,18 @@ workflow YAML_INPUT { .flatten() .combine( workflow_id ) .multiMap { data, id -> - assembly: ( data.assembly ) - assembly_reads: ( data.assem_reads ) - hic_data: ( data.hic_data ) - kmer_profile: ( data.kmer_profile ) - reference: ( file(data.reference_file, checkIfExists: true) ) - alignment: ( id == "FULL" ? data.alignment : "" ) - self_comp: ( id == "FULL" ? data.self_comp : "" ) - synteny: ( id == "FULL" ? data.synteny : "" ) - intron: ( id == "FULL" ? data.intron : "" ) - busco_gene: ( data.busco ) - teloseq: ( data.telomere ) - map_order: ( data.map_order) + assembly: (data.assembly) + assembly_reads: (data.assem_reads) + hic_data: (data.hic_data) + kmer_profile: (data.kmer_profile) + reference: (file(data.reference_file, checkIfExists: true)) + alignment: (id == "FULL" || id == "JBROWSE" ? data.alignment : "") + self_comp: (id == "FULL" || id == "JBROWSE" ? data.self_comp : "") + synteny: (id == "FULL" || id == "JBROWSE" ? data.synteny : "") + intron: (id == "FULL" || id == "JBROWSE" ? data.intron : "") + busco_gene: (data.busco) + teloseq: (data.telomere) + map_order: (data.map_order) } .set{ group } @@ -44,7 +44,7 @@ workflow YAML_INPUT { // group .assembly - .multiMap { data -> + .multiMap{ data -> assem_level: data.assem_level assem_version: data.assem_version sample_id: data.sample_id @@ -52,97 +52,98 @@ workflow YAML_INPUT { defined_class: data.defined_class project_id: data.project_id } - .set { assembly_data } + .set{assembly_data} group .assembly_reads - .multiMap { data -> + .multiMap{ data -> read_type: data.read_type read_data: data.read_data supplement: data.supplementary_data } - .set { assem_reads } + .set{assem_reads} group .hic_data - .multiMap { data -> + .multiMap{ data -> hic_cram: data.hic_cram hic_aligner: data.hic_aligner } - .set { hic } + .set {hic} group .kmer_profile - .multiMap { data -> + .multiMap{ data -> length: data.kmer_length dir: data.dir } - .set { kmer_profiling } + .set {kmer_profiling} group .alignment - .combine( workflow_id ) - .multiMap { data, id -> - data_dir: (id == "FULL" ? data.data_dir : "") - common_name: (id == "FULL" ? 
data.common_name : "") - geneset_id: (id == "FULL" ? data.geneset_id : "") + .combine(workflow_id) + .multiMap{ data, id -> + data_dir: (id == "FULL" || id == "JBROWSE" ? data.data_dir : "") + common_name: (id == "FULL" || id == "JBROWSE" ? data.common_name : "") + geneset_id: (id == "FULL" || id == "JBROWSE" ? data.geneset_id : "") } - .set{ alignment_data } + .set{alignment_data} group .self_comp - .combine( workflow_id ) - .multiMap { data, id -> - motif_len: (id == "FULL" ? data.motif_len : "") - mummer_chunk: (id == "FULL" ? data.mummer_chunk : "") + .combine(workflow_id) + .multiMap{ data, id -> + motif_len: (id == "FULL" || id == "JBROWSE" ? data.motif_len : "") + mummer_chunk: (id == "FULL" || id == "JBROWSE" ? data.mummer_chunk : "") } - .set{ selfcomp_data } + .set{selfcomp_data} group .synteny - .combine( workflow_id ) - .multiMap { data, id -> - synteny_genome: (id == "FULL" ? data.synteny_genome_path: "") + .combine(workflow_id) + .multiMap{ data, id -> + synteny_genome: (id == "FULL" || id == "JBROWSE" ? data.synteny_genome_path: "") } - .set{ synteny_data } + .set{synteny_data} group .intron - .combine( workflow_id ) - .multiMap { data, id -> - size: (id == "FULL" ? data.size : "") + .combine(workflow_id) + .multiMap{ data, id -> + size: (id == "FULL" || id == "JBROWSE" ? data.size : "") } - .set { intron_size } + .set {intron_size} group .teloseq - .multiMap { data -> + .multiMap{ data -> teloseq: data.teloseq } - .set { teloseq } + .set {teloseq} group .busco_gene - .multiMap { data -> + .multiMap{ data -> lineage: data.lineage lineages_path: data.lineages_path } - .set { busco_lineage } + .set {busco_lineage} // // LOGIC: COMBINE SOME CHANNELS INTO VALUES REQUIRED DOWNSTREAM // assembly_data.sample_id - .combine( assembly_data.assem_version ) - .map { it1, it2 -> - ("${it1}_${it2}")} - .set { tolid_version } + .combine(assembly_data.assem_version) + .map{it1, it2 -> + ("${it1}_${it2}") + } + .set{tolid_version} tolid_version - .combine( group.reference ) - .combine( assembly_data.defined_class ) - .combine( assembly_data.project_id ) - .map { sample, ref_file, defined_class, project -> + .combine(group.reference) + .combine(assembly_data.defined_class) + .combine(assembly_data.project_id) + .map{sample, ref_file, defined_class, project -> tuple( [ id: sample, class: defined_class, project_type: project @@ -150,13 +151,13 @@ workflow YAML_INPUT { ref_file ) } - .set { ref_ch } + .set{ref_ch} - if ( assem_reads.read_type.filter { it == "hifi" } || assem_reads.read_type.filter { it == "clr" } || assem_reads.read_type.filter { it == "ont" } ) { + if (assem_reads.read_type.filter { it == "hifi" } || assem_reads.read_type.filter { it == "clr" } || assem_reads.read_type.filter { it == "ont" }) { tolid_version - .combine( assem_reads.read_type ) - .combine( assem_reads.read_data ) - .map{ sample, type, data -> + .combine(assem_reads.read_type) + .combine(assem_reads.read_data) + .map{sample, type, data -> tuple( [ id : sample, single_end : true, read_type : type @@ -164,13 +165,13 @@ workflow YAML_INPUT { data ) } - .set { read_ch } + .set {read_ch} } - else if ( assem_reads.read_type.filter { it == "illumina" } ) { + else if (assem_reads.read_type.filter {it == "illumina"}) { tolid_version - .combine( assem_reads.read_type ) - .combine( assem_reads.read_data ) - .map{ sample, type, data -> + .combine(assem_reads.read_type) + .combine(assem_reads.read_data) + .map{sample, type, data -> tuple( [ id : sample, single_end : false, read_type : type @@ -178,40 +179,40 @@ workflow 
YAML_INPUT { data ) } - .set { read_ch } + .set {read_ch} } tolid_version - .combine( hic.hic_cram ) - .combine( hic.hic_aligner ) - .map { sample, data, aligner -> + .combine(hic.hic_cram) + .combine(hic.hic_aligner) + .map{sample, data, aligner -> tuple( [ id: sample, aligner: aligner ], data ) } - .set { hic_ch } + .set {hic_ch} tolid_version - .combine( assem_reads.supplement ) - .map { sample, data -> + .combine(assem_reads.supplement) + .map{sample, data -> tuple( [ id: sample ], data ) } - .set { supplement_ch } + .set {supplement_ch} tolid_version - .combine ( assembly_data.sample_id ) - .combine ( kmer_profiling.length ) - .combine ( kmer_profiling.dir ) - .map { sample, sample_id, kmer_len, dir -> + .combine (assembly_data.sample_id) + .combine (kmer_profiling.length) + .combine (kmer_profiling.dir) + .map{sample, sample_id, kmer_len, dir -> tuple( [ id: sample, kmer: kmer_len ], file("${dir}/k${kmer_len}/${sample_id}.k${kmer_len}.ktab") // Don't check for existence yet ) } - .set { kmer_prof } + .set {kmer_prof} emit: assembly_id = tolid_version @@ -245,5 +246,5 @@ workflow YAML_INPUT { } def readYAML( yamlfile ) { - return new Yaml().load( new FileReader( yamlfile.toString() ) ) + return new Yaml().load( new FileReader( yamlfile.toString())) } diff --git a/workflows/treeval.nf b/workflows/treeval.nf index 526075da..8dc903bc 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -63,6 +63,14 @@ workflow TREEVAL { // ch_versions = Channel.empty() + exclude_workflow_steps = params.steps ? params.steps.split(",") : "NONE" + + full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] + + if (!full_list.containsAll(exclude_workflow_steps)) { + exit 1, "There is an extra argument given on Command Line: \n Check contents of --exclude: $exclude_workflow_steps\nMaster list is: $full_list" + } + params.entry = 'FULL' input_ch = Channel.fromPath(params.input, checkIfExists: true) @@ -111,15 +119,17 @@ workflow TREEVAL { // SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate // file with enzymatic digest sites. 
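The new --steps handling splits the comma-separated value into exclude_workflow_steps, validates every entry against full_list, and then wraps each subworkflow call in an if ( !exclude_workflow_steps.contains(...) ) guard so the listed steps are skipped. A small self-contained illustration of what that guard logic evaluates for a hypothetical run (the command line is a placeholder, not a tested invocation):

    // nextflow run main.nf -profile singularity --input /path/to/treeval.yaml --steps busco,kmer

    def exclude_workflow_steps = "busco,kmer".split(",") as List   // what params.steps.split(",") yields
    def full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp",
                     "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"]

    assert full_list.containsAll(exclude_workflow_steps)    // passes, so the run is allowed to start
    assert !exclude_workflow_steps.contains("selfcomp")     // SELFCOMP is therefore still executed
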
// - ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) + if ( !exclude_workflow_steps.contains("insilico_digest")) { + ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) - INSILICO_DIGEST ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - ch_enzyme, - digest_asfile - ) - ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) + INSILICO_DIGEST ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + ch_enzyme, + digest_asfile + ) + ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) + } // // SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS @@ -135,115 +145,141 @@ workflow TREEVAL { // // SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data // - GENE_ALIGNMENT ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - YAML_INPUT.out.align_data_dir, - YAML_INPUT.out.align_geneset, - YAML_INPUT.out.align_common, - YAML_INPUT.out.intron_size, - gene_alignment_asfiles - ) - ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) + if ( !exclude_workflow_steps.contains("gene_alignment")) { + GENE_ALIGNMENT ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + YAML_INPUT.out.align_data_dir, + YAML_INPUT.out.align_geneset, + YAML_INPUT.out.align_common, + YAML_INPUT.out.intron_size, + gene_alignment_asfiles + ) + ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) + } // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // - REPEAT_DENSITY ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome - ) - ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) + if ( !exclude_workflow_steps.contains("repeat_density")) { + REPEAT_DENSITY ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome + ) + ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) + } // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // - GAP_FINDER ( - YAML_INPUT.out.reference_ch - ) - ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) + if ( !exclude_workflow_steps.contains("gap_finder")) { + GAP_FINDER ( + YAML_INPUT.out.reference_ch + ) + ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) + } // // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as // file to generate a file containing sites of self-complementary sequnce. // - SELFCOMP ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.mummer_chunk, - YAML_INPUT.out.motif_len, - selfcomp_asfile - ) - ch_versions = ch_versions.mix( SELFCOMP.out.versions ) + if ( !exclude_workflow_steps.contains("selfcomp")) { + SELFCOMP ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.mummer_chunk, + YAML_INPUT.out.motif_len, + selfcomp_asfile + ) + ch_versions = ch_versions.mix( SELFCOMP.out.versions ) + } // // SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence // and generated a file of syntenic blocks. 
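Inside the read_coverage guard the reporting output is captured into a local coverage_report variable, with an empty list as the fallback when the step is excluded; the reporting logic at the end of the workflow can then always reference coverage_report rather than READ_COVERAGE.out on a subworkflow that was never invoked. A condensed stand-alone sketch of that shape, with all names invented:

    workflow COVERAGE {
        main:
        report = Channel.of( 'coverage.stats' )
        emit:
        report
    }

    workflow {
        def skip_coverage = true          // stand-in for exclude_workflow_steps.contains("read_coverage")

        if ( !skip_coverage ) {
            COVERAGE ()
            coverage_report = COVERAGE.out.report
        }
        else {
            coverage_report = []          // placeholder keeps the variable defined for downstream use
        }
    }
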
// - SYNTENY ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.synteny_path - ) - ch_versions = ch_versions.mix( SYNTENY.out.versions ) + if ( !exclude_workflow_steps.contains("synteny")) { + SYNTENY ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.synteny_path + ) + ch_versions = ch_versions.mix( SYNTENY.out.versions ) + } // // SUBWORKFLOW: Takes reference, pacbio reads // - READ_COVERAGE ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) + if ( !exclude_workflow_steps.contains("read_coverage")) { + READ_COVERAGE ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.read_ch + ) + coverage_report = READ_COVERAGE.out.ch_reporting + ch_versions = ch_versions.mix(READ_COVERAGE.out.versions) + } else { + coverage_report = [] + } // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // - TELO_FINDER ( YAML_INPUT.out.reference_ch, - YAML_INPUT.out.teloseq - ) - ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) + if ( !exclude_workflow_steps.contains("telo_finder")) { + TELO_FINDER ( YAML_INPUT.out.reference_ch, + YAML_INPUT.out.teloseq + ) + ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) + } // // SUBWORKFLOW: GENERATE BUSCO ANNOTATION FOR ANCESTRAL UNITS // - BUSCO_ANNOTATION ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.lineageinfo, - YAML_INPUT.out.lineagespath, - buscogene_asfile, - ancestral_table - ) - ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) + if ( !exclude_workflow_steps.contains("busco")) { + BUSCO_ANNOTATION ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.lineageinfo, + YAML_INPUT.out.lineagespath, + buscogene_asfile, + ancestral_table + ) + ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) + } // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // - KMER ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( KMER.out.versions ) + if ( !exclude_workflow_steps.contains("kmer")) { + KMER ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( KMER.out.versions ) + } // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // - HIC_MAPPING ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.hic_reads_ch, - YAML_INPUT.out.assembly_id, - GAP_FINDER.out.gap_file, - READ_COVERAGE.out.ch_covbw_nor, - READ_COVERAGE.out.ch_covbw_avg, - TELO_FINDER.out.bedgraph_file, - REPEAT_DENSITY.out.repeat_density, - params.entry - ) - ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) + if ( !exclude_workflow_steps.contains("hic_mapping")) { + HIC_MAPPING ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.hic_reads_ch, + YAML_INPUT.out.assembly_id, + GAP_FINDER.out.gap_file, + READ_COVERAGE.out.ch_covbw_nor, + READ_COVERAGE.out.ch_covbw_avg, + TELO_FINDER.out.bedgraph_file, + REPEAT_DENSITY.out.repeat_density, + params.entry + ) + ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) + hic_report = HIC_MAPPING.out.ch_reporting + } else { + hic_report = [] + } // // SUBWORKFLOW: Collates version data from prior subworflows @@ -256,8 +292,8 @@ workflow TREEVAL { // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // YAML_INPUT.out.reference_ch - .combine( 
READ_COVERAGE.out.ch_reporting ) - .combine( HIC_MAPPING.out.ch_reporting ) + .combine( coverage_report ) + .combine( hic_report ) .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions ) .map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [ rf_data: tuple( diff --git a/workflows/treeval_jbrowse.nf b/workflows/treeval_jbrowse.nf new file mode 100755 index 00000000..62d20c7b --- /dev/null +++ b/workflows/treeval_jbrowse.nf @@ -0,0 +1,212 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowTreeval.initialise(params, log) + +// Check input path parameters to see if they exist +// params.input is the treeval yaml +def checkPathParamList = [ params.input ] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// IMPORT: SUBWORKFLOWS CALLED BY THE MAIN +// +include { YAML_INPUT } from '../subworkflows/local/yaml_input' +include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' +include { INSILICO_DIGEST } from '../subworkflows/local/insilico_digest' +include { GENE_ALIGNMENT } from '../subworkflows/local/gene_alignment' +include { SELFCOMP } from '../subworkflows/local/selfcomp' +include { SYNTENY } from '../subworkflows/local/synteny' +include { BUSCO_ANNOTATION } from '../subworkflows/local/busco_annotation' +include { KMER } from '../subworkflows/local/kmer' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// IMPORT: Installed directly from nf-core/modules +// +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow TREEVAL_JBROWSE { + main: + // + // PRE-PIPELINE CHANNEL SETTING - channel setting for required files + // + ch_versions = Channel.empty() + + params.entry = 'JBROWSE' + input_ch = Channel.fromPath(params.input, checkIfExists: true) + + Channel + .fromPath( "${projectDir}/assets/gene_alignment/assm_*.as", checkIfExists: true) + .map { it -> + tuple ([ type : it.toString().split('/')[-1].split('_')[-1].split('.as')[0] ], + file(it) + )} + .set { gene_alignment_asfiles } + + Channel + .fromPath( "${projectDir}/assets/digest/digest.as", checkIfExists: true ) + .set { digest_asfile } + + Channel + .fromPath( "${projectDir}/assets/self_comp/selfcomp.as", checkIfExists: true ) + .set { selfcomp_asfile } + + Channel + .fromPath( "${projectDir}/assets/busco_gene/busco.as", checkIfExists: true ) + .set { buscogene_asfile } + + Channel + .fromPath( "${projectDir}/assets/busco_gene/lep_ancestral.tsv", checkIfExists: true ) + .set { ancestral_table } + + // + // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field + // + YAML_INPUT ( + input_ch, + params.entry + 
) + + // + // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file + // + GENERATE_GENOME ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.map_order_ch + ) + ch_versions = ch_versions.mix( GENERATE_GENOME.out.versions ) + + // + // SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate + // file with enzymatic digest sites. + // + ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) + + INSILICO_DIGEST ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + ch_enzyme, + digest_asfile + ) + ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) + + // + // SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS + // ON THOSE CHUNKS + // THIS WILL BE REQUIRED FOR LARGER GENOMES EST > 6GB + // + // REFERENCE_GENOME_SPLIT --> SELFCOMP + // --> GENE_ALIGNMENT + // BOTH WOULD REQUIRE A POST SUBWORKFLOW MERGE STEP TO MERGE TOGETHER THE SCAFFOLD + // BASED ALIGNMENTS/SELFCOMPS INTO A GENOME REPRESENTATIVE ONE. + // FOR GENE ALIGNMENT WOULD THIS REQUIRE A .GENOME FILE AND INDEX PER SCAFFOLD? + + // + // SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data + // + GENE_ALIGNMENT ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + YAML_INPUT.out.align_data_dir, + YAML_INPUT.out.align_geneset, + YAML_INPUT.out.align_common, + YAML_INPUT.out.intron_size, + gene_alignment_asfiles + ) + ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) + + // + // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as + // file to generate a file containing sites of self-complementary sequnce. + // + SELFCOMP ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.mummer_chunk, + YAML_INPUT.out.motif_len, + selfcomp_asfile + ) + ch_versions = ch_versions.mix( SELFCOMP.out.versions ) + + // + // SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence + // and generated a file of syntenic blocks. 
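The asset channels declared near the top of treeval_jbrowse.nf derive a type key for each gene_alignment .as file by splitting the filename three times. A worked example of that parsing in plain Groovy, using an invented path:

    def asset_path = '/treeval/assets/gene_alignment/assm_cdna.as'
    def file_name  = asset_path.split('/')[-1]     // 'assm_cdna.as'
    def last_part  = file_name.split('_')[-1]      // 'cdna.as'
    def type_key   = last_part.split('.as')[0]     // 'cdna', stored as the type field of the meta tuple
    assert type_key == 'cdna'
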
+ // + SYNTENY ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.synteny_path + ) + ch_versions = ch_versions.mix( SYNTENY.out.versions ) + + // + // SUBWORKFLOW: GENERATE BUSCO ANNOTATION FOR ANCESTRAL UNITS + // + BUSCO_ANNOTATION ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.lineageinfo, + YAML_INPUT.out.lineagespath, + buscogene_asfile, + ancestral_table + ) + ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) + + // + // SUBWORKFLOW: Takes reads and assembly, produces kmer plot + // + KMER ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( KMER.out.versions ) + + // + // SUBWORKFLOW: Collates version data from prior subworflows + // + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + emit: + software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml + versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// PIPELINE ENTRYPOINT SUBWORKFLOWS WILL USE THE IMPLICIT ONCOMPLETE BLOCK + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf index 8a483b5e..0771a72d 100755 --- a/workflows/treeval_rapid.nf +++ b/workflows/treeval_rapid.nf @@ -53,6 +53,14 @@ workflow TREEVAL_RAPID { main: ch_versions = Channel.empty() + exclude_workflow_steps = params.steps ? params.steps.split(",") : "NONE" + + full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] + + if (!full_list.containsAll(exclude_workflow_steps)) { + exit 1, "There is an extra argument given on Command Line: \n Check contents of --exclude: $exclude_workflow_steps\nMaster list is: $full_list" + } + params.entry = 'RAPID' input_ch = Channel.fromPath(params.input, checkIfExists: true) // @@ -75,55 +83,69 @@ workflow TREEVAL_RAPID { // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // - REPEAT_DENSITY ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome - ) - ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) + + if ( !exclude_workflow_steps.contains("repeat_density")) { + REPEAT_DENSITY ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome + ) + ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) + } // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // - GAP_FINDER ( - YAML_INPUT.out.reference_ch - ) - ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) + if ( !exclude_workflow_steps.contains("gap_finder")) { + GAP_FINDER ( + YAML_INPUT.out.reference_ch + ) + ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) + } // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // - TELO_FINDER ( YAML_INPUT.out.reference_ch, - YAML_INPUT.out.teloseq - ) - ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) + if ( !exclude_workflow_steps.contains("telo_finder")) { + TELO_FINDER ( YAML_INPUT.out.reference_ch, + YAML_INPUT.out.teloseq + ) + ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) + } // // SUBWORKFLOW: Takes reference, 
pacbio reads // - READ_COVERAGE ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) - + if ( !exclude_workflow_steps.contains("read_coverage")) { + READ_COVERAGE ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) + } else { + coverage_report = [] + } // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // - HIC_MAPPING ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.hic_reads_ch, - YAML_INPUT.out.assembly_id, - GAP_FINDER.out.gap_file, - READ_COVERAGE.out.ch_covbw_nor, - READ_COVERAGE.out.ch_covbw_avg, - TELO_FINDER.out.bedgraph_file, - REPEAT_DENSITY.out.repeat_density, - params.entry - ) - ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) + if ( !exclude_workflow_steps.contains("hic_mapping")) { + HIC_MAPPING ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.hic_reads_ch, + YAML_INPUT.out.assembly_id, + GAP_FINDER.out.gap_file, + READ_COVERAGE.out.ch_covbw_nor, + READ_COVERAGE.out.ch_covbw_avg, + TELO_FINDER.out.bedgraph_file, + REPEAT_DENSITY.out.repeat_density, + params.entry + ) + ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) + } else { + hic_report = [] + } // // SUBWORKFLOW: Collates version data from prior subworflows @@ -136,8 +158,8 @@ workflow TREEVAL_RAPID { // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // YAML_INPUT.out.reference_ch - .combine( READ_COVERAGE.out.ch_reporting ) - .combine( HIC_MAPPING.out.ch_reporting ) + .combine( coverage_report ) + .combine( hic_report ) .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions ) .map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [ rf_data: tuple( diff --git a/workflows/treeval_rapid_tol.nf b/workflows/treeval_rapid_tol.nf index 5d651a1a..4929fd16 100755 --- a/workflows/treeval_rapid_tol.nf +++ b/workflows/treeval_rapid_tol.nf @@ -53,6 +53,14 @@ workflow TREEVAL_RAPID_TOL { main: ch_versions = Channel.empty() + exclude_workflow_steps = params.exclude ? 
params.exclude.split(",") : "NONE" + + full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] + + if (!full_list.containsAll(exclude_workflow_steps)) { + exit 1, "There is an extra argument given on Command Line: \n Check contents of --exclude: $exclude_workflow_steps\nMaster list is: $full_list" + } + params.entry = 'RAPID_TOL' input_ch = Channel.fromPath(params.input, checkIfExists: true) // @@ -75,64 +83,80 @@ workflow TREEVAL_RAPID_TOL { // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // - REPEAT_DENSITY ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome - ) - ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) + if ( !exclude_workflow_steps.contains("repeat_density")) { + REPEAT_DENSITY ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome + ) + ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) + } // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // - GAP_FINDER ( - YAML_INPUT.out.reference_ch - ) - ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) + if ( !exclude_workflow_steps.contains("gap_finder")) { + GAP_FINDER ( + YAML_INPUT.out.reference_ch + ) + ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) + } // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // - TELO_FINDER ( YAML_INPUT.out.reference_ch, - YAML_INPUT.out.teloseq - ) - ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) + if ( !exclude_workflow_steps.contains("telo_finder")) { + TELO_FINDER ( YAML_INPUT.out.reference_ch, + YAML_INPUT.out.teloseq + ) + ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) + } // // SUBWORKFLOW: Takes reference, pacbio reads // - READ_COVERAGE ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) + if ( !exclude_workflow_steps.contains("read_coverage")) { + READ_COVERAGE ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) + } else { + coverage_report = [] + } // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // - KMER ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( KMER.out.versions ) + if ( !exclude_workflow_steps.contains("kmer")) { + KMER ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( KMER.out.versions ) + } // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // - HIC_MAPPING ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.hic_reads_ch, - YAML_INPUT.out.assembly_id, - GAP_FINDER.out.gap_file, - READ_COVERAGE.out.ch_covbw_nor, - READ_COVERAGE.out.ch_covbw_avg, - TELO_FINDER.out.bedgraph_file, - REPEAT_DENSITY.out.repeat_density, - params.entry - ) - ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) + if ( !exclude_workflow_steps.contains("hic_mapping")) { + HIC_MAPPING ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.hic_reads_ch, + YAML_INPUT.out.assembly_id, + GAP_FINDER.out.gap_file, + READ_COVERAGE.out.ch_covbw_nor, + READ_COVERAGE.out.ch_covbw_avg, + TELO_FINDER.out.bedgraph_file, + REPEAT_DENSITY.out.repeat_density, + params.entry + ) + 
ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) + } else { + hic_report = [] + } // // SUBWORKFLOW: Collates version data from prior subworflows @@ -145,8 +169,8 @@ workflow TREEVAL_RAPID_TOL { // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // YAML_INPUT.out.reference_ch - .combine( READ_COVERAGE.out.ch_reporting ) - .combine( HIC_MAPPING.out.ch_reporting ) + .combine( coverage_report ) + .combine( hic_report ) .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions ) .map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [ rf_data: tuple(