diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1912eaea..06ac277d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,48 +22,21 @@ jobs: name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/treeval') }}" - runs-on: [ubuntu-latest] # Let's see if Pretext errors are a architecture thing + runs-on: [ubuntu-latest] strategy: matrix: NXF_VER: - "22.10.1" - "latest-everything" steps: - - name: Get branch names - # Pulls the names of current branches in repo - # steps.branch-names.outputs.current_branch is used later and returns the name of the branch the PR is made FROM not to - id: branch-names - uses: tj-actions/branch-names@v8 + - name: Check out pipeline code + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Setup apptainer - uses: eWaterCycle/setup-apptainer@main - - - name: Set up Singularity - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Install Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install nf-core - run: | - pip install nf-core - - - name: NF-Core Download - download singularity containers - # Forcibly download repo on active branch and download SINGULARITY containers into the CACHE dir if not found - # Must occur after singularity install or will crash trying to dl containers - # Zip up this fresh download and run the checked out version - run: | - nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system singularity - - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. run: | @@ -72,4 +45,4 @@ jobs: - name: Singularity - Run FULL pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ./sanger-treeval/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test_github,singularity --outdir ./Sing-Full + nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./Sing-Full diff --git a/.gitignore b/.gitignore index fb2aa578..2e860182 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,8 @@ output/ null/ error out +OscSUBSET-TEST +OscSUBSET-TEST-MODULE-UPDATE +OscSUBSET-TEST-MODULE-UPDATE-3 +TreeValTinyData +yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 6be0c8b6..dd0869b0 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -118,13 +118,6 @@ This builds on the initial release by adding subworkflows which generate kmer ba - Fix a bug in build_alignment_blocks.py to avoid indexing errors happening in large genomes. - Change output BEDGRAPH from EXTRACT_TELO module. -#### Hot Fix 1 - -- Generate CRAM CSV fix to allow for multi-readgroup cram files -- Removing KMER_READCOV -- tmp directory was being used -- Output file adjustment (names and location) - ### Parameters | Old Parameter | New Parameter | diff --git a/CITATIONS.md b/CITATIONS.md index 2c7295da..78d4983e 100755 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -34,10 +34,6 @@ > Durand, N.C. et al. 2016. ‘Juicer provides a one-click system for analyzing loop-resolution hi-C experiments’, Cell Systems, 3(1), pp. 95–98. doi:10.1016/j.cels.2016.07.002. 
-- [Merqury_FK](https://github.com/thegenemyers/MERQURY.FK) - - > Myers, G., Rhie, A. (2024). MerquryFK & KatFK. [online]. https://github.com/thegenemyers/MERQURY.FK. (Accessed on 20 September 2024). - - [Minimap2](https://pubmed.ncbi.nlm.nih.gov/34623391/) > Li, H. 2021. ‘New strategies to improve MINIMAP2 alignment accuracy’, Bioinformatics, 37(23), pp. 4572–4574. doi:10.1093/bioinformatics/btab705. @@ -76,7 +72,7 @@ - [Samtools](https://pubmed.ncbi.nlm.nih.gov/33590861/) - > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819. + > Di Tommaso, Paolo, et al. 2017. “Nextflow Enables Reproducible Computational Workflows.” Nature Biotechnology, 35(4), pp. 316–19, https://doi.org/10.1038/nbt.3820. - [SeqTK](https://github.com/lh3/seqtk) diff --git a/main.nf b/main.nf index 6931d2bd..2a9fc377 100755 --- a/main.nf +++ b/main.nf @@ -25,7 +25,6 @@ WorkflowMain.initialise( workflow, params, log ) include { TREEVAL } from './workflows/treeval' include { TREEVAL_RAPID } from './workflows/treeval_rapid' include { TREEVAL_RAPID_TOL } from './workflows/treeval_rapid_tol' -include { TREEVAL_JBROWSE } from './workflows/treeval_jbrowse' // // WORKFLOW: RUN MAIN PIPELINE GENERATING ALL OUTPUT @@ -48,15 +47,6 @@ workflow SANGERTOL_TREEVAL_RAPID_TOL { TREEVAL_RAPID_TOL () } -// -// WORKFLOW: RUN ONLY THE SUBWORKFLOWS REQUIRED FOR JBROWSE UPLOAD -// - THIS IS TO COMPLEMENT A NEW PROCESS WHERE MAJORITY OF TICKETS WILL BE RC -// AND GET REQUESTED FOR FULL -// -workflow SANGERTOL_TREEVAL_JBROWSE { - TREEVAL_JBROWSE () -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS @@ -78,10 +68,6 @@ workflow RAPID_TOL { SANGERTOL_TREEVAL_RAPID_TOL () } -workflow JBROWSE { - SANGERTOL_TREEVAL_JBROWSE () -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules/local/avgcov.nf b/modules/local/avgcov.nf index 0077f2bd..7e1e477e 100755 --- a/modules/local/avgcov.nf +++ b/modules/local/avgcov.nf @@ -4,8 +4,8 @@ process AVGCOV { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(bedfile) diff --git a/modules/local/bamtobed_sort.nf b/modules/local/bamtobed_sort.nf index bd1f0cfc..c9d73306 100755 --- a/modules/local/bamtobed_sort.nf +++ b/modules/local/bamtobed_sort.nf @@ -6,11 +6,6 @@ process BAMTOBED_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' : 'biocontainers/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "BAMTOBED_SORT module does not support Conda. Please use Docker / Singularity instead." 
- } - input: tuple val(meta), path(bam) diff --git a/modules/local/concatblocks.nf b/modules/local/concatblocks.nf index f58641de..5c01459d 100755 --- a/modules/local/concatblocks.nf +++ b/modules/local/concatblocks.nf @@ -4,8 +4,8 @@ process CONCATBLOCKS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(mergeblocks) diff --git a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf index 06624c52..ca706e28 100755 --- a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf +++ b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf @@ -6,11 +6,6 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT module does not support Conda. Please use Docker / Singularity instead." - } - input: tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix), path(reference) diff --git a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf index a9d2b977..8d8d69e4 100755 --- a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf +++ b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf @@ -6,11 +6,6 @@ process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT module does not support Conda. Please use Docker / Singularity instead." - } - input: tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref), path(reference) diff --git a/modules/local/extract_buscogene.nf b/modules/local/extract_buscogene.nf index a5cd3a5e..44149d74 100755 --- a/modules/local/extract_buscogene.nf +++ b/modules/local/extract_buscogene.nf @@ -4,8 +4,8 @@ process EXTRACT_BUSCOGENE { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: diff --git a/modules/local/extract_cov_iden.nf b/modules/local/extract_cov_iden.nf index bddcbab7..d50fd39c 100755 --- a/modules/local/extract_cov_iden.nf +++ b/modules/local/extract_cov_iden.nf @@ -4,15 +4,15 @@ process EXTRACT_COV_IDEN { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*.bed") , emit: punchlist - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.bed" ) , emit: punchlist + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" diff --git a/modules/local/extract_repeat.nf b/modules/local/extract_repeat.nf index 39f7ee23..85fe9c93 100755 --- a/modules/local/extract_repeat.nf +++ b/modules/local/extract_repeat.nf @@ -8,11 +8,11 @@ process EXTRACT_REPEAT { 'biocontainers/perl:5.26.2' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), path("*.bed") , emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), path( "*.bed" ) , emit: bed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf index cfd25908..c39e665c 100755 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -4,16 +4,16 @@ process EXTRACT_TELO { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed") , emit: bed - tuple val(meta), file("*bedgraph"), emit: bedgraph - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ) , emit: bed + tuple val( meta ), file("*bedgraph"), emit: bedgraph + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/find_telomere_regions.nf b/modules/local/find_telomere_regions.nf index de3307cf..092926dc 100755 --- a/modules/local/find_telomere_regions.nf +++ b/modules/local/find_telomere_regions.nf @@ -4,18 +4,13 @@ process FIND_TELOMERE_REGIONS { container 'quay.io/sanger-tol/telomere:0.0.1-c1' - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "FIND_TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." 
- } - input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) val (telomereseq) output: - tuple val(meta), file("*.telomere") , emit: telomere - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.telomere" ) , emit: telomere + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/find_telomere_windows.nf b/modules/local/find_telomere_windows.nf index 675fd544..2fcd0022 100755 --- a/modules/local/find_telomere_windows.nf +++ b/modules/local/find_telomere_windows.nf @@ -3,16 +3,17 @@ process FIND_TELOMERE_WINDOWS { label 'process_low' conda "bioconda::java-jdk=8.0.112" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine == 'singularity' && + !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : 'biocontainers/java-jdk:8.0.112--1' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*.windows") , emit: windows - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.windows" ) , emit: windows + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/gap_length.nf b/modules/local/gap_length.nf index dd1a5878..b5bf0733 100755 --- a/modules/local/gap_length.nf +++ b/modules/local/gap_length.nf @@ -4,15 +4,15 @@ process GAP_LENGTH { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bedgraph") , emit: bedgraph - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bedgraph" ) , emit: bedgraph + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf index 2bccab9d..045160b7 100755 --- a/modules/local/generate_cram_csv.nf +++ b/modules/local/generate_cram_csv.nf @@ -6,11 +6,6 @@ process GENERATE_CRAM_CSV { 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GENERATE_CRAM_CSV module does not support Conda. Please use Docker / Singularity instead." - } - input: tuple val(meta), path(crampath) diff --git a/modules/local/get_largest_scaff.nf b/modules/local/get_largest_scaff.nf index 091abc3e..2296958c 100755 --- a/modules/local/get_largest_scaff.nf +++ b/modules/local/get_largest_scaff.nf @@ -5,11 +5,11 @@ process GET_LARGEST_SCAFF { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: env largest_scaff , emit: scaff_size diff --git a/modules/local/get_paired_contact_bed.nf b/modules/local/get_paired_contact_bed.nf index b3db6d1f..e6d3a135 100755 --- a/modules/local/get_paired_contact_bed.nf +++ b/modules/local/get_paired_contact_bed.nf @@ -4,15 +4,15 @@ process GET_PAIRED_CONTACT_BED { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed") , emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ) , emit: bed + path "versions.yml" , emit: versions script: def pulled = '-T sort_tmp' diff --git a/modules/local/getminmaxpunches.nf b/modules/local/getminmaxpunches.nf index 0a095b29..6e828bb5 100755 --- a/modules/local/getminmaxpunches.nf +++ b/modules/local/getminmaxpunches.nf @@ -4,16 +4,16 @@ process GETMINMAXPUNCHES{ conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(bedfile) output: - tuple val(meta), path ('*zero.bed') , optional: true , emit: min - tuple val(meta), path ('*max.bed') , optional: true , emit: max - path "versions.yml" , emit: versions + tuple val(meta), path ( '*zero.bed' ) , optional: true , emit: min + tuple val(meta), path ( '*max.bed' ) , optional: true , emit: max + path "versions.yml" , emit: versions shell: def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/local/graphoverallcoverage.nf b/modules/local/graphoverallcoverage.nf index 572e793d..10c0a112 100755 --- a/modules/local/graphoverallcoverage.nf +++ b/modules/local/graphoverallcoverage.nf @@ -2,14 +2,10 @@ process GRAPHOVERALLCOVERAGE { tag "$meta.id" label "process_single" + conda "conda-forge::perl=5.26.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : - 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GRAPHOVERALLCOVERAGE module does not support Conda. Please use Docker / Singularity instead." 
- } + 'https://depot.galaxyproject.org/singularity/perl:5.26.2' : + 'biocontainers/perl:5.26.2' }" input: tuple val(meta), path(bed) diff --git a/modules/local/juicer_tools_pre.nf b/modules/local/juicer_tools_pre.nf index d12ec28c..12b46ce8 100755 --- a/modules/local/juicer_tools_pre.nf +++ b/modules/local/juicer_tools_pre.nf @@ -5,7 +5,8 @@ process JUICER_TOOLS_PRE { label 'process_medium' conda "bioconda::java-jdk=8.0.112" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine == 'singularity' && + !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : 'biocontainers/java-jdk:8.0.112--1' }" diff --git a/modules/local/paf_to_bed.nf b/modules/local/paf_to_bed.nf index 445d3b2f..c50f0373 100755 --- a/modules/local/paf_to_bed.nf +++ b/modules/local/paf_to_bed.nf @@ -4,15 +4,15 @@ process PAF2BED { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*_punchlist.bed"), emit: punchlist - path "versions.yml" , emit: versions + tuple val( meta ), file( "*_punchlist.bed" ), emit: punchlist + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" diff --git a/modules/local/pretext_graph.nf b/modules/local/pretext_graph.nf index 60527482..f2bdc029 100644 --- a/modules/local/pretext_graph.nf +++ b/modules/local/pretext_graph.nf @@ -4,11 +4,6 @@ process PRETEXT_GRAPH { container "quay.io/sanger-tol/pretext:0.0.3-yy5-c1" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "PRETEXT_GRAPH module does not support Conda. Please use Docker / Singularity instead." - } - input: tuple val(meta), path(pretext_file) tuple val(gap), path(gap_file) @@ -18,8 +13,8 @@ process PRETEXT_GRAPH { tuple val(rep), path(repeat_density) output: - tuple val(meta), path("*.pretext") , emit: pretext - path "versions.yml" , emit: versions + tuple val(meta), path("*.pretext") , emit: pretext + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/reformat_intersect.nf b/modules/local/reformat_intersect.nf index 3c4cdb61..bcc0be77 100755 --- a/modules/local/reformat_intersect.nf +++ b/modules/local/reformat_intersect.nf @@ -4,15 +4,14 @@ process REFORMAT_INTERSECT { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*.bed"), emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.bed" ), emit: bed shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/rename_ids.nf b/modules/local/rename_ids.nf index 0be66c27..ba5db7ca 100755 --- a/modules/local/rename_ids.nf +++ b/modules/local/rename_ids.nf @@ -4,15 +4,15 @@ process RENAME_IDS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed") , emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ) , emit: bed + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/replace_dots.nf b/modules/local/replace_dots.nf index bb0f051e..4d12f5cd 100755 --- a/modules/local/replace_dots.nf +++ b/modules/local/replace_dots.nf @@ -4,15 +4,15 @@ process REPLACE_DOTS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed"), emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ), emit: bed + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/nextflow.config b/nextflow.config index 64ea209f..2af4a6fc 100755 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,6 @@ params { input = null outdir = "./results" juicer = false - steps = "NONE" tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null @@ -28,6 +27,7 @@ params { show_hidden_params = false schema_ignore_params = 'genomes' binfile = false + steps = 'NONE' // Config options custom_config_version = 'master' @@ -255,4 +255,4 @@ def round_up ( decimal_places, file, multiplier ){ } else { return dived * multiplier } -} \ No newline at end of file +} diff --git a/subworkflows/local/ancestral_gene.nf b/subworkflows/local/ancestral_gene.nf index 6ac72ab9..cfee2061 100755 --- a/subworkflows/local/ancestral_gene.nf +++ b/subworkflows/local/ancestral_gene.nf @@ -33,10 +33,10 @@ workflow ANCESTRAL_GENE { // LOGIC: STRIP OUT METADATA // ch_grab - .map {meta, fulltable + .map { meta, fulltable -> fulltable } - .set {assignanc_input} + .set { assignanc_input } // // MODULE: ASSIGN EXTRACTED GENES TO ANCESTRAL GROUPS @@ -61,7 +61,7 @@ workflow ANCESTRAL_GENE { // UCSC_BEDTOBIGBED( BEDTOOLS_SORT.out.sorted, - dot_genome.map{it[1]}, // Pull file from tuple(meta, file) + dot_genome.map{ it[1] }, // Pull file from tuple(meta, file) buscogene_as ) ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) diff --git a/subworkflows/local/busco_annotation.nf 
b/subworkflows/local/busco_annotation.nf index 75182788..a3227a1b 100755 --- a/subworkflows/local/busco_annotation.nf +++ b/subworkflows/local/busco_annotation.nf @@ -31,7 +31,7 @@ workflow BUSCO_ANNOTATION { ch_versions = Channel.empty() // COMMENT: Set BUSCO mode to 'genome' - ch_busco_mode = Channel.of("genome") + ch_busco_mode = Channel.of( "genome" ) // @@ -55,7 +55,7 @@ workflow BUSCO_ANNOTATION { EXTRACT_BUSCOGENE ( ch_grab ) - ch_versions = ch_versions.mix(EXTRACT_BUSCOGENE.out.versions) + ch_versions = ch_versions.mix( EXTRACT_BUSCOGENE.out.versions ) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE @@ -68,7 +68,7 @@ workflow BUSCO_ANNOTATION { file ) } - .set {bedtools_input} + .set { bedtools_input } // // MODULE: SORT THE EXTRACTED BUSCO GENE // @@ -76,7 +76,7 @@ workflow BUSCO_ANNOTATION { bedtools_input, [] ) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: CONVERT THE BED TO BIGBED @@ -86,7 +86,7 @@ workflow BUSCO_ANNOTATION { dot_genome.map{it[1]}, // Gets file from tuple (meta, file) buscogene_as ) - ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) // // LOGIC: AGGREGATE DATA AND SORT BRANCH ON CLASS @@ -98,18 +98,18 @@ workflow BUSCO_ANNOTATION { lep: it[0].split('_')[0] == "lepidoptera" general: it[0].split('_')[0] != "lepidoptera" } - .set{ch_busco_data} + .set{ ch_busco_data } // // LOGIC: BUILD NEW INPUT CHANNEL FOR ANCESTRAL ID // ch_busco_data .lep - .multiMap {lineage, meta, busco_dir, ancestral_table -> + .multiMap { lineage, meta, busco_dir, ancestral_table -> busco_dir: tuple( meta, busco_dir ) atable: ancestral_table } - .set{ch_busco_lep_data} + .set{ ch_busco_lep_data } // // SUBWORKFLOW: RUN ANCESTRAL BUSCO ID (ONLY AVAILABLE FOR LEPIDOPTERA) @@ -120,7 +120,7 @@ workflow BUSCO_ANNOTATION { buscogene_as, ch_busco_lep_data.atable ) - ch_versions = ch_versions.mix(ANCESTRAL_GENE.out.versions) + ch_versions = ch_versions.mix( ANCESTRAL_GENE.out.versions ) emit: ch_buscogene_bigbed = UCSC_BEDTOBIGBED.out.bigbed diff --git a/subworkflows/local/gap_finder.nf b/subworkflows/local/gap_finder.nf index 89feaf49..5b53d908 100755 --- a/subworkflows/local/gap_finder.nf +++ b/subworkflows/local/gap_finder.nf @@ -20,7 +20,7 @@ workflow GAP_FINDER { SEQTK_CUTN ( reference_tuple ) - ch_versions = ch_versions.mix(SEQTK_CUTN.out.versions) + ch_versions = ch_versions.mix( SEQTK_CUTN.out.versions ) // // MODULE: ADD THE LENGTH OF GAP TO BED FILE - INPUT FOR PRETEXT MODULE @@ -28,7 +28,7 @@ workflow GAP_FINDER { GAP_LENGTH ( SEQTK_CUTN.out.bed ) - ch_versions = ch_versions.mix(GAP_LENGTH.out.versions) + ch_versions = ch_versions.mix( GAP_LENGTH.out.versions ) // // MODULE: BGZIP AND TABIX THE GAP FILE @@ -36,7 +36,7 @@ workflow GAP_FINDER { TABIX_BGZIPTABIX ( SEQTK_CUTN.out.bed ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) emit: gap_file = GAP_LENGTH.out.bedgraph diff --git a/subworkflows/local/gene_alignment.nf b/subworkflows/local/gene_alignment.nf index d3c30e54..1936bc99 100755 --- a/subworkflows/local/gene_alignment.nf +++ b/subworkflows/local/gene_alignment.nf @@ -29,7 +29,7 @@ workflow GENE_ALIGNMENT { .map{ meta, file -> "${meta.class}" } - .set {assembly_class} + .set { assembly_class } // @@ -52,8 +52,8 @@ workflow GENE_ALIGNMENT { geneset_path -> file(geneset_path) } - .splitCsv(header: true, sep:',') - 
.map(row -> + .splitCsv( header: true, sep:',') + .map( row -> tuple([ org: row.org, type: row.type, id: row.data_file.split('/')[-1].split('.MOD.')[0] @@ -90,7 +90,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix(GEN_ALIGNMENTS.out.versions) + ch_versions = ch_versions.mix( GEN_ALIGNMENTS.out.versions ) CDS_ALIGNMENTS ( reference_tuple, reference_index, @@ -98,7 +98,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix(CDS_ALIGNMENTS.out.versions) + ch_versions = ch_versions.mix( CDS_ALIGNMENTS.out.versions ) RNA_ALIGNMENTS ( reference_tuple, reference_index, @@ -106,7 +106,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix(RNA_ALIGNMENTS.out.versions) + ch_versions = ch_versions.mix( RNA_ALIGNMENTS.out.versions ) emit: pep_gff = PEP_ALIGNMENTS.out.tbi_gff diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf index 166475e3..5c4d06a5 100755 --- a/subworkflows/local/generate_genome.nf +++ b/subworkflows/local/generate_genome.nf @@ -3,6 +3,7 @@ // // MODULE IMPORT BLOCK // +include { GET_LARGEST_SCAFF } from '../../modules/local/get_largest_scaff' include { GENERATE_UNSORTED_GENOME } from '../../subworkflows/local/generate_unsorted_genome' include { GENERATE_SORTED_GENOME } from '../../subworkflows/local/generate_sorted_genome' @@ -23,7 +24,7 @@ workflow GENERATE_GENOME { reference_file .combine(map_order) - .map{ref_meta, ref, map_order -> + .map{ ref_meta, ref, map_order -> tuple( [ id: ref_meta.id, map_order :map_order @@ -43,7 +44,7 @@ workflow GENERATE_GENOME { GENERATE_SORTED_GENOME ( ch_genomesize_input.sorted ) - ch_versions = ch_versions.mix(GENERATE_SORTED_GENOME.out.versions) + ch_versions = ch_versions.mix( GENERATE_SORTED_GENOME.out.versions ) ch_genomesize = GENERATE_SORTED_GENOME.out.genomesize ch_genome_fai = GENERATE_SORTED_GENOME.out.ref_index ch_versions = GENERATE_SORTED_GENOME.out.versions @@ -54,12 +55,22 @@ workflow GENERATE_GENOME { GENERATE_UNSORTED_GENOME ( ch_genomesize_input.unsorted ) - ch_versions = ch_versions.mix(GENERATE_UNSORTED_GENOME.out.versions) - ch_genomesize = ch_genomesize.mix(GENERATE_UNSORTED_GENOME.out.genomesize) - ch_genome_fai = ch_genome_fai.mix(GENERATE_UNSORTED_GENOME.out.ref_index) + ch_versions = ch_versions.mix( GENERATE_UNSORTED_GENOME.out.versions ) + ch_genomesize = ch_genomesize.mix( GENERATE_UNSORTED_GENOME.out.genomesize ) + ch_genome_fai = ch_genome_fai.mix( GENERATE_UNSORTED_GENOME.out.ref_index ) ch_versions = GENERATE_UNSORTED_GENOME.out.versions + // + // MODULE: Cut out the largest scaffold size and use as comparator against 512MB + // This is the cut off for TABIX using tbi indexes + // + GET_LARGEST_SCAFF ( + ch_genomesize + ) + ch_versions = ch_versions.mix( GET_LARGEST_SCAFF.out.versions ) + emit: + max_scaff_size = GET_LARGEST_SCAFF.out.scaff_size.toInteger() dot_genome = ch_genomesize ref_index = ch_genome_fai ref = reference_file diff --git a/subworkflows/local/generate_sorted_genome.nf b/subworkflows/local/generate_sorted_genome.nf index 71ec98b7..bc38e2dd 100755 --- a/subworkflows/local/generate_sorted_genome.nf +++ b/subworkflows/local/generate_sorted_genome.nf @@ -18,13 +18,13 @@ workflow GENERATE_SORTED_GENOME { reference_file, "unsorted.genome" ) - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions ) genome_size = CUSTOM_GETCHROMSIZES.out.sizes GNU_SORT ( CUSTOM_GETCHROMSIZES.out.sizes 
) - ch_versions = ch_versions.mix(GNU_SORT.out.versions) + ch_versions = ch_versions.mix( GNU_SORT.out.versions ) emit: genomesize = GNU_SORT.out.sorted diff --git a/subworkflows/local/generate_unsorted_genome.nf b/subworkflows/local/generate_unsorted_genome.nf index de5e6f0c..93bf8e66 100755 --- a/subworkflows/local/generate_unsorted_genome.nf +++ b/subworkflows/local/generate_unsorted_genome.nf @@ -17,7 +17,7 @@ workflow GENERATE_UNSORTED_GENOME { reference_file, "unsorted.genome" ) - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions ) emit: diff --git a/subworkflows/local/hic_bamtobed.nf b/subworkflows/local/hic_bamtobed.nf index e8b2ae70..d6e8eb34 100755 --- a/subworkflows/local/hic_bamtobed.nf +++ b/subworkflows/local/hic_bamtobed.nf @@ -28,7 +28,7 @@ workflow HIC_BAMTOBED { bam_file, reference_tuple ) - ch_versions = ch_versions.mix (SAMTOOLS_MARKDUP.out.versions) + ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions ) // // MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE @@ -36,7 +36,7 @@ workflow HIC_BAMTOBED { BAMTOBED_SORT( SAMTOOLS_MARKDUP.out.bam ) - ch_versions = ch_versions.mix(BAMTOBED_SORT.out.versions) + ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions ) // // MODULE: GENERATE CONTACT PAIRS @@ -44,7 +44,7 @@ workflow HIC_BAMTOBED { GET_PAIRED_CONTACT_BED( BAMTOBED_SORT.out.sorted_bed ) - ch_versions = ch_versions.mix(GET_PAIRED_CONTACT_BED.out.versions) + ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions ) emit: paired_contacts_bed = GET_PAIRED_CONTACT_BED.out.bed diff --git a/subworkflows/local/hic_bwamem2.nf b/subworkflows/local/hic_bwamem2.nf index 67d861cb..9409cf7a 100755 --- a/subworkflows/local/hic_bwamem2.nf +++ b/subworkflows/local/hic_bwamem2.nf @@ -24,14 +24,14 @@ workflow HIC_BWAMEM2 { BWAMEM2_INDEX ( reference_tuple - ) - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) + ) + ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) csv_ch .splitCsv() - .combine (reference_tuple) - .combine (BWAMEM2_INDEX.out.index) - .map{cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> + .combine ( reference_tuple ) + .combine ( BWAMEM2_INDEX.out.index ) + .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> tuple([ id: cram_id.id ], @@ -46,7 +46,7 @@ workflow HIC_BWAMEM2 { ref_dir ) } - .set {ch_filtering_input} + .set { ch_filtering_input } // // MODULE: map hic reads by 10,000 container per time using bwamem2 @@ -55,18 +55,18 @@ workflow HIC_BWAMEM2 { ch_filtering_input ) - ch_versions = ch_versions.mix(CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions) + ch_versions = ch_versions.mix( CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions ) mappedbam_ch = CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.mappedbam // // LOGIC: PREPARING BAMS FOR MERGE // mappedbam_ch - .map{meta, file -> + .map{ meta, file -> tuple( file ) } .collect() - .map {file -> + .map { file -> tuple ( [ id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] @@ -74,7 +74,7 @@ workflow HIC_BWAMEM2 { file ) } - .set {collected_files_for_merge} + .set { collected_files_for_merge } // // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -84,7 +84,7 @@ workflow HIC_BWAMEM2 { reference_tuple, reference_index ) - ch_versions = ch_versions.mix (SAMTOOLS_MERGE.out.versions.first()) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) emit: diff --git 
a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index 14d7cac3..bd30e0df 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -42,21 +42,21 @@ workflow HIC_MAPPING { ch_versions = Channel.empty() // COMMENT: 1000bp BIN SIZE INTERVALS FOR CLOAD - ch_cool_bin = Channel.of(1000) + ch_cool_bin = Channel.of( 1000 ) // // LOGIC: make channel of hic reads as input for GENERATE_CRAM_CSV // reference_tuple - .combine(hic_reads_path) - .map {meta, ref, hic_meta, hic_reads_path -> + .combine( hic_reads_path ) + .map { meta, ref, hic_meta, hic_reads_path -> tuple( [ id: meta.id, single_end: true], hic_reads_path ) } - .set {get_reads_input} + .set { get_reads_input } // // MODULE: generate a cram csv file containing the required parametres for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT @@ -64,21 +64,21 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV ( get_reads_input ) - ch_versions = ch_versions.mix(GENERATE_CRAM_CSV.out.versions) + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) // // LOGIC: make branches for different hic aligner. // hic_reads_path .combine(reference_tuple) - .map{meta, hic_read_path, ref_meta, ref-> - tuple( - [ id : ref_meta, - aligner : meta.aligner - ], - ref - ) - } + .map{ meta, hic_read_path, ref_meta, ref-> + tuple( + [ id : ref_meta, + aligner : meta.aligner + ], + ref + ) + } .branch{ minimap2 : it[0].aligner == "minimap2" bwamem2 : it[0].aligner == "bwamem2" @@ -93,7 +93,7 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV.out.csv, reference_index ) - ch_versions = ch_versions.mix(HIC_MINIMAP2.out.versions) + ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) mergedbam = HIC_MINIMAP2.out.mergedbam // @@ -104,18 +104,18 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV.out.csv, reference_index ) - ch_versions = ch_versions.mix(HIC_BWAMEM2.out.versions) + ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) mergedbam = mergedbam.mix(HIC_BWAMEM2.out.mergedbam) // // LOGIC: PREPARING PRETEXT MAP INPUT // mergedbam - .combine(reference_tuple) - .combine (dot_genome) + .combine( reference_tuple ) + .combine ( dot_genome ) .multiMap { bam_meta, bam, ref_meta, ref_fa, genome_meta, genome_file -> input_bam: tuple( [ id: bam_meta.id, - sz: file(bam).size() ], + sz: file( bam ).size() ], bam ) // NOTE: Inject the genome file into the channel to speed up PretextMap @@ -125,19 +125,19 @@ workflow HIC_MAPPING { ) } .set {pretext_input} - + if ( params.binfile == true ) { // // LOGIC: MAKE YAHS INPUT // - ref_yahs.map { meta, ref -> ref }.set{ch_ref} + ref_yahs.map { meta, ref -> ref }.set{ch_ref} reference_index.map { meta, fai -> fai }.set{ch_fai} // // MODULE: RUN YAHS TO GENERATE ALIGNMENT BIN FILE // - YAHS ( + YAHS ( mergedbam, ch_ref, ch_fai @@ -151,7 +151,7 @@ workflow HIC_MAPPING { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix(PRETEXTMAP_STANDRD.out.versions) + ch_versions = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions ) // // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT @@ -164,7 +164,7 @@ workflow HIC_MAPPING { telo_file, repeat_density_file ) - ch_versions = ch_versions.mix(PRETEXT_INGEST_SNDRD.out.versions) + ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES @@ -173,7 +173,7 @@ workflow HIC_MAPPING { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix(PRETEXTMAP_HIGHRES.out.versions) + ch_versions = ch_versions.mix( 
PRETEXTMAP_HIGHRES.out.versions ) // // NOTICE: This could fail on LARGE hires maps due to some memory parameter in the C code @@ -188,7 +188,7 @@ workflow HIC_MAPPING { telo_file, repeat_density_file ) - ch_versions = ch_versions.mix(PRETEXT_INGEST_HIRES.out.versions) + ch_versions = ch_versions.mix( PRETEXT_INGEST_HIRES.out.versions ) // // MODULE: GENERATE PNG FROM STANDARD PRETEXT @@ -196,7 +196,7 @@ workflow HIC_MAPPING { SNAPSHOT_SRES ( PRETEXTMAP_STANDRD.out.pretext ) - ch_versions = ch_versions.mix (SNAPSHOT_SRES.out.versions) + ch_versions = ch_versions.mix ( SNAPSHOT_SRES.out.versions ) // // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G @@ -214,14 +214,14 @@ workflow HIC_MAPPING { tosubsample : it[0].sz >= 50000000000 unmodified : it[0].sz < 50000000000 } - .set {ch_merged_bam} + .set { ch_merged_bam } // LOGIC: PREPARE BAMTOBED JUICER INPUT. - if (workflow_setting != "RAPID_TOL" && params.juicer == false) { + if ( workflow_setting != "RAPID_TOL" && params.juicer == false ) { // // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G // - mergedbam + mergedbam .map{ meta, bam -> tuple( [ id : meta.id, @@ -234,7 +234,7 @@ workflow HIC_MAPPING { tosubsample : it[0].sz >= 50000000000 unmodified : it[0].sz < 50000000000 } - .set {ch_merged_bam} + .set { ch_merged_bam } // // MODULE: SUBSAMPLE BAM @@ -242,7 +242,7 @@ workflow HIC_MAPPING { SUBSAMPLE_BAM ( ch_merged_bam.tosubsample ) - ch_versions = ch_versions.mix (SUBSAMPLE_BAM.out.versions) + ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) // // LOGIC: COMBINE BRANCHED TO SINGLE OUTPUT @@ -254,12 +254,12 @@ workflow HIC_MAPPING { // LOGIC: PREPARE BAMTOBED JUICER INPUT // ch_subsampled_bam - .combine(reference_tuple) + .combine( reference_tuple ) .multiMap { meta, subsampled_bam, meta_ref, ref -> bam : tuple(meta, subsampled_bam ) reference : tuple(meta_ref, ref) } - .set {ch_bamtobed_juicer_input} + .set { ch_bamtobed_juicer_input } // // SUBWORKFLOW: BAM TO BED FOR JUICER - USES THE SUBSAMPLED MERGED BAM @@ -268,19 +268,19 @@ workflow HIC_MAPPING { ch_bamtobed_juicer_input.bam, ch_bamtobed_juicer_input.reference ) - ch_versions = ch_versions.mix(HIC_BAMTOBED_JUICER.out.versions) + ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions ) // // LOGIC: PREPARE JUICER TOOLS INPUT // HIC_BAMTOBED_JUICER.out.paired_contacts_bed - .combine(dot_genome) + .combine( dot_genome ) .multiMap { meta, paired_contacts, meta_my_genome, my_genome -> - paired : tuple([id: meta.id, single_end: true], paired_contacts) + paired : tuple([ id: meta.id, single_end: true], paired_contacts ) genome : my_genome id : meta.id } - .set {ch_juicer_input} + .set { ch_juicer_input } // // MODULE: GENERATE HIC MAP, ONLY IS PIPELINE IS RUNNING ON ENTRY FULL @@ -290,19 +290,19 @@ workflow HIC_MAPPING { ch_juicer_input.genome, ch_juicer_input.id ) - ch_versions = ch_versions.mix(JUICER_TOOLS_PRE.out.versions) + ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions ) } // // LOGIC: PREPARE BAMTOBED COOLER INPUT // mergedbam - .combine(reference_tuple) + .combine( reference_tuple ) .multiMap { meta, merged_bam, meta_ref, ref -> bam : tuple(meta, merged_bam ) reference : tuple(meta_ref, ref) } - .set {ch_bamtobed_cooler_input} + .set { ch_bamtobed_cooler_input } // // SUBWORKFLOW: BAM TO BED FOR COOLER @@ -311,26 +311,26 @@ workflow HIC_MAPPING { ch_bamtobed_cooler_input.bam, ch_bamtobed_cooler_input.reference ) - ch_versions = ch_versions.mix(HIC_BAMTOBED_COOLER.out.versions) + ch_versions = ch_versions.mix( 
HIC_BAMTOBED_COOLER.out.versions ) // // LOGIC: BIN CONTACT PAIRS // HIC_BAMTOBED_COOLER.out.paired_contacts_bed - .join(HIC_BAMTOBED_COOLER.out.sorted_bed) - .combine( ch_cool_bin) - .set {ch_binned_pairs} + .join( HIC_BAMTOBED_COOLER.out.sorted_bed ) + .combine( ch_cool_bin ) + .set { ch_binned_pairs } // // LOGIC: PREPARE COOLER INPUT // ch_binned_pairs .combine(dot_genome) - .multiMap {meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> - cooler_in : tuple (meta, pairs, bed, cool_bin) + .multiMap { meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> + cooler_in : tuple ( meta, pairs, bed, cool_bin ) genome_file : my_genome } - .set {ch_cooler} + .set { ch_cooler } // // MODULE: GENERATE A MULTI-RESOLUTION COOLER FILE BY COARSENING @@ -345,7 +345,7 @@ workflow HIC_MAPPING { // LOGIC: REFACTOR CHANNEL FOR ZOOMIFY // COOLER_CLOAD.out.cool - .map{meta, cools, cool_bin -> + .map{ meta, cools, cool_bin -> [meta, cools] } .set{ch_cool} @@ -364,14 +364,14 @@ workflow HIC_MAPPING { ch_cram_files .collect() - .map {meta, cram -> + .map { meta, cram -> tuple( [ id: 'cram', sz: cram instanceof ArrayList ? cram.collect { it.size()} : cram.size(), ], cram ) } - .combine(GENERATE_CRAM_CSV.out.csv) + .combine( GENERATE_CRAM_CSV.out.csv ) .map { meta, data, meta2, csv -> tuple( [ id: meta.id, sz: meta.sz, @@ -380,7 +380,7 @@ workflow HIC_MAPPING { data ) } - .set {ch_reporting_cram} + .set { ch_reporting_cram } emit: mcool = COOLER_ZOOMIFY.out.mcool diff --git a/subworkflows/local/hic_minimap2.nf b/subworkflows/local/hic_minimap2.nf index 76b7cf74..b37ff30d 100755 --- a/subworkflows/local/hic_minimap2.nf +++ b/subworkflows/local/hic_minimap2.nf @@ -19,27 +19,27 @@ workflow HIC_MINIMAP2 { reference_tuple // Channel: tuple [ val(meta), path( file ) ] csv_ch reference_index - + main: ch_versions = Channel.empty() mappedbam_ch = Channel.empty() // - // MODULE: generate minimap2 mmi file - // + // MODULE: generate minimap2 mmi file + // MINIMAP2_INDEX ( reference_tuple - ) - ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions) + ) + ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions ) // // LOGIC: generate input channel for mapping - // + // csv_ch .splitCsv() - .combine (reference_tuple) - .combine (MINIMAP2_INDEX.out.index) - .map{cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path -> + .combine ( reference_tuple ) + .combine ( MINIMAP2_INDEX.out.index ) + .map{ cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path-> tuple([ id: cram_id.id ], @@ -54,16 +54,16 @@ workflow HIC_MINIMAP2 { ref_dir ) } - .set {ch_filtering_input} + .set { ch_filtering_input } // // MODULE: map hic reads by 10,000 container per time - // + // CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT ( ch_filtering_input ) - ch_versions = ch_versions.mix(CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions) + ch_versions = ch_versions.mix( CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions ) mappedbam_ch = CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.mappedbam @@ -71,19 +71,19 @@ workflow HIC_MINIMAP2 { // LOGIC: PREPARING BAMS FOR MERGE // mappedbam_ch - .map{meta, file -> - tuple(file) + .map{ meta, file -> + tuple( file ) } .collect() - .map {file -> + .map { file -> tuple ( [ - id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] + id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] ], file ) } - .set {collected_files_for_merge} + .set { collected_files_for_merge } // // MODULE: MERGE 
POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -93,10 +93,10 @@ workflow HIC_MINIMAP2 { reference_tuple, reference_index ) - ch_versions = ch_versions.mix (SAMTOOLS_MERGE.out.versions.first()) - + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + emit: - mergedbam = SAMTOOLS_MERGE.out.bam + mergedbam = SAMTOOLS_MERGE.out.bam versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf index 314d8970..fe6c0f46 100755 --- a/subworkflows/local/insilico_digest.nf +++ b/subworkflows/local/insilico_digest.nf @@ -27,24 +27,24 @@ workflow INSILICO_DIGEST { // MULTIMAP INTO TWO CHANNELS SO THERE IS REFERENCE * ENZYME CHANNELS // reference - .map {meta, data -> + .map { meta, data -> tuple( [ id : meta.id, single_end : false ], file( data ) ) } - .set {input_fasta} + .set { input_fasta } input_fasta .combine(ch_enzyme) - .multiMap {meta, reference, enzyme_id -> + .multiMap { meta, reference, enzyme_id -> fasta : tuple( meta, reference ) enzyme : enzyme_id } - .set {fa2c_input} + .set { fa2c_input } // // MODULE: CONVERTS FASTA INTO A COLOUR-AWARE BIONANO CMAP FORMAT @@ -62,7 +62,7 @@ workflow INSILICO_DIGEST { MAKECMAP_FA2CMAPMULTICOLOR.out.cmap .map{ meta, cfile -> tuple( - [id : cfile.toString().split('_')[-3]], + [ id : cfile.toString().split('_')[-3] ], cfile ) } @@ -71,21 +71,21 @@ workflow INSILICO_DIGEST { MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey .map{ kfile -> tuple( - [id : kfile.toString().split('_')[-4]], + [ id : kfile.toString().split('_')[-4] ], kfile ) } - .set {ch_cmapkey_new} + .set { ch_cmapkey_new } ch_cmap_new .join(ch_cmapkey_new) - .multiMap {meta, cfile, kfile -> + .multiMap { meta, cfile, kfile -> cmap : tuple( meta, cfile) key_file : kfile } - .set {ch_join} + .set { ch_join } // // MODULE: RENAME CMAP IDs FROM BIONANO IDX TO ORIGINAL GENOMIC LOCATIONS @@ -98,11 +98,11 @@ workflow INSILICO_DIGEST { ch_versions = ch_versions.mix(MAKECMAP_RENAMECMAPIDS.out.versions) MAKECMAP_RENAMECMAPIDS.out.renamedcmap - .multiMap {meta, file -> + .multiMap { meta, file -> full : tuple ( meta, file ) sample : meta.id } - .set {ch_renamedcmap} + .set { ch_renamedcmap } // // MODULE: CONVERT CMAP FILE INTO BED FILE @@ -117,12 +117,12 @@ workflow INSILICO_DIGEST { MAKECMAP_CMAP2BED.out.bedfile .combine(sizefile) .combine(dot_as) - .multiMap {meta, bed, meta_2, dot_genome, as_file -> - bed_tuple : tuple(meta, bed) + .multiMap { meta, bed, meta_2, dot_genome, as_file -> + bed_tuple : tuple( meta, bed ) genome_file : dot_genome autosql : as_file } - .set {combined_ch} + .set { combined_ch } // // MODULE: CONVERT ABOVE BED INTO BIGBED WITH ADDITIONAL AS FILE diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf index 0c58cd52..fe341b00 100755 --- a/subworkflows/local/kmer.nf +++ b/subworkflows/local/kmer.nf @@ -26,47 +26,51 @@ workflow KMER { // LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT // reads_path - .map {meta, reads_path -> + .map { meta, reads_path -> tuple( [ id : meta.id, single_end : true ], reads_path ) } - .set {get_reads_input} + .set { get_reads_input } // // MODULE: GETS PACBIO READ PATHS FROM READS_PATH // - ch_grabbed_read_paths = GrabFiles(get_reads_input) + ch_grabbed_read_paths = GrabFiles( get_reads_input ) // // MODULE: JOIN PACBIO READ // CAT_CAT( ch_grabbed_read_paths ) - ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + ch_versions = ch_versions.mix( CAT_CAT.out.versions.first() ) // // MODULE: COUNT KMERS // FASTK_FASTK( CAT_CAT.out.file_out ) - 
ch_versions = ch_versions.mix(FASTK_FASTK.out.versions.first()) + ch_versions = ch_versions.mix( FASTK_FASTK.out.versions.first() ) // // LOGIC: PREPARE MERQURYFK INPUT // FASTK_FASTK.out.hist - .combine(FASTK_FASTK.out.ktab) - .combine(reference_tuple) + .combine( FASTK_FASTK.out.ktab ) + .combine( reference_tuple ) .map{ meta_hist, hist, meta_ktab, ktab, meta_ref, primary -> tuple( meta_hist, hist, ktab, primary, [] ) } - .set{ch_merq} + .set{ ch_merq } // // MODULE: USE KMER HISTOGRAM TO PRODUCE SPECTRA GRAPH // - MERQURYFK_MERQURYFK (ch_merq, [], []) + MERQURYFK_MERQURYFK ( + ch_merq, + [], + [] + ) ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions.first() ) emit: @@ -82,10 +86,10 @@ process GrabFiles { executor 'local' input: - tuple val(meta), path("in") + tuple val( meta ), path( "in" ) output: - tuple val(meta), path("in/*.fasta.gz") + tuple val( meta ), path( "in/*.fasta.gz" ) "true" } diff --git a/subworkflows/local/nuc_alignments.nf b/subworkflows/local/nuc_alignments.nf index 1aa0a4da..d83eb943 100755 --- a/subworkflows/local/nuc_alignments.nf +++ b/subworkflows/local/nuc_alignments.nf @@ -33,17 +33,16 @@ workflow NUC_ALIGNMENTS { // nuc_files .flatten() - .buffer(size: 2) - .combine(reference_tuple) - .combine(intron_size) - .map {meta, nuc_file, ref_meta, ref, intron -> - tuple( [ - id: meta.id, - type: meta.type, - org: meta.org, - intron_size: intron, - split_prefix: nuc_file.toString().split('/')[-1].split('.fasta')[0], - single_end: true + .buffer( size: 2 ) + .combine ( reference_tuple ) + .combine( intron_size ) + .map { meta, nuc_file, ref_meta, ref, intron -> + tuple( [id: meta.id, + type: meta.type, + org: meta.org, + intron_size: intron, + split_prefix: nuc_file.toString().split('/')[-1].split('.fasta')[0], + single_end: true ], nuc_file, ref, @@ -62,7 +61,7 @@ workflow NUC_ALIGNMENTS { bool_cigar_bam : bool_3 bool_bedfile : bool_4 } - .set {formatted_input} + .set { formatted_input } // // MODULE: ALIGNS REFERENCE FAIDX TO THE GENE_ALIGNMENT QUERY FILE FROM NUC_FILES @@ -84,13 +83,13 @@ workflow NUC_ALIGNMENTS { // AND DATA TYPE (RNA, CDS, DNA). 
// MINIMAP2_ALIGN.out.bam - .map {meta, file -> + .map { meta, file -> tuple( [ id: meta.org, type: meta.type ], - file)} - .groupTuple(by: [0]) // group by meta list - .set {merge_input} + file) } + .groupTuple( by: [0] ) // group by meta list + .set { merge_input } // // MODULE: MERGES THE BAM FILES FOUND IN THE GROUPED TUPLE IN REGARDS TO THE REFERENCE @@ -114,7 +113,7 @@ workflow NUC_ALIGNMENTS { // // MODULE: CONVERTS THE ABOVE MERGED BAM INTO BED FORMAT // - BEDTOOLS_BAMTOBED (SAMTOOLS_MERGE.out.bam) + BEDTOOLS_BAMTOBED ( SAMTOOLS_MERGE.out.bam ) ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions) // TODO: try filtering out here too @@ -123,7 +122,7 @@ workflow NUC_ALIGNMENTS { // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // BEDTOOLS_BAMTOBED.out.bed - .map {meta, file -> + .map { meta, file -> tuple ( [ id: meta.id, type: meta.type, lines: file.countLines() @@ -131,7 +130,7 @@ workflow NUC_ALIGNMENTS { file ) } - .set {bedtools_input} + .set { bedtools_input } // // MODULE: SORTS THE ABOVE BED FILE @@ -153,17 +152,16 @@ workflow NUC_ALIGNMENTS { file_size: file.size() ], file ) } - .filter {it[0].file_size >= 141 } // Take the first item in input (meta) and check if size is more than a symlink - .combine(dot_genome) - .multiMap {meta, ref, genome_meta, genome -> + .filter { it[0].file_size >= 141 } // Take the first item in input (meta) and check if size is more than a symlink + .combine( dot_genome ) + .multiMap { meta, ref, genome_meta, genome -> bed_file: tuple( [ id: meta.id, type: meta.type, ], - ref - ) + ref ) dot_genome: genome } - .set {ucsc_input} + .set { ucsc_input } // // MODULE: CONVERTS GENOME FILE AND BED INTO A BIGBED FILE @@ -173,7 +171,7 @@ workflow NUC_ALIGNMENTS { ucsc_input.dot_genome, [] ) - ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) emit: nuc_alignment = UCSC_BEDTOBIGBED.out.bigbed.collect() diff --git a/subworkflows/local/pep_alignments.nf b/subworkflows/local/pep_alignments.nf index a3b3cae3..32d6da30 100755 --- a/subworkflows/local/pep_alignments.nf +++ b/subworkflows/local/pep_alignments.nf @@ -34,23 +34,19 @@ workflow PEP_ALIGNMENTS { // pep_files .flatten() - .buffer(size: 2) - .combine (MINIPROT_INDEX.out.index) - .multiMap {pep_meta, pep_file, miniprot_meta, miniprot_index -> - pep_tuple : tuple( [ - id: pep_meta.id, - type: pep_meta.type, - org: pep_meta.org - ], - pep_file - ) - index_file : tuple( [ - id: "Reference" - ], - miniprot_index - ) + .buffer( size: 2 ) + .combine ( MINIPROT_INDEX.out.index ) + .multiMap { pep_meta, pep_file, miniprot_meta, miniprot_index -> + pep_tuple : tuple( [ id: pep_meta.id, + type: pep_meta.type, + org: pep_meta.org + ], + pep_file ) + index_file : tuple( [ id: "Reference", + ], + miniprot_index ) } - .set {formatted_input} + .set { formatted_input } // // MODULE: ALIGNS PEP DATA WITH REFERENCE INDEX @@ -60,21 +56,21 @@ workflow PEP_ALIGNMENTS { formatted_input.pep_tuple, formatted_input.index_file ) - ch_versions = ch_versions.mix(MINIPROT_ALIGN.out.versions) + ch_versions = ch_versions.mix( MINIPROT_ALIGN.out.versions ) // // LOGIC: GROUPS OUTPUT GFFS BASED ON QUERY ORGANISMS AND DATA TYPE (PEP) // MINIPROT_ALIGN.out.gff - .map {meta, file -> + .map { meta, file -> tuple( [ id : meta.org + '_pep', type : meta.type ], file ) } - .groupTuple(by: [0]) - .set {grouped_tuple} + .groupTuple( by: [0] ) + .set { grouped_tuple } // // MODULE: AS ABOVE OUTPUT IS BED FORMAT, IT IS MERGED PER ORGANISM + TYPE @@ 
-82,20 +78,20 @@ workflow PEP_ALIGNMENTS { CAT_CAT ( grouped_tuple ) - ch_versions = ch_versions.mix(CAT_CAT.out.versions) + ch_versions = ch_versions.mix( CAT_CAT.out.versions ) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // CAT_CAT.out.file_out - .map {meta, file -> + .map { meta, file -> tuple ( [ id: meta.id, lines: file.countLines() ], file ) } - .set {bedtools_input} + .set { bedtools_input } // // MODULE: SORTS ABOVE OUTPUT AND RETAINS GFF SUFFIX @@ -105,7 +101,7 @@ workflow PEP_ALIGNMENTS { bedtools_input , [] ) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: CUTS GFF INTO PUNCHLIST @@ -113,7 +109,7 @@ workflow PEP_ALIGNMENTS { EXTRACT_COV_IDEN ( CAT_CAT.out.file_out ) - ch_versions = ch_versions.mix(EXTRACT_COV_IDEN.out.versions) + ch_versions = ch_versions.mix( EXTRACT_COV_IDEN.out.versions ) // // MODULE: COMPRESS AND INDEX MERGED.GFF @@ -122,7 +118,7 @@ workflow PEP_ALIGNMENTS { TABIX_BGZIPTABIX ( BEDTOOLS_SORT.out.sorted ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) emit: gff_file = BEDTOOLS_SORT.out.sorted diff --git a/subworkflows/local/repeat_density.nf b/subworkflows/local/repeat_density.nf index 2e5f70eb..8764408f 100755 --- a/subworkflows/local/repeat_density.nf +++ b/subworkflows/local/repeat_density.nf @@ -31,7 +31,7 @@ workflow REPEAT_DENSITY { WINDOWMASKER_MKCOUNTS ( reference_tuple ) - ch_versions = ch_versions.mix(WINDOWMASKER_MKCOUNTS.out.versions) + ch_versions = ch_versions.mix( WINDOWMASKER_MKCOUNTS.out.versions ) // // MODULE: CALCULATE THE STATISTICS OF THE MARKED UP REGIONS @@ -40,7 +40,7 @@ workflow REPEAT_DENSITY { WINDOWMASKER_MKCOUNTS.out.counts, reference_tuple ) - ch_versions = ch_versions.mix(WINDOWMASKER_USTAT.out.versions) + ch_versions = ch_versions.mix( WINDOWMASKER_USTAT.out.versions ) // // MODULE: USE USTAT OUTPUT TO EXTRACT REPEATS FROM FASTA @@ -48,7 +48,7 @@ workflow REPEAT_DENSITY { EXTRACT_REPEAT( WINDOWMASKER_USTAT.out.intervals ) - ch_versions = ch_versions.mix(EXTRACT_REPEAT.out.versions) + ch_versions = ch_versions.mix( EXTRACT_REPEAT.out.versions ) // // MODULE: CREATE WINDOWS FROM .GENOME FILE @@ -56,7 +56,7 @@ workflow REPEAT_DENSITY { BEDTOOLS_MAKEWINDOWS( dot_genome ) - ch_versions = ch_versions.mix(BEDTOOLS_MAKEWINDOWS.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_MAKEWINDOWS.out.versions ) // // LOGIC: COMBINE TWO CHANNELS AND OUTPUT tuple(meta, windows_file, repeat_file) @@ -70,7 +70,7 @@ workflow REPEAT_DENSITY { repeat_file ) } - .set {intervals} + .set { intervals } // // MODULE: GENERATES THE REPEAT FILE FROM THE WINDOW FILE AND GENOME FILE @@ -79,7 +79,7 @@ workflow REPEAT_DENSITY { intervals, dot_genome ) - ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_INTERSECT.out.versions ) // // MODULE: FIXES IDS FOR REPEATS @@ -87,7 +87,7 @@ workflow REPEAT_DENSITY { RENAME_IDS( BEDTOOLS_INTERSECT.out.intersect ) - ch_versions = ch_versions.mix(RENAME_IDS.out.versions) + ch_versions = ch_versions.mix( RENAME_IDS.out.versions ) // // MODULE: SORTS THE ABOVE BED FILES @@ -95,17 +95,17 @@ workflow REPEAT_DENSITY { GNU_SORT_A ( RENAME_IDS.out.bed // Intersect file ) - ch_versions = ch_versions.mix(GNU_SORT_A.out.versions) + ch_versions = ch_versions.mix( GNU_SORT_A.out.versions ) GNU_SORT_B ( dot_genome // Genome file - Will not run unless genome file is sorted to ) - 
ch_versions = ch_versions.mix(GNU_SORT_B.out.versions) + ch_versions = ch_versions.mix( GNU_SORT_B.out.versions ) GNU_SORT_C ( BEDTOOLS_MAKEWINDOWS.out.bed // Windows file ) - ch_versions = ch_versions.mix(GNU_SORT_C.out.versions) + ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) // // MODULE: ADDS 4TH COLUMN TO BED FILE USED IN THE REPEAT DENSITY GRAPH @@ -113,7 +113,7 @@ workflow REPEAT_DENSITY { REFORMAT_INTERSECT ( GNU_SORT_A.out.sorted ) - ch_versions = ch_versions.mix(REFORMAT_INTERSECT.out.versions) + ch_versions = ch_versions.mix( REFORMAT_INTERSECT.out.versions ) // // MODULE: TABIX AND GZIP THE REPEAT DENSITY BED FILE FOR JBROWSE @@ -121,7 +121,7 @@ workflow REPEAT_DENSITY { TABIX_BGZIPTABIX ( REFORMAT_INTERSECT.out.bed ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) // // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO @@ -136,7 +136,7 @@ workflow REPEAT_DENSITY { bed ) } - .set {for_mapping} + .set { for_mapping } // // MODULE: MAPS THE REPEATS AGAINST THE REFERENCE GENOME @@ -145,7 +145,7 @@ workflow REPEAT_DENSITY { for_mapping, GNU_SORT_B.out.sorted ) - ch_versions = ch_versions.mix(BEDTOOLS_MAP.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_MAP.out.versions ) // // MODULE: REPLACES . WITH 0 IN MAPPED FILE @@ -153,16 +153,16 @@ workflow REPEAT_DENSITY { REPLACE_DOTS ( BEDTOOLS_MAP.out.mapped ) - ch_versions = ch_versions.mix(REPLACE_DOTS.out.versions) + ch_versions = ch_versions.mix( REPLACE_DOTS.out.versions ) // // MODULE: CONVERTS GENOME FILE AND BED INTO A BIGWIG FILE // UCSC_BEDGRAPHTOBIGWIG( REPLACE_DOTS.out.bed, - GNU_SORT_B.out.sorted.map{it[1]} // Pulls file from tuple of meta and file + GNU_SORT_B.out.sorted.map { it[1] } // Pulls file from tuple of meta and file ) - ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions) + ch_versions = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions ) emit: repeat_density = UCSC_BEDGRAPHTOBIGWIG.out.bigwig diff --git a/subworkflows/local/selfcomp.nf b/subworkflows/local/selfcomp.nf index 8e702635..f7606440 100755 --- a/subworkflows/local/selfcomp.nf +++ b/subworkflows/local/selfcomp.nf @@ -36,7 +36,7 @@ workflow SELFCOMP { SELFCOMP_SPLITFASTA( reference_tuple ) - ch_versions = ch_versions.mix(SELFCOMP_SPLITFASTA.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_SPLITFASTA.out.versions ) // // LOGIC: CALCULATE THE NUMBER OF GB WHICH WILL DICTATE THE NUMBER OF @@ -44,13 +44,12 @@ workflow SELFCOMP { // ALSO CALCULATES THE NUMBER OF TOTAL WINDOWS NEEDED IN THE REFERENCE // reference_tuple - .map{it, file -> file.size()} - .set{file_size} // Using set as TAP will force the pipeline to not complete successfully in some cases + .map{ it, file -> file.size()} + .set { file_size } // Using set as TAP will force the pipeline to not complete successfully in some cases file_size .sum{it / 1e9} - .collect {new java.math.BigDecimal (it).setScale(0, RoundingMode.UP)} - .flatten() + .map { it -> new java.math.BigDecimal (it).setScale(0, java.math.RoundingMode.UP) } .set { chunk_number } // @@ -81,7 +80,7 @@ workflow SELFCOMP { .set {len_ch} // tap out to preserve length of SEQKIT_SPLIT list len_ch // tap swapped with set as tap stops pipeline completion - .map {meta, files -> + .map { meta, files -> files } .flatten() // flatten list into singles @@ -99,7 +98,7 @@ workflow SELFCOMP { } .transpose() // Transpose the channel so that we have a channel for file in query // allows this to work on
list of 1 and beyond - .map{meta, ref, qry -> + .map { meta, ref, qry -> tuple( [ id: meta.id, sz: meta.sz, it: qry.toString().split('/')[-1] // get file name of the new query @@ -108,7 +107,7 @@ workflow SELFCOMP { qry ) } - .set{mummer_input} + .set{ mummer_input } // // MODULE: ALIGNS 1GB CHUNKS TO 500KB CHUNKS @@ -117,25 +116,25 @@ workflow SELFCOMP { MUMMER( mummer_input ) - ch_versions = ch_versions.mix(MUMMER.out.versions) + ch_versions = ch_versions.mix( MUMMER.out.versions ) // // LOGIC: COLLECT COORD FILES AND CONVERT TO LIST OF FILES // ADD REFERENCE META // MUMMER.out.coords - .map{meta, file -> + .map{ meta, file -> file } .collect() .toList() - .combine(reference_tuple) - .map{files, meta, ref -> + .combine( reference_tuple ) + .map { files, meta, ref -> tuple( meta, files ) } - .set {ch_mummer_files} + .set { ch_mummer_files } // // MODULE: MERGES MUMMER ALIGNMENT FILES @@ -143,7 +142,7 @@ workflow SELFCOMP { CAT_CAT( ch_mummer_files ) - ch_versions = ch_versions.mix(CAT_CAT.out.versions) + ch_versions = ch_versions.mix( CAT_CAT.out.versions ) // // MODULE: CONVERT THE MUMMER ALIGNMENTS INTO BED FORMAT @@ -152,7 +151,7 @@ workflow SELFCOMP { CAT_CAT.out.file_out, motif_len ) - ch_versions = ch_versions.mix(SELFCOMP_MUMMER2BED.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_MUMMER2BED.out.versions ) // // MODULE: GENERATE A LIST OF IDs AND GENOMIC POSITIONS OF SELFCOMPLEMENTARY REGIONS @@ -162,20 +161,20 @@ workflow SELFCOMP { SELFCOMP_MUMMER2BED.out.bedfile, SELFCOMP_SPLITFASTA.out.agp ) - ch_versions = ch_versions.mix(SELFCOMP_MAPIDS.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_MAPIDS.out.versions ) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // SELFCOMP_MAPIDS.out.bedfile - .map{meta, file -> + .map { meta, file -> tuple ( [ id: meta.id, lines: file.countLines() ], file ) } - .set{bedtools_input} + .set { bedtools_input } // // MODULE: SORTS ABOVE OUTPUT BED FILE AND RETAINS BED SUFFIX @@ -184,7 +183,7 @@ workflow SELFCOMP { bedtools_input, [] ) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: BUILD ALIGNMENT BLOCKS @@ -192,7 +191,7 @@ workflow SELFCOMP { SELFCOMP_ALIGNMENTBLOCKS( BEDTOOLS_SORT.out.sorted ) - ch_versions = ch_versions.mix(SELFCOMP_ALIGNMENTBLOCKS.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_ALIGNMENTBLOCKS.out.versions ) // // MODULE: SORT BLOCKS FILES AND FILTER BY MOTIF LENGTH @@ -200,7 +199,7 @@ workflow SELFCOMP { CONCATBLOCKS( SELFCOMP_ALIGNMENTBLOCKS.out.blockfile ) - ch_versions = ch_versions.mix(CONCATBLOCKS.out.versions) + ch_versions = ch_versions.mix( CONCATBLOCKS.out.versions ) // // MODULE: CONVERTS ABOVE OUTPUT INTO BIGBED FORMAT @@ -210,7 +209,7 @@ workflow SELFCOMP { dot_genome.map{it[1]}, // Pulls file from tuple ( meta and file ) selfcomp_as ) - ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions ) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) emit: ch_bigbed = UCSC_BEDTOBIGBED.out.bigbed diff --git a/subworkflows/local/synteny.nf b/subworkflows/local/synteny.nf index 1c7642bc..7a2b1c40 100755 --- a/subworkflows/local/synteny.nf +++ b/subworkflows/local/synteny.nf @@ -28,8 +28,8 @@ workflow SYNTENY { .combine(reference_tuple) .multiMap{syntenic_ref, meta, ref -> syntenic_tuple : tuple([ id: syntenic_ref.toString().split('/')[-1].split('.fasta')[0], - class: meta.class, - project_type: meta.project_type + class: meta.class, + project_type: meta.project_type ], syntenic_ref) 
reference_fa : tuple( meta, ref) @@ -39,7 +39,7 @@ workflow SYNTENY { bool_cigar_bam : false bool_bedfile : false } - .set {mm_input} + .set { mm_input } // // MODULE: ALIGNS THE SUNTENIC GENOMES TO THE REFERENCE GENOME @@ -54,7 +54,7 @@ workflow SYNTENY { mm_input.bool_cigar_bam, mm_input.bool_bedfile, ) - ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) + ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions ) emit: ch_paf = MINIMAP2_ALIGN.out.paf diff --git a/subworkflows/local/telo_finder.nf b/subworkflows/local/telo_finder.nf index aa0b7b80..fe5b704b 100755 --- a/subworkflows/local/telo_finder.nf +++ b/subworkflows/local/telo_finder.nf @@ -24,7 +24,7 @@ workflow TELO_FINDER { reference_tuple, teloseq ) - ch_versions = ch_versions.mix(FIND_TELOMERE_REGIONS.out.versions) + ch_versions = ch_versions.mix( FIND_TELOMERE_REGIONS.out.versions ) // // MODULE: GENERATES A WINDOWS FILE FROM THE ABOVE @@ -32,7 +32,7 @@ workflow TELO_FINDER { FIND_TELOMERE_WINDOWS ( FIND_TELOMERE_REGIONS.out.telomere ) - ch_versions = ch_versions.mix(FIND_TELOMERE_WINDOWS.out.versions) + ch_versions = ch_versions.mix( FIND_TELOMERE_WINDOWS.out.versions ) // // MODULE: EXTRACTS THE LOCATION OF TELOMERIC SEQUENCE BASED ON THE WINDOWS @@ -40,7 +40,7 @@ workflow TELO_FINDER { EXTRACT_TELO ( FIND_TELOMERE_WINDOWS.out.windows ) - ch_versions = ch_versions.mix(EXTRACT_TELO.out.versions) + ch_versions = ch_versions.mix( EXTRACT_TELO.out.versions ) // // MODULE: BGZIP AND TABIX THE OUTPUT FILE @@ -48,7 +48,7 @@ workflow TELO_FINDER { TABIX_BGZIPTABIX ( EXTRACT_TELO.out.bed ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) emit: bed_file = EXTRACT_TELO.out.bed diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index 566a8ddd..69a5cb1f 100755 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -24,18 +24,18 @@ workflow YAML_INPUT { .flatten() .combine( workflow_id ) .multiMap { data, id -> - assembly: (data.assembly) - assembly_reads: (data.assem_reads) - hic_data: (data.hic_data) - kmer_profile: (data.kmer_profile) - reference: (file(data.reference_file, checkIfExists: true)) - alignment: (id == "FULL" || id == "JBROWSE" ? data.alignment : "") - self_comp: (id == "FULL" || id == "JBROWSE" ? data.self_comp : "") - synteny: (id == "FULL" || id == "JBROWSE" ? data.synteny : "") - intron: (id == "FULL" || id == "JBROWSE" ? data.intron : "") - busco_gene: (data.busco) - teloseq: (data.telomere) - map_order: (data.map_order) + assembly: ( data.assembly ) + assembly_reads: ( data.assem_reads ) + hic_data: ( data.hic_data ) + kmer_profile: ( data.kmer_profile ) + reference: ( file(data.reference_file, checkIfExists: true) ) + alignment: ( id == "FULL" ? data.alignment : "" ) + self_comp: ( id == "FULL" ? data.self_comp : "" ) + synteny: ( id == "FULL" ? data.synteny : "" ) + intron: ( id == "FULL" ? 
data.intron : "" ) + busco_gene: ( data.busco ) + teloseq: ( data.telomere ) + map_order: ( data.map_order) } .set{ group } @@ -46,7 +46,7 @@ workflow YAML_INPUT { // group .assembly - .multiMap{ data -> + .multiMap { data -> assem_level: data.assem_level assem_version: data.assem_version sample_id: data.sample_id @@ -54,32 +54,32 @@ workflow YAML_INPUT { defined_class: data.defined_class project_id: data.project_id } - .set{assembly_data} + .set { assembly_data } group .assembly_reads - .multiMap{ data -> + .multiMap { data -> read_type: data.read_type read_data: data.read_data supplement: data.supplementary_data } - .set{assem_reads} + .set { assem_reads } group .hic_data - .multiMap{ data -> + .multiMap { data -> hic_cram: data.hic_cram hic_aligner: data.hic_aligner } - .set {hic} + .set { hic } group .kmer_profile - .multiMap{ data -> + .multiMap { data -> length: data.kmer_length dir: data.dir } - .set {kmer_profiling} + .set { kmer_profiling } group .alignment @@ -87,55 +87,54 @@ workflow YAML_INPUT { .multiMap{ data, id -> genesets: (id == "FULL" || id == "JBROWSE" ? data.genesets : "") } - .set{alignment_data} + .set{ alignment_data } group .self_comp - .combine(workflow_id) - .multiMap{ data, id -> - motif_len: (id == "FULL" || id == "JBROWSE" ? data.motif_len : "") - mummer_chunk: (id == "FULL" || id == "JBROWSE" ? data.mummer_chunk : "") + .combine( workflow_id ) + .multiMap { data, id -> + motif_len: (id == "FULL" ? data.motif_len : "") + mummer_chunk: (id == "FULL" ? data.mummer_chunk : "") } - .set{selfcomp_data} + .set{ selfcomp_data } group .intron - .combine(workflow_id) - .multiMap{ data, id -> - size: (id == "FULL" || id == "JBROWSE" ? data.size : "") + .combine( workflow_id ) + .multiMap { data, id -> + size: (id == "FULL" ? data.size : "") } - .set {intron_size} + .set { intron_size } group .teloseq - .multiMap{ data -> + .multiMap { data -> teloseq: data.teloseq } - .set {teloseq} + .set { teloseq } group .busco_gene - .multiMap{ data -> + .multiMap { data -> lineage: data.lineage lineages_path: data.lineages_path } - .set {busco_lineage} + .set { busco_lineage } // // LOGIC: COMBINE SOME CHANNELS INTO VALUES REQUIRED DOWNSTREAM // assembly_data.sample_id - .combine(assembly_data.assem_version) - .map{it1, it2 -> - ("${it1}_${it2}") - } - .set{tolid_version} + .combine( assembly_data.assem_version ) + .map { it1, it2 -> + ("${it1}_${it2}")} + .set { tolid_version } tolid_version - .combine(group.reference) - .combine(assembly_data.defined_class) - .combine(assembly_data.project_id) - .map{sample, ref_file, defined_class, project -> + .combine( group.reference ) + .combine( assembly_data.defined_class ) + .combine( assembly_data.project_id ) + .map { sample, ref_file, defined_class, project -> tuple( [ id: sample, class: defined_class, project_type: project @@ -143,13 +142,13 @@ workflow YAML_INPUT { ref_file ) } - .set{ref_ch} + .set { ref_ch } - if (assem_reads.read_type.filter { it == "hifi" } || assem_reads.read_type.filter { it == "clr" } || assem_reads.read_type.filter { it == "ont" }) { + if ( assem_reads.read_type.filter { it == "hifi" } || assem_reads.read_type.filter { it == "clr" } || assem_reads.read_type.filter { it == "ont" } ) { tolid_version - .combine(assem_reads.read_type) - .combine(assem_reads.read_data) - .map{sample, type, data -> + .combine( assem_reads.read_type ) + .combine( assem_reads.read_data ) + .map{ sample, type, data -> tuple( [ id : sample, single_end : true, read_type : type @@ -157,13 +156,13 @@ workflow YAML_INPUT { data ) } - .set 
{read_ch} + .set { read_ch } } - else if (assem_reads.read_type.filter {it == "illumina"}) { + else if ( assem_reads.read_type.filter { it == "illumina" } ) { tolid_version - .combine(assem_reads.read_type) - .combine(assem_reads.read_data) - .map{sample, type, data -> + .combine( assem_reads.read_type ) + .combine( assem_reads.read_data ) + .map{ sample, type, data -> tuple( [ id : sample, single_end : false, read_type : type @@ -171,40 +170,40 @@ workflow YAML_INPUT { data ) } - .set {read_ch} + .set { read_ch } } tolid_version - .combine(hic.hic_cram) - .combine(hic.hic_aligner) - .map{sample, data, aligner -> + .combine( hic.hic_cram ) + .combine( hic.hic_aligner ) + .map { sample, data, aligner -> tuple( [ id: sample, aligner: aligner ], data ) } - .set {hic_ch} + .set { hic_ch } tolid_version - .combine(assem_reads.supplement) - .map{sample, data -> + .combine( assem_reads.supplement ) + .map { sample, data -> tuple( [ id: sample ], data ) } - .set {supplement_ch} + .set { supplement_ch } tolid_version - .combine (assembly_data.sample_id) - .combine (kmer_profiling.length) - .combine (kmer_profiling.dir) - .map{sample, sample_id, kmer_len, dir -> + .combine ( assembly_data.sample_id ) + .combine ( kmer_profiling.length ) + .combine ( kmer_profiling.dir ) + .map { sample, sample_id, kmer_len, dir -> tuple( [ id: sample, kmer: kmer_len ], file("${dir}/k${kmer_len}/${sample_id}.k${kmer_len}.ktab") // Don't check for existence yet ) } - .set {kmer_prof} + .set { kmer_prof } emit: assembly_id = tolid_version @@ -239,5 +238,5 @@ workflow YAML_INPUT { } def readYAML( yamlfile ) { - return new Yaml().load( new FileReader( yamlfile.toString())) + return new Yaml().load( new FileReader( yamlfile.toString() ) ) } diff --git a/workflows/treeval.nf b/workflows/treeval.nf index a9906d0f..d616f491 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -131,6 +131,7 @@ workflow TREEVAL { ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) } + // // SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS // ON THOSE CHUNKS @@ -216,7 +217,7 @@ workflow TREEVAL { YAML_INPUT.out.read_ch ) coverage_report = READ_COVERAGE.out.ch_reporting - ch_versions = ch_versions.mix(READ_COVERAGE.out.versions) + ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) } else { coverage_report = [] } @@ -257,6 +258,7 @@ workflow TREEVAL { ch_versions = ch_versions.mix( KMER.out.versions ) } + // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // @@ -274,8 +276,8 @@ workflow TREEVAL { REPEAT_DENSITY.out.repeat_density, params.entry ) + hic_report = HIC_MAPPING.out.ch_reporting ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) - hic_report = HIC_MAPPING.out.ch_reporting } else { hic_report = [] } diff --git a/workflows/treeval_jbrowse.nf b/workflows/treeval_jbrowse.nf old mode 100755 new mode 100644 index 62d20c7b..2271a206 --- a/workflows/treeval_jbrowse.nf +++ b/workflows/treeval_jbrowse.nf @@ -56,6 +56,14 @@ workflow TREEVAL_JBROWSE { // ch_versions = Channel.empty() + exclude_workflow_steps = params.steps ? 
params.steps.split(",") : "NONE" + + full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] + + if (!full_list.containsAll(exclude_workflow_steps)) { + exit 1, "There is an extra argument given on Command Line: \n Check contents of --steps: $exclude_workflow_steps\nMaster list is: $full_list" + } + params.entry = 'JBROWSE' input_ch = Channel.fromPath(params.input, checkIfExists: true) @@ -104,15 +112,17 @@ workflow TREEVAL_JBROWSE { // SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate // file with enzymatic digest sites. // - ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) + if ( !exclude_workflow_steps.contains("insilico_digest")) { + ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) - INSILICO_DIGEST ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - ch_enzyme, - digest_asfile - ) - ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) + INSILICO_DIGEST ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + ch_enzyme, + digest_asfile + ) + ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) + } // // SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS @@ -128,62 +138,71 @@ workflow TREEVAL_JBROWSE { // // SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data // - GENE_ALIGNMENT ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - YAML_INPUT.out.align_data_dir, - YAML_INPUT.out.align_geneset, - YAML_INPUT.out.align_common, - YAML_INPUT.out.intron_size, - gene_alignment_asfiles - ) - ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) + if ( !exclude_workflow_steps.contains("gene_alignments")) { + GENE_ALIGNMENT ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + YAML_INPUT.out.align_genesets, + YAML_INPUT.out.intron_size, + gene_alignment_asfiles + ) + ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) + } // // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as // file to generate a file containing sites of self-complementary sequnce. // - SELFCOMP ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.mummer_chunk, - YAML_INPUT.out.motif_len, - selfcomp_asfile - ) - ch_versions = ch_versions.mix( SELFCOMP.out.versions ) + if ( !exclude_workflow_steps.contains("selfcomp")) { + SELFCOMP ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.mummer_chunk, + YAML_INPUT.out.motif_len, + selfcomp_asfile + ) + ch_versions = ch_versions.mix( SELFCOMP.out.versions ) + } // // SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence // and generated a file of syntenic blocks.
// - SYNTENY ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.synteny_path - ) - ch_versions = ch_versions.mix( SYNTENY.out.versions ) + if ( !exclude_workflow_steps.contains("synteny")) { + YAML_INPUT.out.synteny_paths.view {"SYNTENY_MAIN: $it"} + SYNTENY ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.synteny_paths + ) + ch_versions = ch_versions.mix( SYNTENY.out.versions ) + } // // SUBWORKFLOW: GENERATE BUSCO ANNOTATION FOR ANCESTRAL UNITS // - BUSCO_ANNOTATION ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.lineageinfo, - YAML_INPUT.out.lineagespath, - buscogene_asfile, - ancestral_table - ) - ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) + if ( !exclude_workflow_steps.contains("busco")) { + BUSCO_ANNOTATION ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.lineageinfo, + YAML_INPUT.out.lineagespath, + buscogene_asfile, + ancestral_table + ) + ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) + } // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // - KMER ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( KMER.out.versions ) + if ( !exclude_workflow_steps.contains("kmer")) { + KMER ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( KMER.out.versions ) + } // // SUBWORKFLOW: Collates version data from prior subworflows diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf index bb736f26..43c640e7 100755 --- a/workflows/treeval_rapid.nf +++ b/workflows/treeval_rapid.nf @@ -53,6 +53,7 @@ workflow TREEVAL_RAPID { main: ch_versions = Channel.empty() + exclude_workflow_steps = params.steps ? params.steps.split(",") : "NONE" full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] @@ -61,6 +62,7 @@ workflow TREEVAL_RAPID { exit 1, "There is an extra argument given on Command Line: \n Check contents of --exclude: $exclude_workflow_steps\nMaster list is: $full_list" } + params.entry = 'RAPID' input_ch = Channel.fromPath(params.input, checkIfExists: true) // @@ -83,7 +85,6 @@ workflow TREEVAL_RAPID { // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // - if ( !exclude_workflow_steps.contains("repeat_density")) { REPEAT_DENSITY ( YAML_INPUT.out.reference_ch, @@ -126,6 +127,8 @@ workflow TREEVAL_RAPID { } else { coverage_report = [] } + + // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // diff --git a/workflows/treeval_rapid_tol.nf b/workflows/treeval_rapid_tol.nf index 8e10fcc6..97dd067d 100755 --- a/workflows/treeval_rapid_tol.nf +++ b/workflows/treeval_rapid_tol.nf @@ -154,7 +154,7 @@ workflow TREEVAL_RAPID_TOL { REPEAT_DENSITY.out.repeat_density, params.entry ) - hic_report = HIC_MAPPING.out.ch_reporting + hic_report = HIC_MAPPING.out.ch_reporting ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) } else { hic_report = []
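The step-exclusion mechanism added above to TREEVAL_JBROWSE (and already present in TREEVAL_RAPID) reduces to three moves: split the comma-separated --steps value, validate it against a master list, and wrap each optional subworkflow call in a contains() guard. Below is a minimal, self-contained sketch of that pattern rather than the pipeline's own code: the step names are shortened, the log.info lines stand in for real subworkflow calls, and the unset case defaults to a one-element list instead of a bare string.

#!/usr/bin/env nextflow
nextflow.enable.dsl = 2

// Illustrative sketch only; run as:  nextflow run steps_sketch.nf --steps selfcomp,kmer
params.steps = null

workflow {
    // 1. Split the comma-separated value into a list; default to ["NONE"] when unset
    def exclude_workflow_steps = params.steps ? params.steps.split(",").collect { it.trim() } : ["NONE"]

    // 2. Refuse to start if any requested step is not in the master list
    def full_list = ["repeat_density", "selfcomp", "synteny", "kmer", "hic_mapping", "NONE"]
    if ( !full_list.containsAll(exclude_workflow_steps) ) {
        exit 1, "Unknown value in --steps: ${exclude_workflow_steps}\nMaster list is: ${full_list}"
    }

    // 3. Guard each optional subworkflow on membership of the exclusion list
    if ( !exclude_workflow_steps.contains("selfcomp") ) {
        log.info "SELFCOMP would run here"
    }
    if ( !exclude_workflow_steps.contains("kmer") ) {
        log.info "KMER would run here"
    }
}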