From 9df46fa8d54e33161f62126f2fdc42e093088e92 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Tue, 19 Nov 2024 14:52:33 +0000 Subject: [PATCH 01/16] Revert "Dp24 steps" --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 9 - CITATIONS.md | 6 +- main.nf | 14 -- modules/local/avgcov.nf | 4 +- modules/local/bamtobed_sort.nf | 5 - modules/local/chunkfasta.nf | 4 +- modules/local/concatblocks.nf | 4 +- .../cram_filter_align_bwamem2_fixmate_sort.nf | 5 - ...filter_minimap2_filter5end_fixmate_sort.nf | 5 - modules/local/extract_buscogene.nf | 4 +- modules/local/extract_cov_iden.nf | 10 +- modules/local/extract_repeat.nf | 6 +- modules/local/extract_telo.nf | 12 +- modules/local/find_telomere_regions.nf | 11 +- modules/local/find_telomere_windows.nf | 9 +- modules/local/fkutils/fkprof/main.nf | 13 +- modules/local/gap_length.nf | 10 +- modules/local/generate_cram_csv.nf | 9 +- modules/local/get_largest_scaff.nf | 6 +- modules/local/get_paired_contact_bed.nf | 10 +- modules/local/getminmaxpunches.nf | 10 +- modules/local/graphoverallcoverage.nf | 9 +- modules/local/juicer_tools_pre.nf | 3 +- modules/local/paf_to_bed.nf | 10 +- modules/local/pretext_graph.nf | 9 +- modules/local/reformat_intersect.nf | 9 +- modules/local/rename_ids.nf | 10 +- modules/local/replace_dots.nf | 10 +- nextflow.config | 1 - nextflow_schema.json | 5 - subworkflows/local/ancestral_gene.nf | 6 +- subworkflows/local/busco_annotation.nf | 26 +-- subworkflows/local/gap_finder.nf | 6 +- subworkflows/local/gene_alignment.nf | 16 +- subworkflows/local/generate_genome.nf | 21 +- subworkflows/local/generate_sorted_genome.nf | 4 +- .../local/generate_unsorted_genome.nf | 2 +- subworkflows/local/hic_bamtobed.nf | 22 +- subworkflows/local/hic_bwamem2.nf | 22 +- subworkflows/local/hic_mapping.nf | 84 +++---- subworkflows/local/hic_minimap2.nf | 40 ++-- subworkflows/local/insilico_digest.nf | 28 +-- subworkflows/local/kmer.nf | 22 +- subworkflows/local/nuc_alignments.nf | 54 +++-- subworkflows/local/pep_alignments.nf | 48 ++-- subworkflows/local/repeat_density.nf | 34 +-- subworkflows/local/selfcomp.nf | 50 ++--- subworkflows/local/synteny.nf | 14 +- subworkflows/local/telo_finder.nf | 8 +- subworkflows/local/yaml_input.nf | 151 +++++++------ workflows/treeval.nf | 198 +++++++--------- workflows/treeval_jbrowse.nf | 212 ------------------ workflows/treeval_rapid.nf | 94 +++----- workflows/treeval_rapid_tol.nf | 104 ++++----- 55 files changed, 567 insertions(+), 933 deletions(-) delete mode 100755 workflows/treeval_jbrowse.nf diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c379c507..2e226b4f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/treeval') }}" - runs-on: [ubuntu-latest] # Let's see if Pretext errors are a architecture thing + runs-on: ubuntu2204-8c strategy: matrix: NXF_VER: diff --git a/CHANGELOG.md b/CHANGELOG.md index 7be3b8c6..0170b9f2 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,20 +40,11 @@ This builds on the initial release by adding subworkflows which generate kmer ba - Fix a bug in build_alignment_blocks.py to avoid indexing errors happening in large genomes. - Change output BEDGRAPH from EXTRACT_TELO module. -#### Hot Fix 1 - -- Adding support for multi-library cram input. 
- -#### Hot Fix 2 - -- Adding support to select subworkflows to use in pipeline run. - ### Parameters | Old Parameter | New Parameter | | ------------- | ------------- | | - | --juicer | -| - | --steps | ### Software dependencies diff --git a/CITATIONS.md b/CITATIONS.md index 7db8de50..d179e501 100755 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -34,10 +34,6 @@ > Durand, N.C. et al. 2016. ‘Juicer provides a one-click system for analyzing loop-resolution hi-C experiments’, Cell Systems, 3(1), pp. 95–98. doi:10.1016/j.cels.2016.07.002. -- [Merqury_FK](https://github.com/thegenemyers/MERQURY.FK) - - > Myers, G., Rhie, A. (2024). MerquryFK & KatFK. [online]. https://github.com/thegenemyers/MERQURY.FK. (Accessed on 20 September 2024). - - [Minimap2](https://pubmed.ncbi.nlm.nih.gov/34623391/) > Li, H. 2021. ‘New strategies to improve MINIMAP2 alignment accuracy’, Bioinformatics, 37(23), pp. 4572–4574. doi:10.1093/bioinformatics/btab705. @@ -76,7 +72,7 @@ - [Samtools](https://pubmed.ncbi.nlm.nih.gov/33590861/) - > Danecek P, Bonfield JK, Liddle J, Marshall J, Ohan V, Pollard MO, Whitwham A, Keane T, McCarthy SA, Davies RM, Li H. Twelve years of SAMtools and BCFtools. Gigascience. 2021 Feb 16;10(2):giab008. doi: 10.1093/gigascience/giab008. PMID: 33590861; PMCID: PMC7931819. + > Di Tommaso, Paolo, et al. 2017. “Nextflow Enables Reproducible Computational Workflows.” Nature Biotechnology, 35(4), pp. 316–19, https://doi.org/10.1038/nbt.3820. - [SeqTK](https://github.com/lh3/seqtk) diff --git a/main.nf b/main.nf index 6931d2bd..2a9fc377 100755 --- a/main.nf +++ b/main.nf @@ -25,7 +25,6 @@ WorkflowMain.initialise( workflow, params, log ) include { TREEVAL } from './workflows/treeval' include { TREEVAL_RAPID } from './workflows/treeval_rapid' include { TREEVAL_RAPID_TOL } from './workflows/treeval_rapid_tol' -include { TREEVAL_JBROWSE } from './workflows/treeval_jbrowse' // // WORKFLOW: RUN MAIN PIPELINE GENERATING ALL OUTPUT @@ -48,15 +47,6 @@ workflow SANGERTOL_TREEVAL_RAPID_TOL { TREEVAL_RAPID_TOL () } -// -// WORKFLOW: RUN ONLY THE SUBWORKFLOWS REQUIRED FOR JBROWSE UPLOAD -// - THIS IS TO COMPLEMENT A NEW PROCESS WHERE MAJORITY OF TICKETS WILL BE RC -// AND GET REQUESTED FOR FULL -// -workflow SANGERTOL_TREEVAL_JBROWSE { - TREEVAL_JBROWSE () -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS @@ -78,10 +68,6 @@ workflow RAPID_TOL { SANGERTOL_TREEVAL_RAPID_TOL () } -workflow JBROWSE { - SANGERTOL_TREEVAL_JBROWSE () -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules/local/avgcov.nf b/modules/local/avgcov.nf index 0077f2bd..7e1e477e 100755 --- a/modules/local/avgcov.nf +++ b/modules/local/avgcov.nf @@ -4,8 +4,8 @@ process AVGCOV { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(bedfile) diff --git a/modules/local/bamtobed_sort.nf b/modules/local/bamtobed_sort.nf index bd1f0cfc..c9d73306 100755 --- a/modules/local/bamtobed_sort.nf +++ b/modules/local/bamtobed_sort.nf @@ -6,11 +6,6 @@ process BAMTOBED_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' : 'biocontainers/mulled-v2-9d3a458f6420e5712103ae2af82c94d26d63f059:60b54b43045e8cf39ba307fd683c69d4c57240ce-0' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "BAMTOBED_SORT module does not support Conda. Please use Docker / Singularity instead." - } - input: tuple val(meta), path(bam) diff --git a/modules/local/chunkfasta.nf b/modules/local/chunkfasta.nf index 0400df24..afb5050e 100755 --- a/modules/local/chunkfasta.nf +++ b/modules/local/chunkfasta.nf @@ -4,8 +4,8 @@ process CHUNKFASTA { conda "conda-forge::pyfasta=0.5.2-1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pyfasta:0.5.2--py_1' : - 'biocontainers/pyfasta:0.5.2--py_1' }" + 'https://depot.galaxyproject.org/singularity/pyfasta:0.5.2--py_1' : + 'biocontainers/pyfasta:0.5.2--py_1' }" input: tuple val(meta), path('input.fasta') diff --git a/modules/local/concatblocks.nf b/modules/local/concatblocks.nf index f58641de..5c01459d 100755 --- a/modules/local/concatblocks.nf +++ b/modules/local/concatblocks.nf @@ -4,8 +4,8 @@ process CONCATBLOCKS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(mergeblocks) diff --git a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf index 06624c52..ca706e28 100755 --- a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf +++ b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf @@ -6,11 +6,6 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT module does not support Conda. Please use Docker / Singularity instead." 
- } - input: tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix), path(reference) diff --git a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf index a9d2b977..8d8d69e4 100755 --- a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf +++ b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf @@ -6,11 +6,6 @@ process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT module does not support Conda. Please use Docker / Singularity instead." - } - input: tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref), path(reference) diff --git a/modules/local/extract_buscogene.nf b/modules/local/extract_buscogene.nf index a5cd3a5e..44149d74 100755 --- a/modules/local/extract_buscogene.nf +++ b/modules/local/extract_buscogene.nf @@ -4,8 +4,8 @@ process EXTRACT_BUSCOGENE { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: diff --git a/modules/local/extract_cov_iden.nf b/modules/local/extract_cov_iden.nf index bddcbab7..d50fd39c 100755 --- a/modules/local/extract_cov_iden.nf +++ b/modules/local/extract_cov_iden.nf @@ -4,15 +4,15 @@ process EXTRACT_COV_IDEN { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*.bed") , emit: punchlist - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.bed" ) , emit: punchlist + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" diff --git a/modules/local/extract_repeat.nf b/modules/local/extract_repeat.nf index 39f7ee23..85fe9c93 100755 --- a/modules/local/extract_repeat.nf +++ b/modules/local/extract_repeat.nf @@ -8,11 +8,11 @@ process EXTRACT_REPEAT { 'biocontainers/perl:5.26.2' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), path("*.bed") , emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), path( "*.bed" ) , emit: bed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf index cfd25908..c39e665c 100755 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -4,16 +4,16 @@ process EXTRACT_TELO { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed") , emit: bed - tuple val(meta), file("*bedgraph"), emit: bedgraph - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ) , emit: bed + tuple val( meta ), file("*bedgraph"), emit: bedgraph + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/find_telomere_regions.nf b/modules/local/find_telomere_regions.nf index f2b78f8a..c9f0e6a3 100755 --- a/modules/local/find_telomere_regions.nf +++ b/modules/local/find_telomere_regions.nf @@ -4,18 +4,13 @@ process FIND_TELOMERE_REGIONS { container 'docker.io/library/gcc:10.4.0' - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "FIND_TELOMERE_REGIONS module does not support Conda. Please use Docker / Singularity instead." - } - input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) val (telomereseq) output: - tuple val(meta), file("*.telomere") , emit: telomere - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.telomere" ) , emit: telomere + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/find_telomere_windows.nf b/modules/local/find_telomere_windows.nf index 675fd544..2fcd0022 100755 --- a/modules/local/find_telomere_windows.nf +++ b/modules/local/find_telomere_windows.nf @@ -3,16 +3,17 @@ process FIND_TELOMERE_WINDOWS { label 'process_low' conda "bioconda::java-jdk=8.0.112" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine == 'singularity' && + !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : 'biocontainers/java-jdk:8.0.112--1' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*.windows") , emit: windows - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.windows" ) , emit: windows + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/fkutils/fkprof/main.nf b/modules/local/fkutils/fkprof/main.nf index 399c2d5a..8562e2c8 100644 --- a/modules/local/fkutils/fkprof/main.nf +++ b/modules/local/fkutils/fkprof/main.nf @@ -6,18 +6,13 @@ process FKUTILS_FKPROF { 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'docker.io/ubuntu:20.04' }" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "FKUTILS_FKPROF module does not support Conda. Please use Docker / Singularity instead." - } - input: - tuple val(meta), path(reference) - tuple val(meta2), path(ktab) + tuple val( meta ), path( reference ) + tuple val( meta2 ), path( ktab ) output: - tuple val(meta), file("*bed"), emit: bed - path "versions.yml", emit: versions + tuple val( meta ), file( "*bed" ), emit: bed + path "versions.yml", emit: versions script: def args = task.ext.args ?: "" diff --git a/modules/local/gap_length.nf b/modules/local/gap_length.nf index dd1a5878..b5bf0733 100755 --- a/modules/local/gap_length.nf +++ b/modules/local/gap_length.nf @@ -4,15 +4,15 @@ process GAP_LENGTH { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bedgraph") , emit: bedgraph - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bedgraph" ) , emit: bedgraph + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf index 4ddf0162..6a06bb87 100755 --- a/modules/local/generate_cram_csv.nf +++ b/modules/local/generate_cram_csv.nf @@ -2,14 +2,7 @@ process GENERATE_CRAM_CSV { tag "${meta.id}" label 'process_tiny' - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : - 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GENERATE_CRAM_CSV module does not support Conda. Please use Docker / Singularity instead." 
- } + container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' input: tuple val(meta), path(crampath) diff --git a/modules/local/get_largest_scaff.nf b/modules/local/get_largest_scaff.nf index 091abc3e..2296958c 100755 --- a/modules/local/get_largest_scaff.nf +++ b/modules/local/get_largest_scaff.nf @@ -5,11 +5,11 @@ process GET_LARGEST_SCAFF { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: env largest_scaff , emit: scaff_size diff --git a/modules/local/get_paired_contact_bed.nf b/modules/local/get_paired_contact_bed.nf index b3db6d1f..e6d3a135 100755 --- a/modules/local/get_paired_contact_bed.nf +++ b/modules/local/get_paired_contact_bed.nf @@ -4,15 +4,15 @@ process GET_PAIRED_CONTACT_BED { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed") , emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ) , emit: bed + path "versions.yml" , emit: versions script: def pulled = '-T sort_tmp' diff --git a/modules/local/getminmaxpunches.nf b/modules/local/getminmaxpunches.nf index 0a095b29..6e828bb5 100755 --- a/modules/local/getminmaxpunches.nf +++ b/modules/local/getminmaxpunches.nf @@ -4,16 +4,16 @@ process GETMINMAXPUNCHES{ conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: tuple val(meta), path(bedfile) output: - tuple val(meta), path ('*zero.bed') , optional: true , emit: min - tuple val(meta), path ('*max.bed') , optional: true , emit: max - path "versions.yml" , emit: versions + tuple val(meta), path ( '*zero.bed' ) , optional: true , emit: min + tuple val(meta), path ( '*max.bed' ) , optional: true , emit: max + path "versions.yml" , emit: versions shell: def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. diff --git a/modules/local/graphoverallcoverage.nf b/modules/local/graphoverallcoverage.nf index 572e793d..b8cc8777 100755 --- a/modules/local/graphoverallcoverage.nf +++ b/modules/local/graphoverallcoverage.nf @@ -2,14 +2,7 @@ process GRAPHOVERALLCOVERAGE { tag "$meta.id" label "process_single" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : - 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" - - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "GRAPHOVERALLCOVERAGE module does not support Conda. Please use Docker / Singularity instead." - } + container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' input: tuple val(meta), path(bed) diff --git a/modules/local/juicer_tools_pre.nf b/modules/local/juicer_tools_pre.nf index d12ec28c..12b46ce8 100755 --- a/modules/local/juicer_tools_pre.nf +++ b/modules/local/juicer_tools_pre.nf @@ -5,7 +5,8 @@ process JUICER_TOOLS_PRE { label 'process_medium' conda "bioconda::java-jdk=8.0.112" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + container "${ workflow.containerEngine == 'singularity' && + !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/java-jdk:8.0.112--1' : 'biocontainers/java-jdk:8.0.112--1' }" diff --git a/modules/local/paf_to_bed.nf b/modules/local/paf_to_bed.nf index 445d3b2f..c50f0373 100755 --- a/modules/local/paf_to_bed.nf +++ b/modules/local/paf_to_bed.nf @@ -4,15 +4,15 @@ process PAF2BED { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*_punchlist.bed"), emit: punchlist - path "versions.yml" , emit: versions + tuple val( meta ), file( "*_punchlist.bed" ), emit: punchlist + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}_${meta.type}_punchlist" diff --git a/modules/local/pretext_graph.nf b/modules/local/pretext_graph.nf index 9a1d3ff2..3f600441 100644 --- a/modules/local/pretext_graph.nf +++ b/modules/local/pretext_graph.nf @@ -4,11 +4,6 @@ process PRETEXT_GRAPH { container "quay.io/sanger-tol/pretext:0.0.2-yy5-c3" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "PRETEXT_GRAPH module does not support Conda. Please use Docker / Singularity instead." - } - input: tuple val(meta), path(pretext_file) tuple val(gap), path(gap_file) @@ -18,8 +13,8 @@ process PRETEXT_GRAPH { tuple val(rep), path(repeat_density) output: - tuple val(meta), path("*.pretext") , emit: pretext - path "versions.yml" , emit: versions + tuple val(meta), path("*.pretext") , emit: pretext + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/local/reformat_intersect.nf b/modules/local/reformat_intersect.nf index 3c4cdb61..bcc0be77 100755 --- a/modules/local/reformat_intersect.nf +++ b/modules/local/reformat_intersect.nf @@ -4,15 +4,14 @@ process REFORMAT_INTERSECT { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*.bed"), emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*.bed" ), emit: bed shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/rename_ids.nf b/modules/local/rename_ids.nf index b6b12920..f69f518d 100755 --- a/modules/local/rename_ids.nf +++ b/modules/local/rename_ids.nf @@ -4,15 +4,15 @@ process RENAME_IDS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed") , emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ) , emit: bed + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/replace_dots.nf b/modules/local/replace_dots.nf index bb0f051e..4d12f5cd 100755 --- a/modules/local/replace_dots.nf +++ b/modules/local/replace_dots.nf @@ -4,15 +4,15 @@ process REPLACE_DOTS { conda "conda-forge::coreutils=9.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'docker.io/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'docker.io/ubuntu:20.04' }" input: - tuple val(meta), path(file) + tuple val( meta ), path( file ) output: - tuple val(meta), file("*bed"), emit: bed - path "versions.yml" , emit: versions + tuple val( meta ), file( "*bed" ), emit: bed + path "versions.yml" , emit: versions shell: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/nextflow.config b/nextflow.config index b9439082..af9a14b9 100755 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,6 @@ params { input = null outdir = "./results" juicer = false - steps = "NONE" tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null diff --git a/nextflow_schema.json b/nextflow_schema.json index f8921bc3..00e2ce44 100755 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -35,11 +35,6 @@ "default": false, "fa_icon": "fas fa-check" }, - "steps": { - "type": "string", - "description": "A csv list of steps to skip", - "fa_icon": "fas fa-folder-open" - }, "email": { "type": "string", "description": "Email address for completion summary.", diff --git a/subworkflows/local/ancestral_gene.nf b/subworkflows/local/ancestral_gene.nf index 6ac72ab9..cfee2061 100755 --- a/subworkflows/local/ancestral_gene.nf +++ b/subworkflows/local/ancestral_gene.nf @@ -33,10 +33,10 @@ workflow ANCESTRAL_GENE { // LOGIC: STRIP OUT METADATA // ch_grab - .map {meta, fulltable + .map { meta, fulltable -> fulltable } - .set {assignanc_input} + .set { assignanc_input } // // MODULE: ASSIGN EXTRACTED GENES TO ANCESTRAL GROUPS @@ -61,7 +61,7 @@ workflow ANCESTRAL_GENE { // UCSC_BEDTOBIGBED( BEDTOOLS_SORT.out.sorted, - dot_genome.map{it[1]}, // Pull file from tuple(meta, file) + dot_genome.map{ it[1] }, // Pull file from tuple(meta, file) buscogene_as ) ch_versions = 
ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) diff --git a/subworkflows/local/busco_annotation.nf b/subworkflows/local/busco_annotation.nf index 40c85166..974738f6 100755 --- a/subworkflows/local/busco_annotation.nf +++ b/subworkflows/local/busco_annotation.nf @@ -31,7 +31,7 @@ workflow BUSCO_ANNOTATION { ch_versions = Channel.empty() // COMMENT: Set BUSCO mode to 'genome' - ch_busco_mode = Channel.of("genome") + ch_busco_mode = Channel.of( "genome" ) // @@ -45,9 +45,9 @@ workflow BUSCO_ANNOTATION { lineagespath, [] ) - ch_versions = ch_versions.mix(BUSCO.out.versions.first()) + ch_versions = ch_versions.mix( BUSCO.out.versions.first() ) - ch_grab = GrabFiles(BUSCO.out.busco_dir) + ch_grab = GrabFiles( BUSCO.out.busco_dir ) // // MODULE: EXTRACT THE BUSCO GENES FOUND IN REFERENCE @@ -55,7 +55,7 @@ workflow BUSCO_ANNOTATION { EXTRACT_BUSCOGENE ( ch_grab ) - ch_versions = ch_versions.mix(EXTRACT_BUSCOGENE.out.versions) + ch_versions = ch_versions.mix( EXTRACT_BUSCOGENE.out.versions ) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE @@ -68,7 +68,7 @@ workflow BUSCO_ANNOTATION { file ) } - .set {bedtools_input} + .set { bedtools_input } // // MODULE: SORT THE EXTRACTED BUSCO GENE // @@ -76,7 +76,7 @@ workflow BUSCO_ANNOTATION { bedtools_input, [] ) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: CONVERT THE BED TO BIGBED @@ -86,30 +86,30 @@ workflow BUSCO_ANNOTATION { dot_genome.map{it[1]}, // Gets file from tuple (meta, file) buscogene_as ) - ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) // // LOGIC: AGGREGATE DATA AND SORT BRANCH ON CLASS // lineageinfo - .combine(BUSCO.out.busco_dir) - .combine(ancestral_table) + .combine( BUSCO.out.busco_dir ) + .combine( ancestral_table ) .branch { lep: it[0].split('_')[0] == "lepidoptera" general: it[0].split('_')[0] != "lepidoptera" } - .set{ch_busco_data} + .set{ ch_busco_data } // // LOGIC: BUILD NEW INPUT CHANNEL FOR ANCESTRAL ID // ch_busco_data .lep - .multiMap {lineage, meta, busco_dir, ancestral_table -> + .multiMap { lineage, meta, busco_dir, ancestral_table -> busco_dir: tuple( meta, busco_dir ) atable: ancestral_table } - .set{ch_busco_lep_data} + .set{ ch_busco_lep_data } // // SUBWORKFLOW: RUN ANCESTRAL BUSCO ID (ONLY AVAILABLE FOR LEPIDOPTERA) @@ -120,7 +120,7 @@ workflow BUSCO_ANNOTATION { buscogene_as, ch_busco_lep_data.atable ) - ch_versions = ch_versions.mix(ANCESTRAL_GENE.out.versions) + ch_versions = ch_versions.mix( ANCESTRAL_GENE.out.versions ) emit: ch_buscogene_bigbed = UCSC_BEDTOBIGBED.out.bigbed diff --git a/subworkflows/local/gap_finder.nf b/subworkflows/local/gap_finder.nf index 89feaf49..5b53d908 100755 --- a/subworkflows/local/gap_finder.nf +++ b/subworkflows/local/gap_finder.nf @@ -20,7 +20,7 @@ workflow GAP_FINDER { SEQTK_CUTN ( reference_tuple ) - ch_versions = ch_versions.mix(SEQTK_CUTN.out.versions) + ch_versions = ch_versions.mix( SEQTK_CUTN.out.versions ) // // MODULE: ADD THE LENGTH OF GAP TO BED FILE - INPUT FOR PRETEXT MODULE @@ -28,7 +28,7 @@ workflow GAP_FINDER { GAP_LENGTH ( SEQTK_CUTN.out.bed ) - ch_versions = ch_versions.mix(GAP_LENGTH.out.versions) + ch_versions = ch_versions.mix( GAP_LENGTH.out.versions ) // // MODULE: BGZIP AND TABIX THE GAP FILE @@ -36,7 +36,7 @@ workflow GAP_FINDER { TABIX_BGZIPTABIX ( SEQTK_CUTN.out.bed ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( 
TABIX_BGZIPTABIX.out.versions ) emit: gap_file = GAP_LENGTH.out.bedgraph diff --git a/subworkflows/local/gene_alignment.nf b/subworkflows/local/gene_alignment.nf index 243c2f43..3269c201 100755 --- a/subworkflows/local/gene_alignment.nf +++ b/subworkflows/local/gene_alignment.nf @@ -31,7 +31,7 @@ workflow GENE_ALIGNMENT { .map{ meta, file -> "${meta.class}" } - .set {assembly_class} + .set { assembly_class } // @@ -50,14 +50,14 @@ workflow GENE_ALIGNMENT { // SUBWORKFLOW // ch_data - .combine(alignment_datadir) - .combine(assembly_class) + .combine( alignment_datadir ) + .combine( assembly_class ) .map { ch_org, data_dir, classT -> file("${data_dir}${classT}/csv_data/${ch_org}-data.csv") } - .splitCsv(header: true, sep:',') - .map(row -> + .splitCsv( header: true, sep:',') + .map( row -> tuple([ org: row.org, type: row.type, id: row.data_file.split('/')[-1].split('.MOD.')[0] @@ -94,7 +94,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix(GEN_ALIGNMENTS.out.versions) + ch_versions = ch_versions.mix( GEN_ALIGNMENTS.out.versions ) CDS_ALIGNMENTS ( reference_tuple, reference_index, @@ -102,7 +102,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix(CDS_ALIGNMENTS.out.versions) + ch_versions = ch_versions.mix( CDS_ALIGNMENTS.out.versions ) RNA_ALIGNMENTS ( reference_tuple, reference_index, @@ -110,7 +110,7 @@ workflow GENE_ALIGNMENT { dot_genome, intron_size ) - ch_versions = ch_versions.mix(RNA_ALIGNMENTS.out.versions) + ch_versions = ch_versions.mix( RNA_ALIGNMENTS.out.versions ) emit: pep_gff = PEP_ALIGNMENTS.out.tbi_gff diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf index 166475e3..5c4d06a5 100755 --- a/subworkflows/local/generate_genome.nf +++ b/subworkflows/local/generate_genome.nf @@ -3,6 +3,7 @@ // // MODULE IMPORT BLOCK // +include { GET_LARGEST_SCAFF } from '../../modules/local/get_largest_scaff' include { GENERATE_UNSORTED_GENOME } from '../../subworkflows/local/generate_unsorted_genome' include { GENERATE_SORTED_GENOME } from '../../subworkflows/local/generate_sorted_genome' @@ -23,7 +24,7 @@ workflow GENERATE_GENOME { reference_file .combine(map_order) - .map{ref_meta, ref, map_order -> + .map{ ref_meta, ref, map_order -> tuple( [ id: ref_meta.id, map_order :map_order @@ -43,7 +44,7 @@ workflow GENERATE_GENOME { GENERATE_SORTED_GENOME ( ch_genomesize_input.sorted ) - ch_versions = ch_versions.mix(GENERATE_SORTED_GENOME.out.versions) + ch_versions = ch_versions.mix( GENERATE_SORTED_GENOME.out.versions ) ch_genomesize = GENERATE_SORTED_GENOME.out.genomesize ch_genome_fai = GENERATE_SORTED_GENOME.out.ref_index ch_versions = GENERATE_SORTED_GENOME.out.versions @@ -54,12 +55,22 @@ workflow GENERATE_GENOME { GENERATE_UNSORTED_GENOME ( ch_genomesize_input.unsorted ) - ch_versions = ch_versions.mix(GENERATE_UNSORTED_GENOME.out.versions) - ch_genomesize = ch_genomesize.mix(GENERATE_UNSORTED_GENOME.out.genomesize) - ch_genome_fai = ch_genome_fai.mix(GENERATE_UNSORTED_GENOME.out.ref_index) + ch_versions = ch_versions.mix( GENERATE_UNSORTED_GENOME.out.versions ) + ch_genomesize = ch_genomesize.mix( GENERATE_UNSORTED_GENOME.out.genomesize ) + ch_genome_fai = ch_genome_fai.mix( GENERATE_UNSORTED_GENOME.out.ref_index ) ch_versions = GENERATE_UNSORTED_GENOME.out.versions + // + // MODULE: Cut out the largest scaffold size and use as comparator against 512MB + // This is the cut off for TABIX using tbi indexes + // + GET_LARGEST_SCAFF ( + ch_genomesize + ) + ch_versions = 
ch_versions.mix( GET_LARGEST_SCAFF.out.versions ) + emit: + max_scaff_size = GET_LARGEST_SCAFF.out.scaff_size.toInteger() dot_genome = ch_genomesize ref_index = ch_genome_fai ref = reference_file diff --git a/subworkflows/local/generate_sorted_genome.nf b/subworkflows/local/generate_sorted_genome.nf index 71ec98b7..bc38e2dd 100755 --- a/subworkflows/local/generate_sorted_genome.nf +++ b/subworkflows/local/generate_sorted_genome.nf @@ -18,13 +18,13 @@ workflow GENERATE_SORTED_GENOME { reference_file, "unsorted.genome" ) - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions ) genome_size = CUSTOM_GETCHROMSIZES.out.sizes GNU_SORT ( CUSTOM_GETCHROMSIZES.out.sizes ) - ch_versions = ch_versions.mix(GNU_SORT.out.versions) + ch_versions = ch_versions.mix( GNU_SORT.out.versions ) emit: genomesize = GNU_SORT.out.sorted diff --git a/subworkflows/local/generate_unsorted_genome.nf b/subworkflows/local/generate_unsorted_genome.nf index de5e6f0c..93bf8e66 100755 --- a/subworkflows/local/generate_unsorted_genome.nf +++ b/subworkflows/local/generate_unsorted_genome.nf @@ -17,7 +17,7 @@ workflow GENERATE_UNSORTED_GENOME { reference_file, "unsorted.genome" ) - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions ) emit: diff --git a/subworkflows/local/hic_bamtobed.nf b/subworkflows/local/hic_bamtobed.nf index 70ab1ba8..432ae1b7 100755 --- a/subworkflows/local/hic_bamtobed.nf +++ b/subworkflows/local/hic_bamtobed.nf @@ -1,6 +1,6 @@ #!/usr/bin/env nextflow -// This subworkflow takes converts .bam to .bed for the hic_mapping subworkflow. +// This subworkflow takes converts .bam to .bed for the hic_mapping subworkflow. // It runs markdup, sort and get paired contacts. 
// Input - Assembled genomic fasta file, .bam file // Output - sorted .bed and paired contact .bed @@ -25,12 +25,12 @@ workflow HIC_BAMTOBED { // LOGIC: PREPARE MARKDUP INPUT // bam_file - .combine(reference_tuple) - .multiMap {meta_bam, bam_file, meta_ref, ref -> - bam : tuple(meta_bam, bam_file) - reference : ref + .combine( reference_tuple ) + .multiMap { meta_bam, bam_file, meta_ref, ref -> + bam : tuple(meta_bam, bam_file ) + reference : ref } - .set {markdup_input} + .set { markdup_input } // // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -39,7 +39,7 @@ workflow HIC_BAMTOBED { markdup_input.bam, markdup_input.reference ) - ch_versions = ch_versions.mix (SAMTOOLS_MARKDUP.out.versions) + ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions ) // // MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE @@ -47,15 +47,15 @@ workflow HIC_BAMTOBED { BAMTOBED_SORT( SAMTOOLS_MARKDUP.out.bam ) - ch_versions = ch_versions.mix(BAMTOBED_SORT.out.versions) + ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions ) // // MODULE: GENERATE CONTACT PAIRS // - GET_PAIRED_CONTACT_BED( - BAMTOBED_SORT.out.sorted_bed + GET_PAIRED_CONTACT_BED( + BAMTOBED_SORT.out.sorted_bed ) - ch_versions = ch_versions.mix(GET_PAIRED_CONTACT_BED.out.versions) + ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions ) emit: paired_contacts_bed = GET_PAIRED_CONTACT_BED.out.bed diff --git a/subworkflows/local/hic_bwamem2.nf b/subworkflows/local/hic_bwamem2.nf index 67d861cb..9409cf7a 100755 --- a/subworkflows/local/hic_bwamem2.nf +++ b/subworkflows/local/hic_bwamem2.nf @@ -24,14 +24,14 @@ workflow HIC_BWAMEM2 { BWAMEM2_INDEX ( reference_tuple - ) - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) + ) + ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) csv_ch .splitCsv() - .combine (reference_tuple) - .combine (BWAMEM2_INDEX.out.index) - .map{cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> + .combine ( reference_tuple ) + .combine ( BWAMEM2_INDEX.out.index ) + .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> tuple([ id: cram_id.id ], @@ -46,7 +46,7 @@ workflow HIC_BWAMEM2 { ref_dir ) } - .set {ch_filtering_input} + .set { ch_filtering_input } // // MODULE: map hic reads by 10,000 container per time using bwamem2 @@ -55,18 +55,18 @@ workflow HIC_BWAMEM2 { ch_filtering_input ) - ch_versions = ch_versions.mix(CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions) + ch_versions = ch_versions.mix( CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.versions ) mappedbam_ch = CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT.out.mappedbam // // LOGIC: PREPARING BAMS FOR MERGE // mappedbam_ch - .map{meta, file -> + .map{ meta, file -> tuple( file ) } .collect() - .map {file -> + .map { file -> tuple ( [ id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] @@ -74,7 +74,7 @@ workflow HIC_BWAMEM2 { file ) } - .set {collected_files_for_merge} + .set { collected_files_for_merge } // // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -84,7 +84,7 @@ workflow HIC_BWAMEM2 { reference_tuple, reference_index ) - ch_versions = ch_versions.mix (SAMTOOLS_MERGE.out.versions.first()) + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) emit: diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index aed1f9bb..e379f49a 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -41,21 +41,21 @@ workflow HIC_MAPPING { 
ch_versions = Channel.empty() // COMMENT: 1000bp BIN SIZE INTERVALS FOR CLOAD - ch_cool_bin = Channel.of(1000) + ch_cool_bin = Channel.of( 1000 ) // // LOGIC: make channel of hic reads as input for GENERATE_CRAM_CSV // reference_tuple - .combine(hic_reads_path) - .map {meta, ref, hic_meta, hic_reads_path -> + .combine( hic_reads_path ) + .map { meta, ref, hic_meta, hic_reads_path -> tuple( [ id: meta.id, single_end: true], hic_reads_path ) } - .set {get_reads_input} + .set { get_reads_input } // // MODULE: generate a cram csv file containing the required parametres for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT @@ -63,21 +63,21 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV ( get_reads_input ) - ch_versions = ch_versions.mix(GENERATE_CRAM_CSV.out.versions) + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) // // LOGIC: make branches for different hic aligner. // hic_reads_path .combine(reference_tuple) - .map{meta, hic_read_path, ref_meta, ref-> + .map{ meta, hic_read_path, ref_meta, ref-> tuple( [ id : ref_meta, aligner : meta.aligner ], ref ) - } + } .branch{ minimap2 : it[0].aligner == "minimap2" bwamem2 : it[0].aligner == "bwamem2" @@ -92,7 +92,7 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV.out.csv, reference_index ) - ch_versions = ch_versions.mix(HIC_MINIMAP2.out.versions) + ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) mergedbam = HIC_MINIMAP2.out.mergedbam // @@ -103,18 +103,18 @@ workflow HIC_MAPPING { GENERATE_CRAM_CSV.out.csv, reference_index ) - ch_versions = ch_versions.mix(HIC_BWAMEM2.out.versions) + ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) mergedbam = mergedbam.mix(HIC_BWAMEM2.out.mergedbam) // // LOGIC: PREPARING PRETEXT MAP INPUT // mergedbam - .combine(reference_tuple) - .combine (dot_genome) + .combine( reference_tuple ) + .combine ( dot_genome ) .multiMap { bam_meta, bam, ref_meta, ref_fa, genome_meta, genome_file -> input_bam: tuple( [ id: bam_meta.id, - sz: file(bam).size() ], + sz: file( bam ).size() ], bam ) // NOTE: Inject the genome file into the channel to speed up PretextMap @@ -123,7 +123,7 @@ workflow HIC_MAPPING { genome_file ) } - .set {pretext_input} + .set { pretext_input } // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR LOW RES @@ -132,7 +132,7 @@ workflow HIC_MAPPING { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix(PRETEXTMAP_STANDRD.out.versions) + ch_versions = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions ) // // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT @@ -145,7 +145,7 @@ workflow HIC_MAPPING { telo_file, repeat_density_file ) - ch_versions = ch_versions.mix(PRETEXT_INGEST_SNDRD.out.versions) + ch_versions = ch_versions.mix( PRETEXT_INGEST_SNDRD.out.versions ) // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES @@ -154,7 +154,7 @@ workflow HIC_MAPPING { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix(PRETEXTMAP_HIGHRES.out.versions) + ch_versions = ch_versions.mix( PRETEXTMAP_HIGHRES.out.versions ) // // NOTICE: This could fail on LARGE hires maps due to some memory parameter in the C code @@ -169,7 +169,7 @@ workflow HIC_MAPPING { telo_file, repeat_density_file ) - ch_versions = ch_versions.mix(PRETEXT_INGEST_HIRES.out.versions) + ch_versions = ch_versions.mix( PRETEXT_INGEST_HIRES.out.versions ) // // MODULE: GENERATE PNG FROM STANDARD PRETEXT @@ -177,7 +177,7 @@ workflow HIC_MAPPING { SNAPSHOT_SRES ( PRETEXTMAP_STANDRD.out.pretext ) - ch_versions = ch_versions.mix (SNAPSHOT_SRES.out.versions) + ch_versions 
= ch_versions.mix ( SNAPSHOT_SRES.out.versions ) // // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G @@ -195,10 +195,10 @@ workflow HIC_MAPPING { tosubsample : it[0].sz >= 50000000000 unmodified : it[0].sz < 50000000000 } - .set {ch_merged_bam} + .set { ch_merged_bam } // LOGIC: PREPARE BAMTOBED JUICER INPUT. - if (workflow_setting != "RAPID_TOL" && params.juicer == false) { + if ( workflow_setting != "RAPID_TOL" && params.juicer == false ) { // // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G // @@ -215,7 +215,7 @@ workflow HIC_MAPPING { tosubsample : it[0].sz >= 50000000000 unmodified : it[0].sz < 50000000000 } - .set {ch_merged_bam} + .set { ch_merged_bam } // // MODULE: SUBSAMPLE BAM @@ -223,7 +223,7 @@ workflow HIC_MAPPING { SUBSAMPLE_BAM ( ch_merged_bam.tosubsample ) - ch_versions = ch_versions.mix (SUBSAMPLE_BAM.out.versions) + ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) // // LOGIC: COMBINE BRANCHED TO SINGLE OUTPUT @@ -235,12 +235,12 @@ workflow HIC_MAPPING { // LOGIC: PREPARE BAMTOBED JUICER INPUT // ch_subsampled_bam - .combine(reference_tuple) + .combine( reference_tuple ) .multiMap { meta, subsampled_bam, meta_ref, ref -> bam : tuple(meta, subsampled_bam ) reference : tuple(meta_ref, ref) } - .set {ch_bamtobed_juicer_input} + .set { ch_bamtobed_juicer_input } // // SUBWORKFLOW: BAM TO BED FOR JUICER - USES THE SUBSAMPLED MERGED BAM @@ -249,19 +249,19 @@ workflow HIC_MAPPING { ch_bamtobed_juicer_input.bam, ch_bamtobed_juicer_input.reference ) - ch_versions = ch_versions.mix(HIC_BAMTOBED_JUICER.out.versions) + ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions ) // // LOGIC: PREPARE JUICER TOOLS INPUT // HIC_BAMTOBED_JUICER.out.paired_contacts_bed - .combine(dot_genome) + .combine( dot_genome ) .multiMap { meta, paired_contacts, meta_my_genome, my_genome -> - paired : tuple([id: meta.id, single_end: true], paired_contacts) + paired : tuple([ id: meta.id, single_end: true], paired_contacts ) genome : my_genome id : meta.id } - .set {ch_juicer_input} + .set { ch_juicer_input } // // MODULE: GENERATE HIC MAP, ONLY IS PIPELINE IS RUNNING ON ENTRY FULL @@ -271,19 +271,19 @@ workflow HIC_MAPPING { ch_juicer_input.genome, ch_juicer_input.id ) - ch_versions = ch_versions.mix(JUICER_TOOLS_PRE.out.versions) + ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions ) } // // LOGIC: PREPARE BAMTOBED COOLER INPUT // mergedbam - .combine(reference_tuple) + .combine( reference_tuple ) .multiMap { meta, merged_bam, meta_ref, ref -> bam : tuple(meta, merged_bam ) reference : tuple(meta_ref, ref) } - .set {ch_bamtobed_cooler_input} + .set { ch_bamtobed_cooler_input } // // SUBWORKFLOW: BAM TO BED FOR COOLER @@ -292,26 +292,26 @@ workflow HIC_MAPPING { ch_bamtobed_cooler_input.bam, ch_bamtobed_cooler_input.reference ) - ch_versions = ch_versions.mix(HIC_BAMTOBED_COOLER.out.versions) + ch_versions = ch_versions.mix( HIC_BAMTOBED_COOLER.out.versions ) // // LOGIC: BIN CONTACT PAIRS // HIC_BAMTOBED_COOLER.out.paired_contacts_bed - .join(HIC_BAMTOBED_COOLER.out.sorted_bed) - .combine( ch_cool_bin) - .set {ch_binned_pairs} + .join( HIC_BAMTOBED_COOLER.out.sorted_bed ) + .combine( ch_cool_bin ) + .set { ch_binned_pairs } // // LOGIC: PREPARE COOLER INPUT // ch_binned_pairs .combine(dot_genome) - .multiMap {meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> - cooler_in : tuple (meta, pairs, bed, cool_bin) + .multiMap { meta, pairs, bed, cool_bin, meta_my_genome, my_genome -> + cooler_in : tuple ( meta, pairs, bed, cool_bin ) genome_file : my_genome } - 
.set {ch_cooler} + .set { ch_cooler } // // MODULE: GENERATE A MULTI-RESOLUTION COOLER FILE BY COARSENING @@ -326,7 +326,7 @@ workflow HIC_MAPPING { // LOGIC: REFACTOR CHANNEL FOR ZOOMIFY // COOLER_CLOAD.out.cool - .map{meta, cools, cool_bin -> + .map{ meta, cools, cool_bin -> [meta, cools] } .set{ch_cool} @@ -345,14 +345,14 @@ workflow HIC_MAPPING { ch_cram_files .collect() - .map {meta, cram -> + .map { meta, cram -> tuple( [ id: 'cram', sz: cram instanceof ArrayList ? cram.collect { it.size()} : cram.size(), ], cram ) } - .combine(GENERATE_CRAM_CSV.out.csv) + .combine( GENERATE_CRAM_CSV.out.csv ) .map { meta, data, meta2, csv -> tuple( [ id: meta.id, sz: meta.sz, @@ -361,7 +361,7 @@ workflow HIC_MAPPING { data ) } - .set {ch_reporting_cram} + .set { ch_reporting_cram } emit: mcool = COOLER_ZOOMIFY.out.mcool diff --git a/subworkflows/local/hic_minimap2.nf b/subworkflows/local/hic_minimap2.nf index 76b7cf74..b37ff30d 100755 --- a/subworkflows/local/hic_minimap2.nf +++ b/subworkflows/local/hic_minimap2.nf @@ -19,27 +19,27 @@ workflow HIC_MINIMAP2 { reference_tuple // Channel: tuple [ val(meta), path( file ) ] csv_ch reference_index - + main: ch_versions = Channel.empty() mappedbam_ch = Channel.empty() // - // MODULE: generate minimap2 mmi file - // + // MODULE: generate minimap2 mmi file + // MINIMAP2_INDEX ( reference_tuple - ) - ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions) + ) + ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions ) // // LOGIC: generate input channel for mapping - // + // csv_ch .splitCsv() - .combine (reference_tuple) - .combine (MINIMAP2_INDEX.out.index) - .map{cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path -> + .combine ( reference_tuple ) + .combine ( MINIMAP2_INDEX.out.index ) + .map{ cram_id, cram_info, ref_id, ref_dir, mmi_id, mmi_path-> tuple([ id: cram_id.id ], @@ -54,16 +54,16 @@ workflow HIC_MINIMAP2 { ref_dir ) } - .set {ch_filtering_input} + .set { ch_filtering_input } // // MODULE: map hic reads by 10,000 container per time - // + // CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT ( ch_filtering_input ) - ch_versions = ch_versions.mix(CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions) + ch_versions = ch_versions.mix( CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions ) mappedbam_ch = CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.mappedbam @@ -71,19 +71,19 @@ workflow HIC_MINIMAP2 { // LOGIC: PREPARING BAMS FOR MERGE // mappedbam_ch - .map{meta, file -> - tuple(file) + .map{ meta, file -> + tuple( file ) } .collect() - .map {file -> + .map { file -> tuple ( [ - id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] + id: file[0].toString().split('/')[-1].split('_')[0] + '_' + file[0].toString().split('/')[-1].split('_')[1] ], file ) } - .set {collected_files_for_merge} + .set { collected_files_for_merge } // // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES @@ -93,10 +93,10 @@ workflow HIC_MINIMAP2 { reference_tuple, reference_index ) - ch_versions = ch_versions.mix (SAMTOOLS_MERGE.out.versions.first()) - + ch_versions = ch_versions.mix ( SAMTOOLS_MERGE.out.versions.first() ) + emit: - mergedbam = SAMTOOLS_MERGE.out.bam + mergedbam = SAMTOOLS_MERGE.out.bam versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/insilico_digest.nf b/subworkflows/local/insilico_digest.nf index 314d8970..fe6c0f46 100755 --- a/subworkflows/local/insilico_digest.nf +++ b/subworkflows/local/insilico_digest.nf @@ -27,24 +27,24 @@ workflow INSILICO_DIGEST { // 
MULTIMAP INTO TWO CHANNELS SO THERE IS REFERENCE * ENZYME CHANNELS // reference - .map {meta, data -> + .map { meta, data -> tuple( [ id : meta.id, single_end : false ], file( data ) ) } - .set {input_fasta} + .set { input_fasta } input_fasta .combine(ch_enzyme) - .multiMap {meta, reference, enzyme_id -> + .multiMap { meta, reference, enzyme_id -> fasta : tuple( meta, reference ) enzyme : enzyme_id } - .set {fa2c_input} + .set { fa2c_input } // // MODULE: CONVERTS FASTA INTO A COLOUR-AWARE BIONANO CMAP FORMAT @@ -62,7 +62,7 @@ workflow INSILICO_DIGEST { MAKECMAP_FA2CMAPMULTICOLOR.out.cmap .map{ meta, cfile -> tuple( - [id : cfile.toString().split('_')[-3]], + [ id : cfile.toString().split('_')[-3] ], cfile ) } @@ -71,21 +71,21 @@ workflow INSILICO_DIGEST { MAKECMAP_FA2CMAPMULTICOLOR.out.cmapkey .map{ kfile -> tuple( - [id : kfile.toString().split('_')[-4]], + [ id : kfile.toString().split('_')[-4] ], kfile ) } - .set {ch_cmapkey_new} + .set { ch_cmapkey_new } ch_cmap_new .join(ch_cmapkey_new) - .multiMap {meta, cfile, kfile -> + .multiMap { meta, cfile, kfile -> cmap : tuple( meta, cfile) key_file : kfile } - .set {ch_join} + .set { ch_join } // // MODULE: RENAME CMAP IDs FROM BIONANO IDX TO ORIGINAL GENOMIC LOCATIONS @@ -98,11 +98,11 @@ workflow INSILICO_DIGEST { ch_versions = ch_versions.mix(MAKECMAP_RENAMECMAPIDS.out.versions) MAKECMAP_RENAMECMAPIDS.out.renamedcmap - .multiMap {meta, file -> + .multiMap { meta, file -> full : tuple ( meta, file ) sample : meta.id } - .set {ch_renamedcmap} + .set { ch_renamedcmap } // // MODULE: CONVERT CMAP FILE INTO BED FILE @@ -117,12 +117,12 @@ workflow INSILICO_DIGEST { MAKECMAP_CMAP2BED.out.bedfile .combine(sizefile) .combine(dot_as) - .multiMap {meta, bed, meta_2, dot_genome, as_file -> - bed_tuple : tuple(meta, bed) + .multiMap { meta, bed, meta_2, dot_genome, as_file -> + bed_tuple : tuple( meta, bed ) genome_file : dot_genome autosql : as_file } - .set {combined_ch} + .set { combined_ch } // // MODULE: CONVERT ABOVE BED INTO BIGBED WITH ADDITIONAL AS FILE diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf index 9ef7278c..2c98f955 100755 --- a/subworkflows/local/kmer.nf +++ b/subworkflows/local/kmer.nf @@ -26,47 +26,47 @@ workflow KMER { // LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT // reads_path - .map {meta, reads_path -> + .map { meta, reads_path -> tuple( [ id : meta.id, single_end : true ], reads_path ) } - .set {get_reads_input} + .set { get_reads_input } // // MODULE: GETS PACBIO READ PATHS FROM READS_PATH // - ch_grabbed_read_paths = GrabFiles(get_reads_input) + ch_grabbed_read_paths = GrabFiles( get_reads_input ) // // MODULE: JOIN PACBIO READ // CAT_CAT( ch_grabbed_read_paths ) - ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + ch_versions = ch_versions.mix( CAT_CAT.out.versions.first() ) // // MODULE: COUNT KMERS // FASTK_FASTK( CAT_CAT.out.file_out ) - ch_versions = ch_versions.mix(FASTK_FASTK.out.versions.first()) + ch_versions = ch_versions.mix( FASTK_FASTK.out.versions.first() ) // // LOGIC: PREPARE MERQURYFK INPUT // FASTK_FASTK.out.hist - .combine(FASTK_FASTK.out.ktab) - .combine(reference_tuple) + .combine( FASTK_FASTK.out.ktab ) + .combine( reference_tuple ) .map{ meta_hist, hist, meta_ktab, ktab, meta_ref, primary -> tuple( meta_hist, hist, ktab, primary, [] ) } - .set{ch_merq} + .set{ ch_merq } // // MODULE: USE KMER HISTOGRAM TO PRODUCE SPECTRA GRAPH // - MERQURYFK_MERQURYFK (ch_merq) + MERQURYFK_MERQURYFK ( ch_merq ) ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions.first() ) 
emit: @@ -82,10 +82,10 @@ process GrabFiles { executor 'local' input: - tuple val(meta), path("in") + tuple val( meta ), path( "in" ) output: - tuple val(meta), path("in/*.fasta.gz") + tuple val( meta ), path( "in/*.fasta.gz" ) "true" } diff --git a/subworkflows/local/nuc_alignments.nf b/subworkflows/local/nuc_alignments.nf index e918b643..cff5235f 100755 --- a/subworkflows/local/nuc_alignments.nf +++ b/subworkflows/local/nuc_alignments.nf @@ -33,17 +33,16 @@ workflow NUC_ALIGNMENTS { // nuc_files .flatten() - .buffer(size: 2) - .combine(reference_tuple) - .combine(intron_size) - .map {meta, nuc_file, ref_meta, ref, intron -> - tuple( [ - id: meta.id, - type: meta.type, - org: meta.org, - intron_size: intron, - split_prefix: nuc_file.toString().split('/')[-1].split('.fasta')[0], - single_end: true + .buffer( size: 2 ) + .combine ( reference_tuple ) + .combine( intron_size ) + .map { meta, nuc_file, ref_meta, ref, intron -> + tuple( [id: meta.id, + type: meta.type, + org: meta.org, + intron_size: intron, + split_prefix: nuc_file.toString().split('/')[-1].split('.fasta')[0], + single_end: true ], nuc_file, ref, @@ -53,15 +52,15 @@ workflow NUC_ALIGNMENTS { false ) } - .multiMap {meta, nuc_file, reference, bool_1, bool_2, bool_3, bool_4 -> - nuc : tuple(meta, nuc_file) + .multiMap { meta, nuc_file, reference, bool_1, bool_2, bool_3, bool_4 -> + nuc : tuple( meta, nuc_file) ref : reference bool_bam_output : bool_1 bool_cigar_paf : bool_2 bool_cigar_bam : bool_3 bool_bedfile : bool_4 } - .set {formatted_input} + .set { formatted_input } // // MODULE: ALIGNS REFERENCE FAIDX TO THE GENE_ALIGNMENT QUERY FILE FROM NUC_FILES @@ -82,13 +81,13 @@ workflow NUC_ALIGNMENTS { // AND DATA TYPE (RNA, CDS, DNA). // MINIMAP2_ALIGN.out.bam - .map {meta, file -> + .map { meta, file -> tuple( [ id: meta.org, type: meta.type ], - file)} - .groupTuple(by: [0]) // group by meta list - .set {merge_input} + file) } + .groupTuple( by: [0] ) // group by meta list + .set { merge_input } // // MODULE: MERGES THE BAM FILES FOUND IN THE GROUPED TUPLE IN REGARDS TO THE REFERENCE @@ -112,7 +111,7 @@ workflow NUC_ALIGNMENTS { // // MODULE: CONVERTS THE ABOVE MERGED BAM INTO BED FORMAT // - BEDTOOLS_BAMTOBED (SAMTOOLS_MERGE.out.bam) + BEDTOOLS_BAMTOBED ( SAMTOOLS_MERGE.out.bam ) ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions) // TODO: try filtering out here too @@ -121,7 +120,7 @@ workflow NUC_ALIGNMENTS { // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // BEDTOOLS_BAMTOBED.out.bed - .map {meta, file -> + .map { meta, file -> tuple ( [ id: meta.id, type: meta.type, lines: file.countLines() @@ -129,7 +128,7 @@ workflow NUC_ALIGNMENTS { file ) } - .set {bedtools_input} + .set { bedtools_input } // // MODULE: SORTS THE ABOVE BED FILE @@ -151,17 +150,16 @@ workflow NUC_ALIGNMENTS { file_size: file.size() ], file ) } - .filter {it[0].file_size >= 141 } // Take the first item in input (meta) and check if size is more than a symlink - .combine(dot_genome) - .multiMap {meta, ref, genome_meta, genome -> + .filter { it[0].file_size >= 141 } // Take the first item in input (meta) and check if size is more than a symlink + .combine( dot_genome ) + .multiMap { meta, ref, genome_meta, genome -> bed_file: tuple( [ id: meta.id, type: meta.type, ], - ref - ) + ref ) dot_genome: genome } - .set {ucsc_input} + .set { ucsc_input } // // MODULE: CONVERTS GENOME FILE AND BED INTO A BIGBED FILE @@ -171,7 +169,7 @@ workflow NUC_ALIGNMENTS { ucsc_input.dot_genome, [] ) - ch_versions = 
ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) emit: nuc_alignment = UCSC_BEDTOBIGBED.out.bigbed.collect() diff --git a/subworkflows/local/pep_alignments.nf b/subworkflows/local/pep_alignments.nf index a3b3cae3..32d6da30 100755 --- a/subworkflows/local/pep_alignments.nf +++ b/subworkflows/local/pep_alignments.nf @@ -34,23 +34,19 @@ workflow PEP_ALIGNMENTS { // pep_files .flatten() - .buffer(size: 2) - .combine (MINIPROT_INDEX.out.index) - .multiMap {pep_meta, pep_file, miniprot_meta, miniprot_index -> - pep_tuple : tuple( [ - id: pep_meta.id, - type: pep_meta.type, - org: pep_meta.org - ], - pep_file - ) - index_file : tuple( [ - id: "Reference" - ], - miniprot_index - ) + .buffer( size: 2 ) + .combine ( MINIPROT_INDEX.out.index ) + .multiMap { pep_meta, pep_file, miniprot_meta, miniprot_index -> + pep_tuple : tuple( [ id: pep_meta.id, + type: pep_meta.type, + org: pep_meta.org + ], + pep_file ) + index_file : tuple( [ id: "Reference", + ], + miniprot_index ) } - .set {formatted_input} + .set { formatted_input } // // MODULE: ALIGNS PEP DATA WITH REFERENCE INDEX @@ -60,21 +56,21 @@ workflow PEP_ALIGNMENTS { formatted_input.pep_tuple, formatted_input.index_file ) - ch_versions = ch_versions.mix(MINIPROT_ALIGN.out.versions) + ch_versions = ch_versions.mix( MINIPROT_ALIGN.out.versions ) // // LOGIC: GROUPS OUTPUT GFFS BASED ON QUERY ORGANISMS AND DATA TYPE (PEP) // MINIPROT_ALIGN.out.gff - .map {meta, file -> + .map { meta, file -> tuple( [ id : meta.org + '_pep', type : meta.type ], file ) } - .groupTuple(by: [0]) - .set {grouped_tuple} + .groupTuple( by: [0] ) + .set { grouped_tuple } // // MODULE: AS ABOVE OUTPUT IS BED FORMAT, IT IS MERGED PER ORGANISM + TYPE @@ -82,20 +78,20 @@ workflow PEP_ALIGNMENTS { CAT_CAT ( grouped_tuple ) - ch_versions = ch_versions.mix(CAT_CAT.out.versions) + ch_versions = ch_versions.mix( CAT_CAT.out.versions ) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // CAT_CAT.out.file_out - .map {meta, file -> + .map { meta, file -> tuple ( [ id: meta.id, lines: file.countLines() ], file ) } - .set {bedtools_input} + .set { bedtools_input } // // MODULE: SORTS ABOVE OUTPUT AND RETAINS GFF SUFFIX @@ -105,7 +101,7 @@ workflow PEP_ALIGNMENTS { bedtools_input , [] ) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: CUTS GFF INTO PUNCHLIST @@ -113,7 +109,7 @@ workflow PEP_ALIGNMENTS { EXTRACT_COV_IDEN ( CAT_CAT.out.file_out ) - ch_versions = ch_versions.mix(EXTRACT_COV_IDEN.out.versions) + ch_versions = ch_versions.mix( EXTRACT_COV_IDEN.out.versions ) // // MODULE: COMPRESS AND INDEX MERGED.GFF @@ -122,7 +118,7 @@ workflow PEP_ALIGNMENTS { TABIX_BGZIPTABIX ( BEDTOOLS_SORT.out.sorted ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) emit: gff_file = BEDTOOLS_SORT.out.sorted diff --git a/subworkflows/local/repeat_density.nf b/subworkflows/local/repeat_density.nf index 2e5f70eb..8764408f 100755 --- a/subworkflows/local/repeat_density.nf +++ b/subworkflows/local/repeat_density.nf @@ -31,7 +31,7 @@ workflow REPEAT_DENSITY { WINDOWMASKER_MKCOUNTS ( reference_tuple ) - ch_versions = ch_versions.mix(WINDOWMASKER_MKCOUNTS.out.versions) + ch_versions = ch_versions.mix( WINDOWMASKER_MKCOUNTS.out.versions ) // // MODULE: CALCULATE THE STATISTICS OF THE MARKED UP REGIONS @@ -40,7 +40,7 @@ workflow REPEAT_DENSITY { 
WINDOWMASKER_MKCOUNTS.out.counts, reference_tuple ) - ch_versions = ch_versions.mix(WINDOWMASKER_USTAT.out.versions) + ch_versions = ch_versions.mix( WINDOWMASKER_USTAT.out.versions ) // // MODULE: USE USTAT OUTPUT TO EXTRACT REPEATS FROM FASTA @@ -48,7 +48,7 @@ workflow REPEAT_DENSITY { EXTRACT_REPEAT( WINDOWMASKER_USTAT.out.intervals ) - ch_versions = ch_versions.mix(EXTRACT_REPEAT.out.versions) + ch_versions = ch_versions.mix( EXTRACT_REPEAT.out.versions ) // // MODULE: CREATE WINDOWS FROM .GENOME FILE @@ -56,7 +56,7 @@ workflow REPEAT_DENSITY { BEDTOOLS_MAKEWINDOWS( dot_genome ) - ch_versions = ch_versions.mix(BEDTOOLS_MAKEWINDOWS.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_MAKEWINDOWS.out.versions ) // // LOGIC: COMBINE TWO CHANNELS AND OUTPUT tuple(meta, windows_file, repeat_file) @@ -70,7 +70,7 @@ workflow REPEAT_DENSITY { repeat_file ) } - .set {intervals} + .set { intervals } // // MODULE: GENERATES THE REPEAT FILE FROM THE WINDOW FILE AND GENOME FILE @@ -79,7 +79,7 @@ workflow REPEAT_DENSITY { intervals, dot_genome ) - ch_versions = ch_versions.mix(BEDTOOLS_INTERSECT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_INTERSECT.out.versions ) // // MODULE: FIXES IDS FOR REPEATS @@ -87,7 +87,7 @@ workflow REPEAT_DENSITY { RENAME_IDS( BEDTOOLS_INTERSECT.out.intersect ) - ch_versions = ch_versions.mix(RENAME_IDS.out.versions) + ch_versions = ch_versions.mix( RENAME_IDS.out.versions ) // // MODULE: SORTS THE ABOVE BED FILES @@ -95,17 +95,17 @@ workflow REPEAT_DENSITY { GNU_SORT_A ( RENAME_IDS.out.bed // Intersect file ) - ch_versions = ch_versions.mix(GNU_SORT_A.out.versions) + ch_versions = ch_versions.mix( GNU_SORT_A.out.versions ) GNU_SORT_B ( dot_genome // Genome file - Will not run unless genome file is sorted to ) - ch_versions = ch_versions.mix(GNU_SORT_B.out.versions) + ch_versions = ch_versions.mix( GNU_SORT_B.out.versions ) GNU_SORT_C ( BEDTOOLS_MAKEWINDOWS.out.bed // Windows file ) - ch_versions = ch_versions.mix(GNU_SORT_C.out.versions) + ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) // // MODULE: ADDS 4TH COLUMN TO BED FILE USED IN THE REPEAT DENSITY GRAPH @@ -113,7 +113,7 @@ workflow REPEAT_DENSITY { REFORMAT_INTERSECT ( GNU_SORT_A.out.sorted ) - ch_versions = ch_versions.mix(REFORMAT_INTERSECT.out.versions) + ch_versions = ch_versions.mix( GNU_SORT_C.out.versions ) // // MODULE: TABIX AND GZIP THE REPEAT DENSITY BED FILE FOR JBROWSE @@ -121,7 +121,7 @@ workflow REPEAT_DENSITY { TABIX_BGZIPTABIX ( REFORMAT_INTERSECT.out.bed ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) // // LOGIC: COMBINES THE REFORMATTED INTERSECT FILE AND WINDOWS FILE CHANNELS AND SORTS INTO @@ -136,7 +136,7 @@ workflow REPEAT_DENSITY { bed ) } - .set {for_mapping} + .set { for_mapping } // // MODULE: MAPS THE REPEATS AGAINST THE REFERENCE GENOME @@ -145,7 +145,7 @@ workflow REPEAT_DENSITY { for_mapping, GNU_SORT_B.out.sorted ) - ch_versions = ch_versions.mix(BEDTOOLS_MAP.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_MAP.out.versions ) // // MODULE: REPLACES . 
WITH 0 IN MAPPED FILE @@ -153,16 +153,16 @@ workflow REPEAT_DENSITY { REPLACE_DOTS ( BEDTOOLS_MAP.out.mapped ) - ch_versions = ch_versions.mix(REPLACE_DOTS.out.versions) + ch_versions = ch_versions.mix( REPLACE_DOTS.out.versions ) // // MODULE: CONVERTS GENOME FILE AND BED INTO A BIGWIG FILE // UCSC_BEDGRAPHTOBIGWIG( REPLACE_DOTS.out.bed, - GNU_SORT_B.out.sorted.map{it[1]} // Pulls file from tuple of meta and file + GNU_SORT_B.out.sorted.map { it[1] } // Pulls file from tuple of meta and file ) - ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions) + ch_versions = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions ) emit: repeat_density = UCSC_BEDGRAPHTOBIGWIG.out.bigwig diff --git a/subworkflows/local/selfcomp.nf b/subworkflows/local/selfcomp.nf index 9d1e0e95..25022c77 100755 --- a/subworkflows/local/selfcomp.nf +++ b/subworkflows/local/selfcomp.nf @@ -36,7 +36,7 @@ workflow SELFCOMP { SELFCOMP_SPLITFASTA( reference_tuple ) - ch_versions = ch_versions.mix(SELFCOMP_SPLITFASTA.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_SPLITFASTA.out.versions ) // // LOGIC: CALCULATE THE NUMBER OF GB WHICH WILL DICTATE THE NUMBER OF @@ -44,12 +44,12 @@ workflow SELFCOMP { // ALSO CALCULATES THE NUMBER OF TOTAL WINDOWS NEEDED IN THE REFERENCE // reference_tuple - .map{it, file -> file.size()} - .set{file_size} // Using set as TAP will force the pipeline to not complete successfully in some cases + .map{ it, file -> file.size()} + .set { file_size } // Using set as TAP will force the pipeline to not complete successfully in some cases file_size .sum{it / 1e9} - .collect {new java.math.BigDecimal (it).setScale(0, RoundingMode.UP)} + .collect { new java.math.BigDecimal (it).setScale(0, RoundingMode.UP) } .flatten() .set { chunk_number } @@ -61,7 +61,7 @@ workflow SELFCOMP { SELFCOMP_SPLITFASTA.out.fa, chunk_number ) - ch_versions = ch_versions.mix(CHUNKFASTA.out.versions) + ch_versions = ch_versions.mix( CHUNKFASTA.out.versions ) // // LOGIC: STRIP META FROM QUERY, AND COMBINE WITH REFERENCE FILE @@ -69,19 +69,19 @@ workflow SELFCOMP { // OR n=((REFERENCE / 1E9) * (REFENCE / 1E9)) IF GENOME.SIZE() > 1GB // CHUNKFASTA.out.fasta - .map{meta, query -> + .map{ meta, query -> query } .collect() // Collect any output from CHUNKFASTA - .map {it -> + .map { it -> tuple( [ len: it.size() ], // Calc length of list it ) } - .set {len_ch} // tap out to preserve length of CHUNKFASTA list + .set { len_ch } // tap out to preserve length of CHUNKFASTA list len_ch // tap swapped with set as tap stops pipeline completion - .map {meta, files -> + .map { meta, files -> files } .flatten() // flatten list into singles @@ -99,7 +99,7 @@ workflow SELFCOMP { } .transpose() // Transpose the channel so that we have a channel for file in query // allows this to work on list of 1 and beyond - .map{meta, ref, qry -> + .map { meta, ref, qry -> tuple( [ id: meta.id, sz: meta.sz, it: qry.toString().split('/')[-1] // get file name of the new query @@ -108,7 +108,7 @@ workflow SELFCOMP { qry ) } - .set{mummer_input} + .set{ mummer_input } // // MODULE: ALIGNS 1GB CHUNKS TO 500KB CHUNKS @@ -117,25 +117,25 @@ workflow SELFCOMP { MUMMER( mummer_input ) - ch_versions = ch_versions.mix(MUMMER.out.versions) + ch_versions = ch_versions.mix( MUMMER.out.versions ) // // LOGIC: COLLECT COORD FILES AND CONVERT TO LIST OF FILES // ADD REFERENCE META // MUMMER.out.coords - .map{meta, file -> + .map{ meta, file -> file } .collect() .toList() - .combine(reference_tuple) - .map{files, meta, ref -> + .combine( reference_tuple ) + .map 
{ files, meta, ref -> tuple( meta, files ) } - .set {ch_mummer_files} + .set { ch_mummer_files } // // MODULE: MERGES MUMMER ALIGNMENT FILES @@ -143,7 +143,7 @@ workflow SELFCOMP { CAT_CAT( ch_mummer_files ) - ch_versions = ch_versions.mix(CAT_CAT.out.versions) + ch_versions = ch_versions.mix( CAT_CAT.out.versions ) // // MODULE: CONVERT THE MUMMER ALIGNMENTS INTO BED FORMAT @@ -152,7 +152,7 @@ workflow SELFCOMP { CAT_CAT.out.file_out, motif_len ) - ch_versions = ch_versions.mix(SELFCOMP_MUMMER2BED.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_MUMMER2BED.out.versions ) // // MODULE: GENERATE A LIST OF IDs AND GENOMIC POSITIONS OF SELFCOMPLEMENTARY REGIONS @@ -162,20 +162,20 @@ workflow SELFCOMP { SELFCOMP_MUMMER2BED.out.bedfile, SELFCOMP_SPLITFASTA.out.agp ) - ch_versions = ch_versions.mix(SELFCOMP_MAPIDS.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_MAPIDS.out.versions ) // // LOGIC: ADDING LINE COUNT TO THE FILE FOR BETTER RESOURCE USAGE // SELFCOMP_MAPIDS.out.bedfile - .map{meta, file -> + .map { meta, file -> tuple ( [ id: meta.id, lines: file.countLines() ], file ) } - .set{bedtools_input} + .set { bedtools_input } // // MODULE: SORTS ABOVE OUTPUT BED FILE AND RETAINS BED SUFFIX @@ -184,7 +184,7 @@ workflow SELFCOMP { bedtools_input, [] ) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULE: BUILD ALIGNMENT BLOCKS @@ -192,7 +192,7 @@ workflow SELFCOMP { SELFCOMP_ALIGNMENTBLOCKS( BEDTOOLS_SORT.out.sorted ) - ch_versions = ch_versions.mix(SELFCOMP_ALIGNMENTBLOCKS.out.versions) + ch_versions = ch_versions.mix( SELFCOMP_ALIGNMENTBLOCKS.out.versions ) // // MODULE: SORT BLOCKS FILES AND FILTER BY MOTIF LENGTH @@ -200,7 +200,7 @@ workflow SELFCOMP { CONCATBLOCKS( SELFCOMP_ALIGNMENTBLOCKS.out.blockfile ) - ch_versions = ch_versions.mix(CONCATBLOCKS.out.versions) + ch_versions = ch_versions.mix( CONCATBLOCKS.out.versions ) // // MODULE: CONVERTS ABOVE OUTPUT INTO BIGBED FORMAT @@ -210,7 +210,7 @@ workflow SELFCOMP { dot_genome.map{it[1]}, // Pulls file from tuple ( meta and file ) selfcomp_as ) - ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions ) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) emit: ch_bigbed = UCSC_BEDTOBIGBED.out.bigbed diff --git a/subworkflows/local/synteny.nf b/subworkflows/local/synteny.nf index 04b43cf4..33c0875b 100755 --- a/subworkflows/local/synteny.nf +++ b/subworkflows/local/synteny.nf @@ -18,21 +18,21 @@ workflow SYNTENY { // AND PARSE INTO CHANNEL PER GENOME // reference_tuple - .combine(synteny_path) - .map{meta, reference, dir_path -> + .combine( synteny_path ) + .map { meta, reference, dir_path -> file("${dir_path}${meta.class}/*.fasta") } .flatten() - .combine(reference_tuple) - .multiMap{syntenic_ref, meta, ref -> - syntenic_tuple : tuple(meta, syntenic_ref) + .combine( reference_tuple ) + .multiMap { syntenic_ref, meta, ref -> + syntenic_tuple : tuple( meta, syntenic_ref ) reference_fa : ref bool_bam_output : false bool_cigar_paf : true bool_cigar_bam : false bool_bedfile : false } - .set {mm_input} + .set { mm_input } // // MODULE: ALIGNS THE SUNTENIC GENOMES TO THE REFERENCE GENOME @@ -46,7 +46,7 @@ workflow SYNTENY { mm_input.bool_cigar_bam, mm_input.bool_bedfile, ) - ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) + ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions ) emit: ch_paf = MINIMAP2_ALIGN.out.paf diff --git a/subworkflows/local/telo_finder.nf b/subworkflows/local/telo_finder.nf index 
aa0b7b80..fe5b704b 100755 --- a/subworkflows/local/telo_finder.nf +++ b/subworkflows/local/telo_finder.nf @@ -24,7 +24,7 @@ workflow TELO_FINDER { reference_tuple, teloseq ) - ch_versions = ch_versions.mix(FIND_TELOMERE_REGIONS.out.versions) + ch_versions = ch_versions.mix( FIND_TELOMERE_REGIONS.out.versions ) // // MODULE: GENERATES A WINDOWS FILE FROM THE ABOVE @@ -32,7 +32,7 @@ workflow TELO_FINDER { FIND_TELOMERE_WINDOWS ( FIND_TELOMERE_REGIONS.out.telomere ) - ch_versions = ch_versions.mix(FIND_TELOMERE_WINDOWS.out.versions) + ch_versions = ch_versions.mix( FIND_TELOMERE_WINDOWS.out.versions ) // // MODULE: EXTRACTS THE LOCATION OF TELOMERIC SEQUENCE BASED ON THE WINDOWS @@ -40,7 +40,7 @@ workflow TELO_FINDER { EXTRACT_TELO ( FIND_TELOMERE_WINDOWS.out.windows ) - ch_versions = ch_versions.mix(EXTRACT_TELO.out.versions) + ch_versions = ch_versions.mix( EXTRACT_TELO.out.versions ) // // MODULE: BGZIP AND TABIX THE OUTPUT FILE @@ -48,7 +48,7 @@ workflow TELO_FINDER { TABIX_BGZIPTABIX ( EXTRACT_TELO.out.bed ) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) + ch_versions = ch_versions.mix( TABIX_BGZIPTABIX.out.versions ) emit: bed_file = EXTRACT_TELO.out.bed diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index a734a728..e3ad75da 100755 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -24,18 +24,18 @@ workflow YAML_INPUT { .flatten() .combine( workflow_id ) .multiMap { data, id -> - assembly: (data.assembly) - assembly_reads: (data.assem_reads) - hic_data: (data.hic_data) - kmer_profile: (data.kmer_profile) - reference: (file(data.reference_file, checkIfExists: true)) - alignment: (id == "FULL" || id == "JBROWSE" ? data.alignment : "") - self_comp: (id == "FULL" || id == "JBROWSE" ? data.self_comp : "") - synteny: (id == "FULL" || id == "JBROWSE" ? data.synteny : "") - intron: (id == "FULL" || id == "JBROWSE" ? data.intron : "") - busco_gene: (data.busco) - teloseq: (data.telomere) - map_order: (data.map_order) + assembly: ( data.assembly ) + assembly_reads: ( data.assem_reads ) + hic_data: ( data.hic_data ) + kmer_profile: ( data.kmer_profile ) + reference: ( file(data.reference_file, checkIfExists: true) ) + alignment: ( id == "FULL" ? data.alignment : "" ) + self_comp: ( id == "FULL" ? data.self_comp : "" ) + synteny: ( id == "FULL" ? data.synteny : "" ) + intron: ( id == "FULL" ? data.intron : "" ) + busco_gene: ( data.busco ) + teloseq: ( data.telomere ) + map_order: ( data.map_order) } .set{ group } @@ -44,7 +44,7 @@ workflow YAML_INPUT { // group .assembly - .multiMap{ data -> + .multiMap { data -> assem_level: data.assem_level assem_version: data.assem_version sample_id: data.sample_id @@ -52,98 +52,97 @@ workflow YAML_INPUT { defined_class: data.defined_class project_id: data.project_id } - .set{assembly_data} + .set { assembly_data } group .assembly_reads - .multiMap{ data -> + .multiMap { data -> read_type: data.read_type read_data: data.read_data supplement: data.supplementary_data } - .set{assem_reads} + .set { assem_reads } group .hic_data - .multiMap{ data -> + .multiMap { data -> hic_cram: data.hic_cram hic_aligner: data.hic_aligner } - .set {hic} + .set { hic } group .kmer_profile - .multiMap{ data -> + .multiMap { data -> length: data.kmer_length dir: data.dir } - .set {kmer_profiling} + .set { kmer_profiling } group .alignment - .combine(workflow_id) - .multiMap{ data, id -> - data_dir: (id == "FULL" || id == "JBROWSE" ? data.data_dir : "") - common_name: (id == "FULL" || id == "JBROWSE" ? 
data.common_name : "") - geneset_id: (id == "FULL" || id == "JBROWSE" ? data.geneset_id : "") + .combine( workflow_id ) + .multiMap { data, id -> + data_dir: (id == "FULL" ? data.data_dir : "") + common_name: (id == "FULL" ? data.common_name : "") + geneset_id: (id == "FULL" ? data.geneset_id : "") } - .set{alignment_data} + .set{ alignment_data } group .self_comp - .combine(workflow_id) - .multiMap{ data, id -> - motif_len: (id == "FULL" || id == "JBROWSE" ? data.motif_len : "") - mummer_chunk: (id == "FULL" || id == "JBROWSE" ? data.mummer_chunk : "") + .combine( workflow_id ) + .multiMap { data, id -> + motif_len: (id == "FULL" ? data.motif_len : "") + mummer_chunk: (id == "FULL" ? data.mummer_chunk : "") } - .set{selfcomp_data} + .set{ selfcomp_data } group .synteny - .combine(workflow_id) - .multiMap{ data, id -> - synteny_genome: (id == "FULL" || id == "JBROWSE" ? data.synteny_genome_path: "") + .combine( workflow_id ) + .multiMap { data, id -> + synteny_genome: (id == "FULL" ? data.synteny_genome_path: "") } - .set{synteny_data} + .set{ synteny_data } group .intron - .combine(workflow_id) - .multiMap{ data, id -> - size: (id == "FULL" || id == "JBROWSE" ? data.size : "") + .combine( workflow_id ) + .multiMap { data, id -> + size: (id == "FULL" ? data.size : "") } - .set {intron_size} + .set { intron_size } group .teloseq - .multiMap{ data -> + .multiMap { data -> teloseq: data.teloseq } - .set {teloseq} + .set { teloseq } group .busco_gene - .multiMap{ data -> + .multiMap { data -> lineage: data.lineage lineages_path: data.lineages_path } - .set {busco_lineage} + .set { busco_lineage } // // LOGIC: COMBINE SOME CHANNELS INTO VALUES REQUIRED DOWNSTREAM // assembly_data.sample_id - .combine(assembly_data.assem_version) - .map{it1, it2 -> - ("${it1}_${it2}") - } - .set{tolid_version} + .combine( assembly_data.assem_version ) + .map { it1, it2 -> + ("${it1}_${it2}")} + .set { tolid_version } tolid_version - .combine(group.reference) - .combine(assembly_data.defined_class) - .combine(assembly_data.project_id) - .map{sample, ref_file, defined_class, project -> + .combine( group.reference ) + .combine( assembly_data.defined_class ) + .combine( assembly_data.project_id ) + .map { sample, ref_file, defined_class, project -> tuple( [ id: sample, class: defined_class, project_type: project @@ -151,13 +150,13 @@ workflow YAML_INPUT { ref_file ) } - .set{ref_ch} + .set { ref_ch } - if (assem_reads.read_type.filter { it == "hifi" } || assem_reads.read_type.filter { it == "clr" } || assem_reads.read_type.filter { it == "ont" }) { + if ( assem_reads.read_type.filter { it == "hifi" } || assem_reads.read_type.filter { it == "clr" } || assem_reads.read_type.filter { it == "ont" } ) { tolid_version - .combine(assem_reads.read_type) - .combine(assem_reads.read_data) - .map{sample, type, data -> + .combine( assem_reads.read_type ) + .combine( assem_reads.read_data ) + .map{ sample, type, data -> tuple( [ id : sample, single_end : true, read_type : type @@ -165,13 +164,13 @@ workflow YAML_INPUT { data ) } - .set {read_ch} + .set { read_ch } } - else if (assem_reads.read_type.filter {it == "illumina"}) { + else if ( assem_reads.read_type.filter { it == "illumina" } ) { tolid_version - .combine(assem_reads.read_type) - .combine(assem_reads.read_data) - .map{sample, type, data -> + .combine( assem_reads.read_type ) + .combine( assem_reads.read_data ) + .map{ sample, type, data -> tuple( [ id : sample, single_end : false, read_type : type @@ -179,40 +178,40 @@ workflow YAML_INPUT { data ) } - .set {read_ch} + 
.set { read_ch } } tolid_version - .combine(hic.hic_cram) - .combine(hic.hic_aligner) - .map{sample, data, aligner -> + .combine( hic.hic_cram ) + .combine( hic.hic_aligner ) + .map { sample, data, aligner -> tuple( [ id: sample, aligner: aligner ], data ) } - .set {hic_ch} + .set { hic_ch } tolid_version - .combine(assem_reads.supplement) - .map{sample, data -> + .combine( assem_reads.supplement ) + .map { sample, data -> tuple( [ id: sample ], data ) } - .set {supplement_ch} + .set { supplement_ch } tolid_version - .combine (assembly_data.sample_id) - .combine (kmer_profiling.length) - .combine (kmer_profiling.dir) - .map{sample, sample_id, kmer_len, dir -> + .combine ( assembly_data.sample_id ) + .combine ( kmer_profiling.length ) + .combine ( kmer_profiling.dir ) + .map { sample, sample_id, kmer_len, dir -> tuple( [ id: sample, kmer: kmer_len ], file("${dir}/k${kmer_len}/${sample_id}.k${kmer_len}.ktab") // Don't check for existence yet ) } - .set {kmer_prof} + .set { kmer_prof } emit: assembly_id = tolid_version @@ -246,5 +245,5 @@ workflow YAML_INPUT { } def readYAML( yamlfile ) { - return new Yaml().load( new FileReader( yamlfile.toString())) + return new Yaml().load( new FileReader( yamlfile.toString() ) ) } diff --git a/workflows/treeval.nf b/workflows/treeval.nf index 8dc903bc..526075da 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -63,14 +63,6 @@ workflow TREEVAL { // ch_versions = Channel.empty() - exclude_workflow_steps = params.steps ? params.steps.split(",") : "NONE" - - full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] - - if (!full_list.containsAll(exclude_workflow_steps)) { - exit 1, "There is an extra argument given on Command Line: \n Check contents of --exclude: $exclude_workflow_steps\nMaster list is: $full_list" - } - params.entry = 'FULL' input_ch = Channel.fromPath(params.input, checkIfExists: true) @@ -119,17 +111,15 @@ workflow TREEVAL { // SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate // file with enzymatic digest sites. 
// - if ( !exclude_workflow_steps.contains("insilico_digest")) { - ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) + ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) - INSILICO_DIGEST ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - ch_enzyme, - digest_asfile - ) - ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) - } + INSILICO_DIGEST ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + ch_enzyme, + digest_asfile + ) + ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) // // SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS @@ -145,141 +135,115 @@ workflow TREEVAL { // // SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data // - if ( !exclude_workflow_steps.contains("gene_alignment")) { - GENE_ALIGNMENT ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - YAML_INPUT.out.align_data_dir, - YAML_INPUT.out.align_geneset, - YAML_INPUT.out.align_common, - YAML_INPUT.out.intron_size, - gene_alignment_asfiles - ) - ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) - } + GENE_ALIGNMENT ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + YAML_INPUT.out.align_data_dir, + YAML_INPUT.out.align_geneset, + YAML_INPUT.out.align_common, + YAML_INPUT.out.intron_size, + gene_alignment_asfiles + ) + ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // - if ( !exclude_workflow_steps.contains("repeat_density")) { - REPEAT_DENSITY ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome - ) - ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) - } + REPEAT_DENSITY ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome + ) + ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // - if ( !exclude_workflow_steps.contains("gap_finder")) { - GAP_FINDER ( - YAML_INPUT.out.reference_ch - ) - ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) - } + GAP_FINDER ( + YAML_INPUT.out.reference_ch + ) + ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) // // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as // file to generate a file containing sites of self-complementary sequnce. // - if ( !exclude_workflow_steps.contains("selfcomp")) { - SELFCOMP ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.mummer_chunk, - YAML_INPUT.out.motif_len, - selfcomp_asfile - ) - ch_versions = ch_versions.mix( SELFCOMP.out.versions ) - } + SELFCOMP ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.mummer_chunk, + YAML_INPUT.out.motif_len, + selfcomp_asfile + ) + ch_versions = ch_versions.mix( SELFCOMP.out.versions ) // // SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence // and generated a file of syntenic blocks. 
// - if ( !exclude_workflow_steps.contains("synteny")) { - SYNTENY ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.synteny_path - ) - ch_versions = ch_versions.mix( SYNTENY.out.versions ) - } + SYNTENY ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.synteny_path + ) + ch_versions = ch_versions.mix( SYNTENY.out.versions ) // // SUBWORKFLOW: Takes reference, pacbio reads // - if ( !exclude_workflow_steps.contains("read_coverage")) { - READ_COVERAGE ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.read_ch - ) - coverage_report = READ_COVERAGE.out.ch_reporting - ch_versions = ch_versions.mix(READ_COVERAGE.out.versions) - } else { - coverage_report = [] - } + READ_COVERAGE ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // - if ( !exclude_workflow_steps.contains("telo_finder")) { - TELO_FINDER ( YAML_INPUT.out.reference_ch, - YAML_INPUT.out.teloseq - ) - ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) - } + TELO_FINDER ( YAML_INPUT.out.reference_ch, + YAML_INPUT.out.teloseq + ) + ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) // // SUBWORKFLOW: GENERATE BUSCO ANNOTATION FOR ANCESTRAL UNITS // - if ( !exclude_workflow_steps.contains("busco")) { - BUSCO_ANNOTATION ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.lineageinfo, - YAML_INPUT.out.lineagespath, - buscogene_asfile, - ancestral_table - ) - ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) - } + BUSCO_ANNOTATION ( + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.lineageinfo, + YAML_INPUT.out.lineagespath, + buscogene_asfile, + ancestral_table + ) + ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // - if ( !exclude_workflow_steps.contains("kmer")) { - KMER ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( KMER.out.versions ) - } + KMER ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( KMER.out.versions ) // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // - if ( !exclude_workflow_steps.contains("hic_mapping")) { - HIC_MAPPING ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.hic_reads_ch, - YAML_INPUT.out.assembly_id, - GAP_FINDER.out.gap_file, - READ_COVERAGE.out.ch_covbw_nor, - READ_COVERAGE.out.ch_covbw_avg, - TELO_FINDER.out.bedgraph_file, - REPEAT_DENSITY.out.repeat_density, - params.entry - ) - ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) - hic_report = HIC_MAPPING.out.ch_reporting - } else { - hic_report = [] - } + HIC_MAPPING ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.hic_reads_ch, + YAML_INPUT.out.assembly_id, + GAP_FINDER.out.gap_file, + READ_COVERAGE.out.ch_covbw_nor, + READ_COVERAGE.out.ch_covbw_avg, + TELO_FINDER.out.bedgraph_file, + REPEAT_DENSITY.out.repeat_density, + params.entry + ) + ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) // // SUBWORKFLOW: Collates version data from prior subworflows @@ -292,8 +256,8 @@ workflow TREEVAL { // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // YAML_INPUT.out.reference_ch - .combine( coverage_report ) - 
.combine( hic_report ) + .combine( READ_COVERAGE.out.ch_reporting ) + .combine( HIC_MAPPING.out.ch_reporting ) .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions ) .map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [ rf_data: tuple( diff --git a/workflows/treeval_jbrowse.nf b/workflows/treeval_jbrowse.nf deleted file mode 100755 index 62d20c7b..00000000 --- a/workflows/treeval_jbrowse.nf +++ /dev/null @@ -1,212 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowTreeval.initialise(params, log) - -// Check input path parameters to see if they exist -// params.input is the treeval yaml -def checkPathParamList = [ params.input ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// IMPORT: SUBWORKFLOWS CALLED BY THE MAIN -// -include { YAML_INPUT } from '../subworkflows/local/yaml_input' -include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' -include { INSILICO_DIGEST } from '../subworkflows/local/insilico_digest' -include { GENE_ALIGNMENT } from '../subworkflows/local/gene_alignment' -include { SELFCOMP } from '../subworkflows/local/selfcomp' -include { SYNTENY } from '../subworkflows/local/synteny' -include { BUSCO_ANNOTATION } from '../subworkflows/local/busco_annotation' -include { KMER } from '../subworkflows/local/kmer' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// IMPORT: Installed directly from nf-core/modules -// -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow TREEVAL_JBROWSE { - main: - // - // PRE-PIPELINE CHANNEL SETTING - channel setting for required files - // - ch_versions = Channel.empty() - - params.entry = 'JBROWSE' - input_ch = Channel.fromPath(params.input, checkIfExists: true) - - Channel - .fromPath( "${projectDir}/assets/gene_alignment/assm_*.as", checkIfExists: true) - .map { it -> - tuple ([ type : it.toString().split('/')[-1].split('_')[-1].split('.as')[0] ], - file(it) - )} - .set { gene_alignment_asfiles } - - Channel - .fromPath( "${projectDir}/assets/digest/digest.as", checkIfExists: true ) - .set { digest_asfile } - - Channel - .fromPath( "${projectDir}/assets/self_comp/selfcomp.as", checkIfExists: true ) - .set { selfcomp_asfile } - - Channel - .fromPath( "${projectDir}/assets/busco_gene/busco.as", checkIfExists: true ) - .set { buscogene_asfile } - - Channel - .fromPath( "${projectDir}/assets/busco_gene/lep_ancestral.tsv", checkIfExists: true ) - .set { ancestral_table } - - // - // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field - // - YAML_INPUT ( - input_ch, - params.entry - ) - - // - // 
SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file - // - GENERATE_GENOME ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.map_order_ch - ) - ch_versions = ch_versions.mix( GENERATE_GENOME.out.versions ) - - // - // SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate - // file with enzymatic digest sites. - // - ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) - - INSILICO_DIGEST ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - ch_enzyme, - digest_asfile - ) - ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions ) - - // - // SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS - // ON THOSE CHUNKS - // THIS WILL BE REQUIRED FOR LARGER GENOMES EST > 6GB - // - // REFERENCE_GENOME_SPLIT --> SELFCOMP - // --> GENE_ALIGNMENT - // BOTH WOULD REQUIRE A POST SUBWORKFLOW MERGE STEP TO MERGE TOGETHER THE SCAFFOLD - // BASED ALIGNMENTS/SELFCOMPS INTO A GENOME REPRESENTATIVE ONE. - // FOR GENE ALIGNMENT WOULD THIS REQUIRE A .GENOME FILE AND INDEX PER SCAFFOLD? - - // - // SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data - // - GENE_ALIGNMENT ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - YAML_INPUT.out.align_data_dir, - YAML_INPUT.out.align_geneset, - YAML_INPUT.out.align_common, - YAML_INPUT.out.intron_size, - gene_alignment_asfiles - ) - ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions) - - // - // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as - // file to generate a file containing sites of self-complementary sequnce. - // - SELFCOMP ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.mummer_chunk, - YAML_INPUT.out.motif_len, - selfcomp_asfile - ) - ch_versions = ch_versions.mix( SELFCOMP.out.versions ) - - // - // SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence - // and generated a file of syntenic blocks. 
- // - SYNTENY ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.synteny_path - ) - ch_versions = ch_versions.mix( SYNTENY.out.versions ) - - // - // SUBWORKFLOW: GENERATE BUSCO ANNOTATION FOR ANCESTRAL UNITS - // - BUSCO_ANNOTATION ( - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.lineageinfo, - YAML_INPUT.out.lineagespath, - buscogene_asfile, - ancestral_table - ) - ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions ) - - // - // SUBWORKFLOW: Takes reads and assembly, produces kmer plot - // - KMER ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( KMER.out.versions ) - - // - // SUBWORKFLOW: Collates version data from prior subworflows - // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - emit: - software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml - versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// PIPELINE ENTRYPOINT SUBWORKFLOWS WILL USE THE IMPLICIT ONCOMPLETE BLOCK - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf index 0771a72d..8a483b5e 100755 --- a/workflows/treeval_rapid.nf +++ b/workflows/treeval_rapid.nf @@ -53,14 +53,6 @@ workflow TREEVAL_RAPID { main: ch_versions = Channel.empty() - exclude_workflow_steps = params.steps ? params.steps.split(",") : "NONE" - - full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] - - if (!full_list.containsAll(exclude_workflow_steps)) { - exit 1, "There is an extra argument given on Command Line: \n Check contents of --exclude: $exclude_workflow_steps\nMaster list is: $full_list" - } - params.entry = 'RAPID' input_ch = Channel.fromPath(params.input, checkIfExists: true) // @@ -83,69 +75,55 @@ workflow TREEVAL_RAPID { // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // - - if ( !exclude_workflow_steps.contains("repeat_density")) { - REPEAT_DENSITY ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome - ) - ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) - } + REPEAT_DENSITY ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome + ) + ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // - if ( !exclude_workflow_steps.contains("gap_finder")) { - GAP_FINDER ( - YAML_INPUT.out.reference_ch - ) - ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) - } + GAP_FINDER ( + YAML_INPUT.out.reference_ch + ) + ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // - if ( !exclude_workflow_steps.contains("telo_finder")) { - TELO_FINDER ( YAML_INPUT.out.reference_ch, - YAML_INPUT.out.teloseq - ) - ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) - } + TELO_FINDER ( YAML_INPUT.out.reference_ch, + YAML_INPUT.out.teloseq + ) + ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) // // SUBWORKFLOW: Takes reference, 
pacbio reads // - if ( !exclude_workflow_steps.contains("read_coverage")) { - READ_COVERAGE ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) - } else { - coverage_report = [] - } + READ_COVERAGE ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) + // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // - if ( !exclude_workflow_steps.contains("hic_mapping")) { - HIC_MAPPING ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.hic_reads_ch, - YAML_INPUT.out.assembly_id, - GAP_FINDER.out.gap_file, - READ_COVERAGE.out.ch_covbw_nor, - READ_COVERAGE.out.ch_covbw_avg, - TELO_FINDER.out.bedgraph_file, - REPEAT_DENSITY.out.repeat_density, - params.entry - ) - ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) - } else { - hic_report = [] - } + HIC_MAPPING ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.hic_reads_ch, + YAML_INPUT.out.assembly_id, + GAP_FINDER.out.gap_file, + READ_COVERAGE.out.ch_covbw_nor, + READ_COVERAGE.out.ch_covbw_avg, + TELO_FINDER.out.bedgraph_file, + REPEAT_DENSITY.out.repeat_density, + params.entry + ) + ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) // // SUBWORKFLOW: Collates version data from prior subworflows @@ -158,8 +136,8 @@ workflow TREEVAL_RAPID { // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // YAML_INPUT.out.reference_ch - .combine( coverage_report ) - .combine( hic_report ) + .combine( READ_COVERAGE.out.ch_reporting ) + .combine( HIC_MAPPING.out.ch_reporting ) .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions ) .map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [ rf_data: tuple( diff --git a/workflows/treeval_rapid_tol.nf b/workflows/treeval_rapid_tol.nf index 4929fd16..5d651a1a 100755 --- a/workflows/treeval_rapid_tol.nf +++ b/workflows/treeval_rapid_tol.nf @@ -53,14 +53,6 @@ workflow TREEVAL_RAPID_TOL { main: ch_versions = Channel.empty() - exclude_workflow_steps = params.exclude ? 
params.exclude.split(",") : "NONE" - - full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", "NONE"] - - if (!full_list.containsAll(exclude_workflow_steps)) { - exit 1, "There is an extra argument given on Command Line: \n Check contents of --exclude: $exclude_workflow_steps\nMaster list is: $full_list" - } - params.entry = 'RAPID_TOL' input_ch = Channel.fromPath(params.input, checkIfExists: true) // @@ -83,80 +75,64 @@ workflow TREEVAL_RAPID_TOL { // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // - if ( !exclude_workflow_steps.contains("repeat_density")) { - REPEAT_DENSITY ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome - ) - ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) - } + REPEAT_DENSITY ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome + ) + ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions ) // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // - if ( !exclude_workflow_steps.contains("gap_finder")) { - GAP_FINDER ( - YAML_INPUT.out.reference_ch - ) - ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) - } + GAP_FINDER ( + YAML_INPUT.out.reference_ch + ) + ch_versions = ch_versions.mix( GAP_FINDER.out.versions ) // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // - if ( !exclude_workflow_steps.contains("telo_finder")) { - TELO_FINDER ( YAML_INPUT.out.reference_ch, - YAML_INPUT.out.teloseq - ) - ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) - } + TELO_FINDER ( YAML_INPUT.out.reference_ch, + YAML_INPUT.out.teloseq + ) + ch_versions = ch_versions.mix( TELO_FINDER.out.versions ) // // SUBWORKFLOW: Takes reference, pacbio reads // - if ( !exclude_workflow_steps.contains("read_coverage")) { - READ_COVERAGE ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) - } else { - coverage_report = [] - } + READ_COVERAGE ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( READ_COVERAGE.out.versions ) // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // - if ( !exclude_workflow_steps.contains("kmer")) { - KMER ( - YAML_INPUT.out.reference_ch, - YAML_INPUT.out.read_ch - ) - ch_versions = ch_versions.mix( KMER.out.versions ) - } + KMER ( + YAML_INPUT.out.reference_ch, + YAML_INPUT.out.read_ch + ) + ch_versions = ch_versions.mix( KMER.out.versions ) // // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // - if ( !exclude_workflow_steps.contains("hic_mapping")) { - HIC_MAPPING ( - YAML_INPUT.out.reference_ch, - GENERATE_GENOME.out.ref_index, - GENERATE_GENOME.out.dot_genome, - YAML_INPUT.out.hic_reads_ch, - YAML_INPUT.out.assembly_id, - GAP_FINDER.out.gap_file, - READ_COVERAGE.out.ch_covbw_nor, - READ_COVERAGE.out.ch_covbw_avg, - TELO_FINDER.out.bedgraph_file, - REPEAT_DENSITY.out.repeat_density, - params.entry - ) - ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) - } else { - hic_report = [] - } + HIC_MAPPING ( + YAML_INPUT.out.reference_ch, + GENERATE_GENOME.out.ref_index, + GENERATE_GENOME.out.dot_genome, + YAML_INPUT.out.hic_reads_ch, + YAML_INPUT.out.assembly_id, + GAP_FINDER.out.gap_file, + READ_COVERAGE.out.ch_covbw_nor, + READ_COVERAGE.out.ch_covbw_avg, + TELO_FINDER.out.bedgraph_file, + 
REPEAT_DENSITY.out.repeat_density, + params.entry + ) + ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) // // SUBWORKFLOW: Collates version data from prior subworflows @@ -169,8 +145,8 @@ workflow TREEVAL_RAPID_TOL { // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // YAML_INPUT.out.reference_ch - .combine( coverage_report ) - .combine( hic_report ) + .combine( READ_COVERAGE.out.ch_reporting ) + .combine( HIC_MAPPING.out.ch_reporting ) .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions ) .map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [ rf_data: tuple( From f27d0c5cdad32438dd110888d9a259d41b7ed6d3 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Dec 2024 12:46:01 +0000 Subject: [PATCH 02/16] Updating ignored --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index fb2aa578..2e860182 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,8 @@ output/ null/ error out +OscSUBSET-TEST +OscSUBSET-TEST-MODULE-UPDATE +OscSUBSET-TEST-MODULE-UPDATE-3 +TreeValTinyData +yaml From a36a6b77e8bba0e28afb57ff7cc9f5905cd02502 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Dec 2024 13:38:12 +0000 Subject: [PATCH 03/16] Updates to container again, and java.math not getting recognised --- modules/local/generate_cram_csv.nf | 4 +++- subworkflows/local/selfcomp.nf | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf index ad90eaf5..045160b7 100755 --- a/modules/local/generate_cram_csv.nf +++ b/modules/local/generate_cram_csv.nf @@ -2,7 +2,9 @@ process GENERATE_CRAM_CSV { tag "${meta.id}" label 'process_tiny' - container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : + 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" input: tuple val(meta), path(crampath) diff --git a/subworkflows/local/selfcomp.nf b/subworkflows/local/selfcomp.nf index bfb10848..9c679b77 100755 --- a/subworkflows/local/selfcomp.nf +++ b/subworkflows/local/selfcomp.nf @@ -49,7 +49,7 @@ workflow SELFCOMP { file_size .sum{it / 1e9} - .collect { new java.math.BigDecimal (it).setScale(0, RoundingMode.UP) } + .collect { new java.math.BigDecimal (it).setScale(0, java.math.RoundingMode.UP) } .flatten() .set { chunk_number } From 44200e52647b304c8222b64a0aa2838ca635f0ab Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Dec 2024 13:46:00 +0000 Subject: [PATCH 04/16] BigDecimal is now overloading? 
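This commit, together with the previous one, adjusts how the selfcomp chunk count is derived: the summed genome size in Gb is rounded up to a whole number with an explicitly qualified java.math.BigDecimal/RoundingMode, and the collect/flatten pair is replaced by a single map so the channel emits one value. A minimal illustrative sketch of that logic follows; it is not part of the patch, the example genome size is assumed, and it uses an "as BigDecimal" coercion rather than the constructor call to side-step the overload ambiguity the subject line alludes to.

    // Illustrative sketch only: genome size (bases) -> size in Gb -> rounded UP to a
    // whole chunk count, emitted as a single value via map (collect/flatten dropped).
    workflow {
        Channel
            .of( 1_300_000_000L )                                                     // assumed genome size in bases
            .map { it / 1e9 }                                                         // ~1.3 Gb
            .map { ( it as java.math.BigDecimal ).setScale( 0, java.math.RoundingMode.UP ) }
            .view { "chunk_number: ${it}" }                                           // prints 2
    }
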
--- subworkflows/local/selfcomp.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/subworkflows/local/selfcomp.nf b/subworkflows/local/selfcomp.nf index 9c679b77..f7606440 100755 --- a/subworkflows/local/selfcomp.nf +++ b/subworkflows/local/selfcomp.nf @@ -49,8 +49,7 @@ workflow SELFCOMP { file_size .sum{it / 1e9} - .collect { new java.math.BigDecimal (it).setScale(0, java.math.RoundingMode.UP) } - .flatten() + .map { it -> new java.math.BigDecimal (it).setScale(0, java.math.RoundingMode.UP) } .set { chunk_number } // From 934e9e0b3412966b19eb95edf7add8c441f51eb3 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Dec 2024 13:55:09 +0000 Subject: [PATCH 05/16] hic_report is missing --- workflows/treeval.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/workflows/treeval.nf b/workflows/treeval.nf index b90ce7e5..b3406b81 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -276,6 +276,7 @@ workflow TREEVAL { REPEAT_DENSITY.out.repeat_density, params.entry ) + hic_report = ch_reporting ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) } else { hic_report = [] From c877ca2e8f152fff69ef16b4abed40868c495ee8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Dec 2024 14:48:01 +0000 Subject: [PATCH 06/16] hic_report change --- workflows/treeval.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/treeval.nf b/workflows/treeval.nf index b3406b81..d616f491 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -276,7 +276,7 @@ workflow TREEVAL { REPEAT_DENSITY.out.repeat_density, params.entry ) - hic_report = ch_reporting + hic_report = HIC_MAPPING.out.ch_reporting ch_versions = ch_versions.mix( HIC_MAPPING.out.versions ) } else { hic_report = [] From 8198e89ce0bd0df7af8a86d3653c5a259f2cfb2b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Dec 2024 14:55:18 +0000 Subject: [PATCH 07/16] Updates to container --- modules/local/graphoverallcoverage.nf | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/local/graphoverallcoverage.nf b/modules/local/graphoverallcoverage.nf index b8cc8777..10c0a112 100755 --- a/modules/local/graphoverallcoverage.nf +++ b/modules/local/graphoverallcoverage.nf @@ -2,7 +2,10 @@ process GRAPHOVERALLCOVERAGE { tag "$meta.id" label "process_single" - container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' + conda "conda-forge::perl=5.26.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/perl:5.26.2' : + 'biocontainers/perl:5.26.2' }" input: tuple val(meta), path(bed) From 8a24f1410a2127bccddcf848f486916467c5cdb8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Dec 2024 15:28:13 +0000 Subject: [PATCH 08/16] Adding default for steps in config --- nextflow.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index f9d4c973..2f1bcd14 100755 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,7 @@ params { show_hidden_params = false schema_ignore_params = 'genomes' binfile = false + steps = 'ALL' // Config options custom_config_version = 'master' @@ -254,4 +255,4 @@ def round_up ( decimal_places, file, multiplier ){ } else { return dived * multiplier } -} \ No newline at end of file +} From 0dcc269423b98ce413a815e85e7f3df816c5db2a Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 11 Dec 2024 09:33:54 +0000 Subject: [PATCH 09/16] All should have been None --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 2f1bcd14..2af4a6fc 100755 --- a/nextflow.config +++ b/nextflow.config @@ -27,7 +27,7 @@ params { show_hidden_params = false schema_ignore_params = 'genomes' binfile = false - steps = 'ALL' + steps = 'NONE' // Config options custom_config_version = 'master' From 1109a05047ce845da97f78e95ed22241b666b587 Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Wed, 11 Dec 2024 12:50:30 +0000 Subject: [PATCH 10/16] Update ci.yml --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b613b70..93ab347e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/treeval') }}" - runs-on: ubuntu2204-8c + runs-on: [ubuntu-latest] strategy: matrix: NXF_VER: From 17996bb69f45a68438256b88e2be69d7199d224d Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Dec 2024 10:20:28 +0000 Subject: [PATCH 11/16] Testing so we can see priv levels --- .github/workflows/ci.yml | 68 ++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 93ab347e..0bcb006c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,41 +35,47 @@ jobs: id: branch-names uses: tj-actions/branch-names@v8 - - name: Install Nextflow - uses: nf-core/setup-nextflow@v2 - with: - version: "${{ matrix.NXF_VER }}" + - name: Get privilege Level + run: | + grep -q 'kernel.unprivileged_userns_clone=1' /etc/sysctl.d/90-unprivileged_userns.conf && \ + echo "User namespaces enabled, continue the Apptainer installation" || \ + echo "User namespaces NOT enabled, your use of Apptainer will be very limited" - - name: Setup apptainer - uses: eWaterCycle/setup-apptainer@main + # - name: Install Nextflow + # uses: nf-core/setup-nextflow@v2 + # with: + # version: "${{ matrix.NXF_VER }}" - - name: Set up Singularity - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR + # - name: Setup apptainer + # uses: eWaterCycle/setup-apptainer@main - - name: Install Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" + # 
- name: Set up Singularity + # run: | + # mkdir -p $NXF_SINGULARITY_CACHEDIR + # mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - name: Install nf-core - run: | - pip install nf-core + # - name: Install Python + # uses: actions/setup-python@v5 + # with: + # python-version: "3.10" - - name: NF-Core Download - download singularity containers - # Forcibly download repo on active branch and download SINGULARITY containers into the CACHE dir if not found - # Must occur after singularity install or will crash trying to dl containers - # Zip up this fresh download and run the checked out version - run: | - nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system singularity + # - name: Install nf-core + # run: | + # pip install nf-core - - name: Download Tiny test data - # Download A fungal test data set that is full enough to show some real output. - run: | - curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - + # - name: NF-Core Download - download singularity containers + # # Forcibly download repo on active branch and download SINGULARITY containers into the CACHE dir if not found + # # Must occur after singularity install or will crash trying to dl containers + # # Zip up this fresh download and run the checked out version + # run: | + # nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system singularity - - name: Singularity - Run FULL pipeline with test data - # Remember that you can parallelise this by using strategy.matrix - run: | - nextflow run ./sanger-treeval/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test_github,singularity --outdir ./Sing-Full + # - name: Download Tiny test data + # # Download A fungal test data set that is full enough to show some real output. 
+ # run: | + # curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - + + # - name: Singularity - Run FULL pipeline with test data + # # Remember that you can parallelise this by using strategy.matrix + # run: | + # nextflow run ./sanger-treeval/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test_github,singularity --outdir ./Sing-Full From d5b7a1e8cf5c604725cf0ad3309d20d27f7c7354 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Dec 2024 10:24:29 +0000 Subject: [PATCH 12/16] Testing so we can see priv levels - correct order --- .github/workflows/ci.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0bcb006c..914eafc6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,19 +35,19 @@ jobs: id: branch-names uses: tj-actions/branch-names@v8 - - name: Get privilege Level - run: | - grep -q 'kernel.unprivileged_userns_clone=1' /etc/sysctl.d/90-unprivileged_userns.conf && \ - echo "User namespaces enabled, continue the Apptainer installation" || \ - echo "User namespaces NOT enabled, your use of Apptainer will be very limited" - # - name: Install Nextflow # uses: nf-core/setup-nextflow@v2 # with: # version: "${{ matrix.NXF_VER }}" - # - name: Setup apptainer - # uses: eWaterCycle/setup-apptainer@main + - name: Setup apptainer + uses: eWaterCycle/setup-apptainer@main + + - name: Get privilege Level + run: | + grep -q 'kernel.unprivileged_userns_clone=1' /etc/sysctl.d/90-unprivileged_userns.conf && \ + echo "User namespaces enabled, continue the Apptainer installation" || \ + echo "User namespaces NOT enabled, your use of Apptainer will be very limited" # - name: Set up Singularity # run: | From e611af4b324ec550b7284c4965a0042053ccc308 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Dec 2024 10:41:47 +0000 Subject: [PATCH 13/16] Testing so we can see priv levels --- .github/workflows/ci.yml | 65 +++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 914eafc6..3c00bb31 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,47 +35,38 @@ jobs: id: branch-names uses: tj-actions/branch-names@v8 - # - name: Install Nextflow - # uses: nf-core/setup-nextflow@v2 - # with: - # version: "${{ matrix.NXF_VER }}" + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" - - name: Setup apptainer - uses: eWaterCycle/setup-apptainer@main - - - name: Get privilege Level + - name: Set up Singularity run: | - grep -q 'kernel.unprivileged_userns_clone=1' /etc/sysctl.d/90-unprivileged_userns.conf && \ - echo "User namespaces enabled, continue the Apptainer installation" || \ - echo "User namespaces NOT enabled, your use of Apptainer will be very limited" - - # - name: Set up Singularity - # run: | - # mkdir -p $NXF_SINGULARITY_CACHEDIR - # mkdir -p $NXF_SINGULARITY_LIBRARYDIR + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR - # - name: Install Python - # uses: actions/setup-python@v5 - # with: - # python-version: "3.10" + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" - # - name: Install nf-core - # run: | - # pip install nf-core + - name: Install nf-core + run: | + pip install nf-core - # - name: NF-Core Download - download singularity containers - # # Forcibly download repo on active 
branch and download SINGULARITY containers into the CACHE dir if not found - # # Must occur after singularity install or will crash trying to dl containers - # # Zip up this fresh download and run the checked out version - # run: | - # nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system singularity + - name: NF-Core Download - download singularity containers + # Forcibly download repo on active branch and download SINGULARITY containers into the CACHE dir if not found + # Must occur after singularity install or will crash trying to dl containers + # Zip up this fresh download and run the checked out version + run: | + nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system singularity - # - name: Download Tiny test data - # # Download A fungal test data set that is full enough to show some real output. - # run: | - # curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - + - name: Download Tiny test data + # Download A fungal test data set that is full enough to show some real output. + run: | + curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf - - # - name: Singularity - Run FULL pipeline with test data - # # Remember that you can parallelise this by using strategy.matrix - # run: | - # nextflow run ./sanger-treeval/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test_github,singularity --outdir ./Sing-Full + - name: Singularity - Run FULL pipeline with test data + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ./sanger-treeval/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test_github,docker --outdir ./Sing-Full From 19689c987fedbb531dc9ae2fa79c4d8fe8bdfe51 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Dec 2024 10:43:42 +0000 Subject: [PATCH 14/16] Testing so we can see priv levels --- .github/workflows/ci.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c00bb31..62e8eb08 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,6 +35,12 @@ jobs: id: branch-names uses: tj-actions/branch-names@v8 + - name: Get privilege Level + run: | + grep -q 'kernel.unprivileged_userns_clone=1' /etc/sysctl.d/90-unprivileged_userns.conf && \ + echo "User namespaces enabled, continue the Apptainer installation" || \ + echo "User namespaces NOT enabled, your use of Apptainer will be very limited" + - name: Install Nextflow uses: nf-core/setup-nextflow@v2 with: @@ -59,7 +65,7 @@ jobs: # Must occur after singularity install or will crash trying to dl containers # Zip up this fresh download and run the checked out version run: | - nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system singularity + nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. 
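Note on the privilege probe above: the "Get privilege Level" step only reports whether unprivileged user namespaces are enabled on the runner; it does not change the setting, and because it greps a single sysctl.d file it will print the "NOT enabled" message whenever that file is absent, even if the kernel default already allows user namespaces. A minimal sketch of a more defensive variant is shown below — the live sysctl check and the fallback file path are assumptions carried over from the step above, not something these patches or the pipeline define:

    # Hypothetical variant of the probe: check the live kernel value first,
    # then fall back to the sysctl.d file quoted in the patches above.
    - name: Get privilege Level
      run: |
        if [ "$(sysctl -n kernel.unprivileged_userns_clone 2>/dev/null)" = "1" ] || \
           grep -qs 'kernel.unprivileged_userns_clone=1' /etc/sysctl.d/90-unprivileged_userns.conf; then
          echo "User namespaces enabled, continue the Apptainer installation"
        else
          echo "User namespaces NOT enabled, your use of Apptainer will be very limited"
        fi
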
From 94228498b57959f4038b33b73a499b024d78d336 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Dec 2024 10:48:43 +0000 Subject: [PATCH 15/16] Testing so we can see priv levels --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 62e8eb08..25818112 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,6 +46,9 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + - name: Setup apptainer + uses: eWaterCycle/setup-apptainer@main + - name: Set up Singularity run: | mkdir -p $NXF_SINGULARITY_CACHEDIR @@ -65,7 +68,7 @@ jobs: # Must occur after singularity install or will crash trying to dl containers # Zip up this fresh download and run the checked out version run: | - nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval + nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system none - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. From c98db8dcf38ba87b071162545257656d3372b4d7 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Dec 2024 10:55:56 +0000 Subject: [PATCH 16/16] Testing so we can see priv levels --- .github/workflows/ci.yml | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 25818112..06ac277d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -29,47 +29,14 @@ jobs: - "22.10.1" - "latest-everything" steps: - - name: Get branch names - # Pulls the names of current branches in repo - # steps.branch-names.outputs.current_branch is used later and returns the name of the branch the PR is made FROM not to - id: branch-names - uses: tj-actions/branch-names@v8 - - - name: Get privilege Level - run: | - grep -q 'kernel.unprivileged_userns_clone=1' /etc/sysctl.d/90-unprivileged_userns.conf && \ - echo "User namespaces enabled, continue the Apptainer installation" || \ - echo "User namespaces NOT enabled, your use of Apptainer will be very limited" + - name: Check out pipeline code + uses: actions/checkout@v4 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Setup apptainer - uses: eWaterCycle/setup-apptainer@main - - - name: Set up Singularity - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Install Python - uses: actions/setup-python@v5 - with: - python-version: "3.10" - - - name: Install nf-core - run: | - pip install nf-core - - - name: NF-Core Download - download singularity containers - # Forcibly download repo on active branch and download SINGULARITY containers into the CACHE dir if not found - # Must occur after singularity install or will crash trying to dl containers - # Zip up this fresh download and run the checked out version - run: | - nf-core download sanger-tol/treeval --revision ${{ steps.branch-names.outputs.current_branch }} --compress none -d --force --outdir sanger-treeval --container-cache-utilisation amend --container-system none - - name: Download Tiny test data # Download A fungal test data set that is full enough to show some real output. 
run: | @@ -78,4 +45,4 @@ jobs: - name: Singularity - Run FULL pipeline with test data # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ./sanger-treeval/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test_github,docker --outdir ./Sing-Full + nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./Sing-Full
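
Taken together, the last few patches strip the CI job back to a checkout-and-run workflow: the branch-name lookup, Apptainer/Singularity setup, Python and nf-core installation, and the container pre-download are all removed, and the pipeline is run straight from the checked-out workspace with the docker profile (the step keeps its "Singularity" label in the final hunk). A condensed sketch of the resulting job steps, assembled from the hunks above, is shown below; the matrix values and any env block are assumptions, as they are not shown in these patches:

    # Condensed, hypothetical view of the job steps once PATCH 16/16 is applied.
    # Step names, URLs and commands are taken from the hunks above; everything
    # outside the steps list (matrix, env) is assumed to be unchanged.
    steps:
      - name: Check out pipeline code
        uses: actions/checkout@v4

      - name: Install Nextflow
        uses: nf-core/setup-nextflow@v2
        with:
          version: "${{ matrix.NXF_VER }}"

      - name: Download Tiny test data
        # Download A fungal test data set that is full enough to show some real output.
        run: |
          curl https://tolit.cog.sanger.ac.uk/test-data/resources/treeval/TreeValTinyData.tar.gz | tar xzf -

      - name: Singularity - Run FULL pipeline with test data
        # Remember that you can parallelise this by using strategy.matrix
        run: |
          nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./Sing-Full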