From 9268b77c6faf11f2cd0ca17a11fa90aa6e4a21c5 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 21 Nov 2024 15:13:47 +0000 Subject: [PATCH 01/12] Minor updates --- CHANGELOG.md | 17 +++++++++++------ subworkflows/local/read_coverage.nf | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b20fdd83..fd69b777 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,11 +13,13 @@ Our 3rd release for sanger-tol/treeval. - Adds a JBrowse Only workflow (this will lead to an update to the FULL workflow which can now call JBROWSE_ONLY and RAPID). - Updates to containers (local modules) to remove Anaconda dependencies following policy changes. - Updates to modules to remove Anaconda dependencies following policy changes + - The majority of these updates only remove the `defaults` channel from the environment.yml - CONDA warnings for modules which cannot use CONDA. - Removable of a liberal use of spaces. - reformat_intersect was previously not outputing version data. - Adding arch specification to Pretext GitHub actions runner. Hopefully this will stop the spurious errors we see on there. - Addition of steps into schema. +- Adds `*ktab` as an output. 
### Parameters @@ -32,8 +34,8 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i | Module | Old Version | New Versions | | -------------------------------------- | ---------------- | ------------ | | bamtobed_sort ( bedtools + samtools ) | 2.31.0 + 1.17 | | -| bedtools | 2.31.1 | | -| busco | 5.5.0 | | +| bedtools | 2.31.1 | - | +| busco* | 5.5.0 | - | | bwa-mem2 | 2.2.1 | | | cat | 2.3.4 | | | chunk_fasta ( pyfasta ) | 0.5.2-1 | | @@ -45,23 +47,26 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i | gcc | 10.4.0 | | | find_telomere_windows ( java-jdk ) | 8.0.112 | | | generate_cram_csv ( samtools ) | 1.17 | | -| gnu-sort | 8.25 | | +| gnu-sort | 8.25 | 9.3 | | juicer_tools_pre ( java-jdk ) | 8.0.112 | | | perl | 5.26.2 | | | merquryfk | 1.0.1 | | | minimap2 + samtools | 2.24 + 1.14 | | +| minimap2_index | 2.24 | 2.28 | | miniprot | 0.11--he4a0461_2 | | | mummer | 3.23 | | | paftools ( minimap2 + samtools ) | 2.24 + 1.14 | | -| pretextmap + samtools | 0.0.2 + 1.17 | | +| pretextmap + samtools | 0.0.2 + 1.17 | 0.1.9 + 1.17 | | python | 3.9 | - | | - pandas | 1.5.2 | - | | samtools | 1.18 | 1.21 | | selfcomp_splitfasta ( perl-bioperl ) | 1.7.8-1 | | | seqtk | 1.4 | | | tabix | 1.11 | | -| ucsc | 377 | | -| windowmasker (blast) | 2.14.0 | | +| ucsc | 377 | 447 | +| windowmasker (blast) | 2.14.0 | 2.15.0 | + +\* busco is currently pinned to v5.5.0 - Upgrading to v5.7.1 would cause GitHub Actions to crash. Further investigation needed. 
## [1.1.1] - Ancient Aurora (H1) - [2024-04-26] diff --git a/subworkflows/local/read_coverage.nf b/subworkflows/local/read_coverage.nf index 4da5b0d9..6f553160 100755 --- a/subworkflows/local/read_coverage.nf +++ b/subworkflows/local/read_coverage.nf @@ -132,7 +132,6 @@ workflow READ_COVERAGE { } .set { genomecov_input } - // // MODULE: Genome2Cov @@ -140,7 +139,8 @@ workflow READ_COVERAGE { BEDTOOLS_GENOMECOV( genomecov_input.input_tuple, genomecov_input.dot_genome, - genomecov_input.file_suffix + genomecov_input.file_suffix, + false ) ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions) From cc96248aa883c81e1201544ff016c7edbd609126 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 22 Nov 2024 13:22:27 +0000 Subject: [PATCH 02/12] Updating last of NF-core modules --- modules.json | 15 +- .../bedtools/genomecov/environment.yml | 2 - modules/nf-core/bedtools/genomecov/main.nf | 13 +- modules/nf-core/bedtools/genomecov/meta.yml | 72 +-- .../bedtools/genomecov/tests/main.nf.test | 92 +++- .../genomecov/tests/main.nf.test.snap | 131 ++++- .../nf-core/bedtools/genomecov/tests/tags.yml | 2 - modules/nf-core/bedtools/map/environment.yml | 2 - modules/nf-core/bedtools/map/meta.yml | 70 +-- .../nf-core/bedtools/map/tests/main.nf.test | 12 +- modules/nf-core/cooler/cload/environment.yml | 2 - modules/nf-core/cooler/cload/meta.yml | 68 +-- .../nf-core/cooler/zoomify/environment.yml | 2 - modules/nf-core/cooler/zoomify/meta.yml | 45 +- modules/nf-core/fastk/fastk/environment.yml | 5 +- .../merquryfk/merquryfk/environment.yml | 5 +- .../nf-core/minimap2/align/environment.yml | 8 + modules/nf-core/minimap2/align/main.nf | 53 +- modules/nf-core/minimap2/align/meta.yml | 112 +++-- .../nf-core/minimap2/align/tests/main.nf.test | 441 ++++++++++++++++ .../minimap2/align/tests/main.nf.test.snap | 476 ++++++++++++++++++ modules/nf-core/minimap2/align/tests/tags.yml | 2 + modules/nf-core/mummer/environment.yml | 5 + modules/nf-core/mummer/main.nf | 6 +- 
modules/nf-core/mummer/meta.yml | 60 +-- modules/nf-core/mummer/mummer.diff | 14 - modules/nf-core/mummer/tests/main.nf.test | 58 +++ .../nf-core/mummer/tests/main.nf.test.snap | 68 +++ subworkflows/local/read_coverage.nf | 4 +- subworkflows/local/synteny.nf | 4 +- 30 files changed, 1563 insertions(+), 286 deletions(-) delete mode 100644 modules/nf-core/bedtools/genomecov/tests/tags.yml create mode 100644 modules/nf-core/minimap2/align/environment.yml create mode 100644 modules/nf-core/minimap2/align/tests/main.nf.test create mode 100644 modules/nf-core/minimap2/align/tests/main.nf.test.snap create mode 100644 modules/nf-core/minimap2/align/tests/tags.yml create mode 100644 modules/nf-core/mummer/environment.yml delete mode 100644 modules/nf-core/mummer/mummer.diff create mode 100644 modules/nf-core/mummer/tests/main.nf.test create mode 100644 modules/nf-core/mummer/tests/main.nf.test.snap diff --git a/modules.json b/modules.json index 1296f7df..ed38c68e 100755 --- a/modules.json +++ b/modules.json @@ -12,7 +12,7 @@ }, "bedtools/genomecov": { "branch": "master", - "git_sha": "575e1bc54b083fb15e7dd8b5fcc40bea60e8ce83", + "git_sha": "bfa8975eefb8df3e480a44ac9e594f23f52b2963", "installed_by": ["modules"] }, "bedtools/intersect": { @@ -27,7 +27,7 @@ }, "bedtools/map": { "branch": "master", - "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "bedtools/merge": { @@ -57,12 +57,12 @@ }, "cooler/cload": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { @@ -93,7 +93,7 @@ }, "minimap2/align": { "branch": "master", - "git_sha": 
"603ecbd9f45300c9788f197d2a15a005685b4220", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, @@ -114,9 +114,8 @@ }, "mummer": { "branch": "master", - "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"], - "patch": "modules/nf-core/mummer/mummer.diff" + "git_sha": "a15872dde4c4affaac2dfee6c3c65fadab4719bd", + "installed_by": ["modules"] }, "paftools/sam2paf": { "branch": "master", diff --git a/modules/nf-core/bedtools/genomecov/environment.yml b/modules/nf-core/bedtools/genomecov/environment.yml index 8fbe20c3..5683bc05 100644 --- a/modules/nf-core/bedtools/genomecov/environment.yml +++ b/modules/nf-core/bedtools/genomecov/environment.yml @@ -1,7 +1,5 @@ -name: bedtools_genomecov channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/genomecov/main.nf b/modules/nf-core/bedtools/genomecov/main.nf index 7a4d9c45..35e2ab14 100644 --- a/modules/nf-core/bedtools/genomecov/main.nf +++ b/modules/nf-core/bedtools/genomecov/main.nf @@ -4,13 +4,14 @@ process BEDTOOLS_GENOMECOV { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : - 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data': + 'community.wave.seqera.io/library/bedtools_coreutils:a623c13f66d5262b' }" input: tuple val(meta), path(intervals), val(scale) path sizes val extension + val sort output: tuple val(meta), path("*.${extension}"), emit: genomecov @@ -20,12 +21,16 @@ process BEDTOOLS_GENOMECOV { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args_list = args.tokenize() args += (scale > 0 && scale != 1) ? " -scale $scale" : "" if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { args += " -bg" } + // Sorts output file by chromosome and position using additional options for performance and consistency + // See https://www.biostars.org/p/66927/ for further details + def buffer = task.memory ? "--buffer-size=${task.memory.toGiga().intdiv(2)}G" : '' + def sort_cmd = sort ? 
"| LC_ALL=C sort --parallel=$task.cpus $buffer -k1,1 -k2,2n" : '' def prefix = task.ext.prefix ?: "${meta.id}" if (intervals.name =~ /\.bam/) { @@ -34,6 +39,7 @@ process BEDTOOLS_GENOMECOV { genomecov \\ -ibam $intervals \\ $args \\ + $sort_cmd \\ > ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml @@ -48,6 +54,7 @@ process BEDTOOLS_GENOMECOV { -i $intervals \\ -g $sizes \\ $args \\ + $sort_cmd \\ > ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/bedtools/genomecov/meta.yml b/modules/nf-core/bedtools/genomecov/meta.yml index 2b2385e3..41b1f8f4 100644 --- a/modules/nf-core/bedtools/genomecov/meta.yml +++ b/modules/nf-core/bedtools/genomecov/meta.yml @@ -1,5 +1,6 @@ name: bedtools_genomecov -description: Computes histograms (default), per-base reports (-d) and BEDGRAPH (-bg) summaries of feature coverage (e.g., aligned sequences) for a given genome. +description: Computes histograms (default), per-base reports (-d) and BEDGRAPH (-bg) + summaries of feature coverage (e.g., aligned sequences) for a given genome. keywords: - bed - bam @@ -12,39 +13,50 @@ tools: A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. documentation: https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html licence: ["MIT"] + identifier: biotools:bedtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals: - type: file - description: BAM/BED/GFF/VCF - pattern: "*.{bam|bed|gff|vcf}" - - scale: - type: integer - description: Number containing the scale factor for the output. Set to 1 to disable. 
Setting to a value other than 1 will also get the -bg bedgraph output format as this is required for this command switch - - sizes: - type: file - description: Tab-delimited table of chromosome names in the first column and chromosome sizes in the second column - - extension: - type: string - description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", ".tab", etc.) It is set arbitrarily by the user and corresponds to the file format which depends on arguments. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bam|bed|gff|vcf}" + - scale: + type: integer + description: Number containing the scale factor for the output. Set to 1 to + disable. Setting to a value other than 1 will also get the -bg bedgraph output + format as this is required for this command switch + - - sizes: + type: file + description: Tab-delimited table of chromosome names in the first column and + chromosome sizes in the second column + - - extension: + type: string + description: Extension of the output file (e. g., ".bg", ".bedgraph", ".txt", + ".tab", etc.) It is set arbitrarily by the user and corresponds to the file + format which depends on arguments. + - - sort: + type: boolean + description: Sort the output output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - genomecov: - type: file - description: Computed genome coverage file - pattern: "*.${extension}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${extension}": + type: file + description: Computed genome coverage file + pattern: "*.${extension}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@edmundmiller" - "@sruthipsuresh" diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test b/modules/nf-core/bedtools/genomecov/tests/main.nf.test index 21e69aed..16a03492 100644 --- a/modules/nf-core/bedtools/genomecov/tests/main.nf.test +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test @@ -9,19 +9,20 @@ nextflow_process { tag "bedtools" tag "bedtools/genomecov" - test("sarscov2 - no scale") { + test("sarscov2 - no scale") { when { process { - """ + """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), 1 ] // sizes input[1] = [] // extension input[2] = "txt" + input[3] = true """ } } @@ -29,25 +30,25 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("no_scale") } + { assert snapshot(process.out).match() } ) } - } test("sarscov2 - dummy sizes") { when { process { - """ + """ input[0] = [ [ id:'test'], - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), 0.5 ] // sizes input[1] = file('dummy_chromosome_sizes') // extension input[2] = 'txt' + input[3] = false """ } } @@ -55,25 +56,25 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("dummy_sizes") } + { assert snapshot(process.out).match() } ) } - } test("sarscov2 - scale") { 
when { process { - """ + """ input[0] = [ [ id:'test'], - file(params.test_data['sarscov2']['genome']['baits_bed'], checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/baits.bed", checkIfExists: true), 0.5 ] // sizes - input[1] = file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.sizes", checkIfExists: true) // extension input[2] = 'txt' + input[3] = false """ } } @@ -81,27 +82,55 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("scale") } + { assert snapshot(process.out).match() } ) } - } - test("stub") { + test("sarscov2 - no scale - stub") { options "-stub" when { process { - """ + """ input[0] = [ [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), 1 ] // sizes input[1] = [] // extension + input[2] = "txt" + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - dummy sizes - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/bam/test.paired_end.bam", checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file('dummy_chromosome_sizes') + // extension input[2] = 'txt' + input[3] = false """ } } @@ -109,10 +138,37 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.genomecov[0][1]).name).match("stub") } + { assert snapshot(process.out).match() } ) } + } + + test("sarscov2 - scale - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test'], + 
file(params.modules_testdata_base_path + "genomics/sarscov2/genome/bed/baits.bed", checkIfExists: true), + 0.5 + ] + // sizes + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.sizes", checkIfExists: true) + // extension + input[2] = 'txt' + input[3] = false + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } } } diff --git a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap index 8f9191e4..da6dbe87 100644 --- a/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap +++ b/modules/nf-core/bedtools/genomecov/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "dummy_sizes": { + "sarscov2 - dummy sizes": { "content": [ { "0": [ @@ -26,9 +26,13 @@ ] } ], - "timestamp": "2023-12-05T17:35:58.35232" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:33.898146" }, - "no_scale": { + "sarscov2 - no scale - stub": { "content": [ { "0": [ @@ -36,7 +40,7 @@ { "id": "test" }, - "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -47,7 +51,7 @@ { "id": "test" }, - "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -55,15 +59,46 @@ ] } ], - "timestamp": "2023-12-05T17:35:51.142496" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:52.483371" }, - "stub": { + "sarscov2 - scale": { "content": [ - "test.coverage.txt" + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + ] + ], + "versions": [ + 
"versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } ], - "timestamp": "2023-12-05T17:36:13.084709" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:43.69501" }, - "scale": { + "sarscov2 - scale - stub": { "content": [ { "0": [ @@ -71,7 +106,7 @@ { "id": "test" }, - "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -82,7 +117,73 @@ { "id": "test" }, - "test.coverage.txt:md5,de3c59c0ea123bcdbbad27bc0a0a601e" + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:00:09.930036" + }, + "sarscov2 - no scale": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,66083198daca6c001d328ba9616e9b53" + ] + ], + "versions": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T11:59:25.448817" + }, + "sarscov2 - dummy sizes - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,5fd44452613992a6f71f2c73d2e117f2" + ], + "genomecov": [ + [ + { + "id": "test" + }, + "test.coverage.txt:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -90,6 +191,10 @@ ] } ], - "timestamp": "2023-12-05T17:36:05.962006" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T12:00:01.086433" } } \ No newline at end of file diff --git a/modules/nf-core/bedtools/genomecov/tests/tags.yml 
b/modules/nf-core/bedtools/genomecov/tests/tags.yml deleted file mode 100644 index 55fce478..00000000 --- a/modules/nf-core/bedtools/genomecov/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -bedtools/genomecov: - - "modules/nf-core/bedtools/genomecov/**" diff --git a/modules/nf-core/bedtools/map/environment.yml b/modules/nf-core/bedtools/map/environment.yml index f61ee028..5683bc05 100644 --- a/modules/nf-core/bedtools/map/environment.yml +++ b/modules/nf-core/bedtools/map/environment.yml @@ -1,7 +1,5 @@ -name: bedtools_map channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/map/meta.yml b/modules/nf-core/bedtools/map/meta.yml index 0267f6ff..4e56bb94 100644 --- a/modules/nf-core/bedtools/map/meta.yml +++ b/modules/nf-core/bedtools/map/meta.yml @@ -12,43 +12,47 @@ tools: A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. documentation: https://bedtools.readthedocs.io/en/latest/content/tools/map.html licence: ["MIT"] + identifier: biotools:bedtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals1: - type: file - description: BAM/BED/GFF/VCF - pattern: "*.{bed|gff|vcf}" - - intervals2: - type: file - description: BAM/BED/GFF/VCF - pattern: "*.{bed|gff|vcf}" - - meta2: - type: map - description: | - Groovy Map containing reference chromosome sizes - e.g. [ id:'test' ] - - chrom_sizes: - type: file - description: Chromosome sizes file - pattern: "*{.sizes,.txt}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - intervals1: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bed|gff|vcf}" + - intervals2: + type: file + description: BAM/BED/GFF/VCF + pattern: "*.{bed|gff|vcf}" + - - meta2: + type: map + description: | + Groovy Map containing reference chromosome sizes + e.g. [ id:'test' ] + - chrom_sizes: + type: file + description: Chromosome sizes file + pattern: "*{.sizes,.txt}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - mapped: - type: file - description: File containing the description of overlaps found between the features in A and the features in B, with statistics - pattern: "*.${extension}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.${extension}": + type: file + description: File containing the description of overlaps found between the features + in A and the features in B, with statistics + pattern: "*.${extension}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@ekushele" maintainers: diff --git a/modules/nf-core/bedtools/map/tests/main.nf.test b/modules/nf-core/bedtools/map/tests/main.nf.test index 4adc0a21..e46cf743 100644 --- a/modules/nf-core/bedtools/map/tests/main.nf.test +++ b/modules/nf-core/bedtools/map/tests/main.nf.test @@ -18,8 +18,8 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true) ] input[1] = 
[[],[]] """ @@ -42,8 +42,8 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ] input[1] = [[],[]] """ @@ -67,8 +67,8 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true) ] input[1] = [[],[]] """ diff --git a/modules/nf-core/cooler/cload/environment.yml b/modules/nf-core/cooler/cload/environment.yml index 03abee73..f8165ca9 100644 --- a/modules/nf-core/cooler/cload/environment.yml +++ b/modules/nf-core/cooler/cload/environment.yml @@ -1,7 +1,5 @@ -name: cooler_cload channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/cload/meta.yml b/modules/nf-core/cooler/cload/meta.yml index fa5474ae..1bb9f748 100644 --- a/modules/nf-core/cooler/cload/meta.yml +++ b/modules/nf-core/cooler/cload/meta.yml @@ -13,41 +13,45 @@ tools: tool_dev_url: https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-clause"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - pairs: - type: file - description: Path to contacts (i.e. read pairs) file. - - index: - type: file - description: Path to index file of the contacts. 
- - cool_bin: - type: integer - description: Bins size in bp - - chromsizes: - type: file - description: Path to a chromsizes file. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pairs: + type: file + description: Path to contacts (i.e. read pairs) file. + - index: + type: file + description: Path to index file of the contacts. + - cool_bin: + type: integer + description: Bins size in bp + - - chromsizes: + type: file + description: Path to a chromsizes file. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - version: - type: file - description: File containing software version - pattern: "versions.yml" - cool: - type: file - description: Output COOL file path - pattern: "*.cool" - - cool_bin: - type: integer - description: Bins size in bp + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cool": + type: file + description: Output COOL file path + pattern: "*.cool" + - cool_bin: + type: file + description: Output COOL file path + pattern: "*.cool" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jianhong" - "@muffato" diff --git a/modules/nf-core/cooler/zoomify/environment.yml b/modules/nf-core/cooler/zoomify/environment.yml index 2288f376..f8165ca9 100644 --- a/modules/nf-core/cooler/zoomify/environment.yml +++ b/modules/nf-core/cooler/zoomify/environment.yml @@ -1,7 +1,5 @@ -name: cooler_zoomify channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::cooler=0.9.2 diff --git a/modules/nf-core/cooler/zoomify/meta.yml b/modules/nf-core/cooler/zoomify/meta.yml index d87aaf29..3f928781 100644 --- a/modules/nf-core/cooler/zoomify/meta.yml +++ b/modules/nf-core/cooler/zoomify/meta.yml @@ -12,30 +12,33 @@ tools: tool_dev_url: 
https://github.com/open2c/cooler doi: "10.1093/bioinformatics/btz540" licence: ["BSD-3-clause"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - cool: - type: file - description: Path to COOL file - pattern: "*.{cool,mcool}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - cool: + type: file + description: Path to COOL file + pattern: "*.{cool,mcool}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - mcool: - type: file - description: Output mcool file - pattern: "*.mcool" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mcool": + type: file + description: Output mcool file + pattern: "*.mcool" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jianhong" maintainers: diff --git a/modules/nf-core/fastk/fastk/environment.yml b/modules/nf-core/fastk/fastk/environment.yml index 54c932fe..19ab079a 100644 --- a/modules/nf-core/fastk/fastk/environment.yml +++ b/modules/nf-core/fastk/fastk/environment.yml @@ -1,5 +1,4 @@ name: fastk_fastk channels: - - conda-forge - - bioconda - - defaults + - conda-forge + - bioconda diff --git a/modules/nf-core/merquryfk/merquryfk/environment.yml b/modules/nf-core/merquryfk/merquryfk/environment.yml index 44a5ee9b..50d352bf 100644 --- a/modules/nf-core/merquryfk/merquryfk/environment.yml +++ b/modules/nf-core/merquryfk/merquryfk/environment.yml @@ -1,5 +1,4 @@ name: merquryfk_merquryfk channels: - - conda-forge - - bioconda - - defaults + - conda-forge + - bioconda diff --git a/modules/nf-core/minimap2/align/environment.yml 
b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 00000000..dc6476b7 --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.20 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index d7057674..c14d377d 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -1,43 +1,55 @@ process MINIMAP2_ALIGN { tag "$meta.id" - label 'process_medium' + label 'process_high' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index - conda "bioconda::minimap2=2.24 bioconda::samtools=1.14" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" input: tuple val(meta), path(reads) - path reference + tuple val(meta2), path(reference) val bam_format + val bam_index_extension val cigar_paf_format val cigar_bam - val bed_format + val bed_bool + output: - tuple val(meta), path("*.paf"), optional: true, emit: paf - tuple val(meta), path("*.bam"), optional: true, emit: bam - tuple val(meta), path("*.bed"), optional: true, emit: bed - path "versions.yml" , emit: versions + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: 
true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + tuple val(meta), path("*.bed") , optional: true, emit: bed + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() < 2.5e9 && bam_format ? "-a | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : bed_format ? "| paftools.js splice2bed - > ${prefix}.bed " : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) """ + $samtools_reset_fastq \\ minimap2 \\ $args \\ -t $task.cpus \\ - "${reference ?: reads}" \\ - "$reads" \\ + $target \\ + $query \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output @@ -46,23 +58,24 @@ process MINIMAP2_ALIGN { cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() < 2.5e9 && bam_format ? 
"-a | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" - def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' - def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' - def extension = bam_format ? "bam" : bed_format ? "bed" : "paf" + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ - touch ${prefix}.${extension} + touch $output_file + ${bam_index} cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) END_VERSIONS """ - } - diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index 991b39a0..a4cfc891 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -14,52 +14,86 @@ tools: homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FASTA or FASTQ files of size 1 and 2 for single-end - and paired-end data, respectively. - - reference: - type: file - description: | - Reference database in FASTA format. - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - - bam_format: + type: boolean + description: Specify that output should be in BAM format + - - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - paf: - type: file - description: Alignment in PAF format - pattern: "*.paf" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" - bam: - type: file - description: Alignment in BAM format - pattern: "*.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@heuermh" - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 00000000..4072c171 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,441 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta 
map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false") { + + 
when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + 
test("sarscov2 - fastq, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, 
true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap 
b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..12264a85 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,476 @@ +{ + "sarscov2 - bam, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], 
+ "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + 
"@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:18.964586894" + }, + "sarscov2 - fastq, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:17:48.667488325" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:02.517416733" + }, + "sarscov2 - bam, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + 
"versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + 
"@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + 
"@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:34.246998277" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 00000000..39dba374 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/mummer/environment.yml b/modules/nf-core/mummer/environment.yml new file mode 100644 index 00000000..643eff0f --- /dev/null +++ b/modules/nf-core/mummer/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::mummer=3.23 diff --git a/modules/nf-core/mummer/main.nf 
b/modules/nf-core/mummer/main.nf index 37d8e2b0..17b05dd5 100644 --- a/modules/nf-core/mummer/main.nf +++ b/modules/nf-core/mummer/main.nf @@ -3,13 +3,13 @@ process MUMMER { label 'process_low' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda "bioconda::mummer=3.23" + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mummer:3.23--pl5262h1b792b2_12' : 'biocontainers/mummer:3.23--pl5262h1b792b2_12' }" input: - tuple val(meta), path(ref, stageAs: 'ref.fasta'), path(query, stageAs: 'query.fasta') // Staging removes the ability to deal with zipped files here + tuple val(meta), path(ref), path(query) output: tuple val(meta), path("*.coords"), emit: coords @@ -50,7 +50,7 @@ process MUMMER { def prefix = task.ext.prefix ?: "${meta.id}" def VERSION = '3.23' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ - touch ${prefx}.coords + touch ${prefix}.coords cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/mummer/meta.yml b/modules/nf-core/mummer/meta.yml index f03d483c..3a4be42f 100644 --- a/modules/nf-core/mummer/meta.yml +++ b/modules/nf-core/mummer/meta.yml @@ -12,37 +12,41 @@ tools: tool_dev_url: http://mummer.sourceforge.net/ doi: 10.1186/gb-2004-5-2-r12 licence: ["The Artistic License"] - + identifier: biotools:mummer input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - ref: - type: file - description: FASTA file of the reference sequence - pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz}" - - query: - type: file - description: FASTA file of the query sequence - pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz}" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ref: + type: file + description: FASTA file of the reference sequence + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz}" + - query: + type: file + description: FASTA file of the query sequence + pattern: "*.{fasta,fasta.gz,fa,fa.gz,fna,fna.gz}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - coords: - type: file - description: File containing coordinates of matches between reference and query sequence - pattern: "*.coords" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.coords": + type: file + description: File containing coordinates of matches between reference and query + sequence + pattern: "*.coords" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@mjcipriano" - "@sateeshperi" +maintainers: + - "@mjcipriano" + - "@sateeshperi" diff --git a/modules/nf-core/mummer/mummer.diff b/modules/nf-core/mummer/mummer.diff deleted file mode 100644 index a0391dd0..00000000 --- a/modules/nf-core/mummer/mummer.diff +++ /dev/null @@ -1,14 +0,0 @@ -Changes in module 'nf-core/mummer' ---- modules/nf-core/mummer/main.nf -+++ modules/nf-core/mummer/main.nf -@@ -9,7 +9,7 @@ - 'biocontainers/mummer:3.23--pl5262h1b792b2_12' }" - - input: -- tuple val(meta), path(ref), path(query) -+ tuple val(meta), path(ref, stageAs: 'ref.fasta'), path(query, stageAs: 'query.fasta') // Staging removes the ability to deal with zipped files here - - output: - tuple val(meta), path("*.coords"), emit: coords - -************************************************************ diff --git a/modules/nf-core/mummer/tests/main.nf.test b/modules/nf-core/mummer/tests/main.nf.test new file mode 100644 index 00000000..b079d209 --- /dev/null +++ b/modules/nf-core/mummer/tests/main.nf.test @@ -0,0 +1,58 @@ +nextflow_process { + + name "Test Process MUMMER" + script "../main.nf" + process "MUMMER" + + tag "modules" + tag "modules_nfcore" + tag "mummer" + + test("sarscov2 - bam") { + + when { + process { + """ + + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/transcriptome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + 
process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/transcriptome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mummer/tests/main.nf.test.snap b/modules/nf-core/mummer/tests/main.nf.test.snap new file mode 100644 index 00000000..fea2282a --- /dev/null +++ b/modules/nf-core/mummer/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coords:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,c36e386e2de805cf815ba20359fca81c" + ], + "coords": [ + [ + { + "id": "test" + }, + "test.coords:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,c36e386e2de805cf815ba20359fca81c" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-12T06:08:50.368788562" + }, + "sarscov2 - bam": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.coords:md5,6084fe43c7cb2eca8b96d674560bdefc" + ] + ], + "1": [ + "versions.yml:md5,c36e386e2de805cf815ba20359fca81c" + ], + "coords": [ + [ + { + "id": "test" + }, + "test.coords:md5,6084fe43c7cb2eca8b96d674560bdefc" + ] + ], + "versions": [ + "versions.yml:md5,c36e386e2de805cf815ba20359fca81c" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.0" + }, + "timestamp": "2024-11-12T06:08:40.932078101" + } +} \ No newline at end of file diff --git a/subworkflows/local/read_coverage.nf b/subworkflows/local/read_coverage.nf index 6f553160..37d1f753 100755 --- a/subworkflows/local/read_coverage.nf +++ b/subworkflows/local/read_coverage.nf @@ -66,8 +66,9 @@ workflow READ_COVERAGE { pre_minimap_input .multiMap { meta, 
reads_path, ref, bam_output, cigar_paf, cigar_bam, bed_output, reads_type -> read_tuple : tuple( meta, reads_path) - ref : ref + ref : tuple( meta, ref) bool_bam_ouput : bam_output + val_bam_index : "bai" bool_cigar_paf : cigar_paf bool_cigar_bam : cigar_bam bool_bed_output : bed_output @@ -81,6 +82,7 @@ workflow READ_COVERAGE { minimap_input.read_tuple, minimap_input.ref, minimap_input.bool_bam_ouput, + minimap_input.val_bam_index, minimap_input.bool_cigar_paf, minimap_input.bool_cigar_bam, minimap_input.bool_bed_output diff --git a/subworkflows/local/synteny.nf b/subworkflows/local/synteny.nf index 04b43cf4..f18ac0a0 100755 --- a/subworkflows/local/synteny.nf +++ b/subworkflows/local/synteny.nf @@ -26,8 +26,9 @@ workflow SYNTENY { .combine(reference_tuple) .multiMap{syntenic_ref, meta, ref -> syntenic_tuple : tuple(meta, syntenic_ref) - reference_fa : ref + reference_fa : tuple(meta, ref) bool_bam_output : false + val_bam_index : "bai" bool_cigar_paf : true bool_cigar_bam : false bool_bedfile : false @@ -42,6 +43,7 @@ workflow SYNTENY { mm_input.syntenic_tuple, mm_input.reference_fa, mm_input.bool_bam_output, + mm_input.val_bam_index, mm_input.bool_cigar_paf, mm_input.bool_cigar_bam, mm_input.bool_bedfile, From b31d536b825591d1bef5eb0e1a20b01f6a0dbf6b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 22 Nov 2024 14:20:40 +0000 Subject: [PATCH 03/12] Updates --- modules/nf-core/minimap2/align/main.nf | 2 +- subworkflows/local/nuc_alignments.nf | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index c14d377d..4faab880 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -35,7 +35,7 @@ process MINIMAP2_ALIGN { def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def bam_index = bam_index_extension ? 
"${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" - def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() < 2.5e9 && bam_format ? "-a | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : bed_format ? "| paftools.js splice2bed - > ${prefix}.bed " : "-o ${prefix}.paf" + def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() < 2.5e9 && bam_format ? "-a | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : bed_bool ? "| paftools.js splice2bed - > ${prefix}.bed " : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' def bam_input = "${reads.extension}".matches('sam|bam|cram') diff --git a/subworkflows/local/nuc_alignments.nf b/subworkflows/local/nuc_alignments.nf index e918b643..1aa0a4da 100755 --- a/subworkflows/local/nuc_alignments.nf +++ b/subworkflows/local/nuc_alignments.nf @@ -55,8 +55,9 @@ workflow NUC_ALIGNMENTS { } .multiMap {meta, nuc_file, reference, bool_1, bool_2, bool_3, bool_4 -> nuc : tuple(meta, nuc_file) - ref : reference + ref : tuple(meta, reference) bool_bam_output : bool_1 + val_bam_output : "bai" bool_cigar_paf : bool_2 bool_cigar_bam : bool_3 bool_bedfile : bool_4 @@ -71,6 +72,7 @@ workflow NUC_ALIGNMENTS { formatted_input.nuc, formatted_input.ref, formatted_input.bool_bam_output, + formatted_input.val_bam_output, formatted_input.bool_cigar_paf, formatted_input.bool_cigar_bam, formatted_input.bool_bedfile From 60a064234d41ce05743831e66b49828631565892 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 22 Nov 2024 15:52:27 +0000 Subject: [PATCH 04/12] Updates --- modules/nf-core/busco/busco/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/busco/busco/main.nf b/modules/nf-core/busco/busco/main.nf index 
f5650847..f7c1a662 100644 --- a/modules/nf-core/busco/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -4,8 +4,8 @@ process BUSCO_BUSCO { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/busco:5.5.0--pyhdfd78af_0': - 'biocontainers/busco:5.5.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0': + 'biocontainers/busco:5.7.1--pyhdfd78af_0' }" input: tuple val(meta), path(fasta, stageAs:'tmp_input/*') From 0da07eecace087f8b0a3064a052f83c11e86fc79 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 25 Nov 2024 13:20:25 +0000 Subject: [PATCH 05/12] Update to modules and changelogs to remove Anaconda --- CHANGELOG.md | 2 +- CITATIONS.md | 4 +-- README.md | 2 +- modules/local/pretext_graph.nf | 2 +- modules/nf-core/busco/busco/busco-busco.diff | 26 -------------------- modules/nf-core/pretextmap/main.nf | 2 +- 6 files changed, 6 insertions(+), 32 deletions(-) delete mode 100644 modules/nf-core/busco/busco/busco-busco.diff diff --git a/CHANGELOG.md b/CHANGELOG.md index fd69b777..bc4f3052 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,7 +56,7 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i | miniprot | 0.11--he4a0461_2 | | | mummer | 3.23 | | | paftools ( minimap2 + samtools ) | 2.24 + 1.14 | | -| pretextmap + samtools | 0.0.2 + 1.17 | 0.1.9 + 1.17 | +| pretextmap + samtools | 0.0.2 + 1.17 | 0.0.3 + 1.17 | | python | 3.9 | - | | - pandas | 1.5.2 | - | | samtools | 1.18 | 1.21 | diff --git a/CITATIONS.md b/CITATIONS.md index 7db8de50..2c7295da 100755 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -112,9 +112,9 @@ ## Software packaging/containerisation tools -- [Anaconda](https://anaconda.com) +- [Conda](https://conda.org/) - > Anaconda Software Distribution. 2016. Computer software. Vers. 2-2.4.0. Anaconda, Web. + > conda contributors. 
conda: A system-level, binary package and environment manager running on all major operating systems and platforms. Computer software. https://github.com/conda/conda - [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) diff --git a/README.md b/README.md index ada31f21..a1a95647 100755 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![Cite with Zenodo](https://zenodo.org/badge/509096312.svg)](https://zenodo.org/doi/10.5281/zenodo.10047653) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=conda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/treeval) diff --git a/modules/local/pretext_graph.nf b/modules/local/pretext_graph.nf index 9a1d3ff2..60527482 100644 --- a/modules/local/pretext_graph.nf +++ b/modules/local/pretext_graph.nf @@ -2,7 +2,7 @@ process PRETEXT_GRAPH { tag "$meta.id" label 'process_single' - container "quay.io/sanger-tol/pretext:0.0.2-yy5-c3" + container "quay.io/sanger-tol/pretext:0.0.3-yy5-c1" // Exit if running this module with -profile conda / -profile mamba if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { diff --git a/modules/nf-core/busco/busco/busco-busco.diff b/modules/nf-core/busco/busco/busco-busco.diff deleted file mode 100644 index ea0a471b..00000000 --- 
a/modules/nf-core/busco/busco/busco-busco.diff +++ /dev/null @@ -1,26 +0,0 @@ -Changes in module 'nf-core/busco/busco' -'modules/nf-core/busco/busco/meta.yml' is unchanged -Changes in 'busco/busco/main.nf': ---- modules/nf-core/busco/busco/main.nf -+++ modules/nf-core/busco/busco/main.nf -@@ -4,8 +4,8 @@ - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? -- 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0': -- 'biocontainers/busco:5.7.1--pyhdfd78af_0' }" -+ 'https://depot.galaxyproject.org/singularity/busco:5.5.0--pyhdfd78af_0': -+ 'biocontainers/busco:5.5.0--pyhdfd78af_0' }" - - input: - tuple val(meta), path(fasta, stageAs:'tmp_input/*') - -'modules/nf-core/busco/busco/environment.yml' is unchanged -'modules/nf-core/busco/busco/tests/old_test.yml' is unchanged -'modules/nf-core/busco/busco/tests/nextflow.augustus.config' is unchanged -'modules/nf-core/busco/busco/tests/main.nf.test' is unchanged -'modules/nf-core/busco/busco/tests/tags.yml' is unchanged -'modules/nf-core/busco/busco/tests/nextflow.config' is unchanged -'modules/nf-core/busco/busco/tests/main.nf.test.snap' is unchanged -'modules/nf-core/busco/busco/tests/nextflow.metaeuk.config' is unchanged -************************************************************ diff --git a/modules/nf-core/pretextmap/main.nf b/modules/nf-core/pretextmap/main.nf index 95824c45..cbcaf68c 100644 --- a/modules/nf-core/pretextmap/main.nf +++ b/modules/nf-core/pretextmap/main.nf @@ -4,7 +4,7 @@ process PRETEXTMAP { label 'process_single' conda "${moduleDir}/environment.yml" - container "quay.io/sanger-tol/pretext:0.0.2-yy5-c4" + container "quay.io/sanger-tol/pretext:0.0.3-yy5-c1" input: tuple val(meta), path(input) From d768e47a12f9cf040972d8debe4365d845c17cd8 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 25 Nov 2024 13:26:40 +0000 Subject: [PATCH 06/12] PRETTIER --- CHANGELOG.md | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc4f3052..4c188317 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,7 +19,7 @@ Our 3rd release for sanger-tol/treeval. - reformat_intersect was previously not outputing version data. - Adding arch specification to Pretext GitHub actions runner. Hopefully this will stop the spurious errors we see on there. - Addition of steps into schema. -- Adds *ktab as an output. +- Adds \*ktab as an output. ### Parameters @@ -35,7 +35,7 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i | -------------------------------------- | ---------------- | ------------ | | bamtobed_sort ( bedtools + samtools ) | 2.31.0 + 1.17 | | | bedtools | 2.31.1 | - | -| busco* | 5.5.0 | - | +| busco\* | 5.5.0 | - | | bwa-mem2 | 2.2.1 | | | cat | 2.3.4 | | | chunk_fasta ( pyfasta ) | 0.5.2-1 | | @@ -66,7 +66,7 @@ Note, since the pipeline is using Nextflow DSL2, each process will be run with i | ucsc | 377 | 447 | | windowmasker (blast) | 2.14.0 | 2.15.0 | -* busco is currently pinned to v5.5.0 - Upgrading v5.7.1 would cause github actions to crash. Further investigation needed. +- busco is currently pinned to v5.5.0 - Upgrading v5.7.1 would cause github actions to crash. Further investigation needed. 
## [1.1.1] - Ancient Aurora (H1) - [2024-04-26] From 52ba726993ba267639fd8dda2fb384ca236ce6a1 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 25 Nov 2024 13:28:32 +0000 Subject: [PATCH 07/12] PRETTIER --- modules/nf-core/fastk/fastk/environment.yml | 4 ++-- modules/nf-core/merquryfk/merquryfk/environment.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/nf-core/fastk/fastk/environment.yml b/modules/nf-core/fastk/fastk/environment.yml index 19ab079a..690a3f7a 100644 --- a/modules/nf-core/fastk/fastk/environment.yml +++ b/modules/nf-core/fastk/fastk/environment.yml @@ -1,4 +1,4 @@ name: fastk_fastk channels: - - conda-forge - - bioconda + - conda-forge + - bioconda diff --git a/modules/nf-core/merquryfk/merquryfk/environment.yml b/modules/nf-core/merquryfk/merquryfk/environment.yml index 50d352bf..fa6cb1a0 100644 --- a/modules/nf-core/merquryfk/merquryfk/environment.yml +++ b/modules/nf-core/merquryfk/merquryfk/environment.yml @@ -1,4 +1,4 @@ name: merquryfk_merquryfk channels: - - conda-forge - - bioconda + - conda-forge + - bioconda From f080b9783bae2c1f00148bf1ee759037594fa73e Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 26 Nov 2024 09:57:25 +0000 Subject: [PATCH 08/12] Update modules.config for BUSCO_BUSCO --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 34b9d8bd..d9dd20f1 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -316,7 +316,7 @@ process { } withName: "BUSCO_BUSCO" { - ext.args = "--offline" + ext.args = "--offline --metaeuk" } From 504323416a84a9f9320a9a87b2240f5df638c39f Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 26 Nov 2024 12:30:34 +0000 Subject: [PATCH 09/12] New module replaces chunkfasta --- modules/local/seqkit/split/main.nf | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100755 modules/local/seqkit/split/main.nf diff --git a/modules/local/seqkit/split/main.nf 
b/modules/local/seqkit/split/main.nf new file mode 100755 index 00000000..fa0dfdbf --- /dev/null +++ b/modules/local/seqkit/split/main.nf @@ -0,0 +1,43 @@ +process SEQKIT_SPLIT { + tag "${meta.id}" + label 'process_low' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqkit:2.9.0--h9ee0642_0' : + 'biocontainers/seqkit:2.9.0--h9ee0642_0' }" + + input: + tuple val(meta), path(fasta) + val(number_of_chunks) + + output: + tuple val(meta), path('*.fa'), emit: fasta + path "versions.yml", emit: versions + + script: + // This should be abstracted outside of the container to + // stop it spinning up in the first place, + // however dsl2 can't do comparisons with channels which makes it harder + """ + if [ $number_of_chunks -le 1 ]; then + mv input.fasta ${meta.id}_whole.fa + else + seqkit split ${fasta} -p $number_of_chunks -O ./ + fi + + cat <<-END_VERSIONS > versions.yml + "${task.proce ss}": + seqkit: \$(seqkit version | sed -e "s/seqkit v//g") + END_VERSIONS + """ + + stub: + """ + touch ${meta.id}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | sed -e "s/seqkit v//g") + END_VERSIONS + """ +} From 286c1eab20721cd5bb87204d08973e3dbcde464b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 26 Nov 2024 12:30:57 +0000 Subject: [PATCH 10/12] Updates to changelog and removing CHUNKFASTA, replaced with seqkit --- CHANGELOG.md | 72 ++++++++++++++++++---------------- modules/local/chunkfasta.nf | 48 ----------------------- subworkflows/local/selfcomp.nf | 13 +++--- 3 files changed, 44 insertions(+), 89 deletions(-) delete mode 100755 modules/local/chunkfasta.nf diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c188317..6be0c8b6 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ Our 3rd release for sanger-tol/treeval. - Adding arch specification to Pretext GitHub actions runner.
Hopefully this will stop the spurious errors we see on there. - Addition of steps into schema. - Adds \*ktab as an output. +- Updated singularity containers +- Added `--metaeuk` to BUSCO_BUSCO, default was causing pipeline errors on Actions -- Needs more investigation. +- Replaced Pyfasta split (deprecated 6 years ago) with Seqkit split which is frequently updated and very fast. ### Parameters @@ -31,40 +34,41 @@ Our 3rd release for sanger-tol/treeval. Note, since the pipeline is using Nextflow DSL2, each process will be run with its own Biocontainer. This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. -| Module | Old Version | New Versions | -| -------------------------------------- | ---------------- | ------------ | -| bamtobed_sort ( bedtools + samtools ) | 2.31.0 + 1.17 | | -| bedtools | 2.31.1 | - | -| busco\* | 5.5.0 | - | -| bwa-mem2 | 2.2.1 | | -| cat | 2.3.4 | | -| chunk_fasta ( pyfasta ) | 0.5.2-1 | | -| cooler | 0.9.2 | | -| cram_filter_align_bwamem2_fixmate_sort | - | | -| ^ ( samtools + bwamem2 ) ^ | 1.17 + 2.2.1 | | -| coreutils | 9.1 | | -| fastk | 1.0.1 | | -| gcc | 10.4.0 | | -| find_telomere_windows ( java-jdk ) | 8.0.112 | | -| generate_cram_csv ( samtools ) | 1.17 | | -| gnu-sort | 8.25 | 9.3 | -| juicer_tools_pre ( java-jdk ) | 8.0.112 | | -| perl | 5.26.2 | | -| merquryfk | 1.0.1 | | -| minimap2 + samtools | 2.24 + 1.14 | | -| minimap2_index | 2.24 | 2.28 | -| miniprot | 0.11--he4a0461_2 | | -| mummer | 3.23 | | -| paftools ( minimap2 + samtools ) | 2.24 + 1.14 | | -| pretextmap + samtools | 0.0.2 + 1.17 | 0.0.3 + 1.17 | -| python | 3.9 | - | -| - pandas | 1.5.2 | - | -| samtools | 1.18 | 1.21 | -| selfcomp_splitfasta ( perl-bioperl ) | 1.7.8-1 | | -| seqtk | 1.4 | | -| tabix | 1.11 | | -| ucsc | 377 | 447 | -| windowmasker (blast) | 2.14.0 | 2.15.0 | +| Module |
Old Version | New Versions | +| -------------------------------------- | ---------------- | ----------------- | +| bamtobed_sort ( bedtools + samtools ) | 2.31.0 + 1.17 | | +| bedtools | 2.31.1 | - | +| busco\* | 5.5.0 | - | +| bwa-mem2 | 2.2.1 | | +| cat | 2.3.4 | | +| chunk_fasta ( pyfasta ) | 0.5.2-1 | REMOVED | +| cooler | 0.9.2 | | +| cram_filter_align_bwamem2_fixmate_sort | - | | +| ^ ( samtools + bwamem2 ) ^ | 1.17 + 2.2.1 | | +| coreutils | 9.1 | | +| fastk | 1.0.1 | | +| gcc | 10.4.0 | | +| find_telomere_windows ( java-jdk ) | 8.0.112 | | +| generate_cram_csv ( samtools ) | 1.17 | | +| gnu-sort | 8.25 | 9.3 | +| juicer_tools_pre ( java-jdk ) | 8.0.112 | | +| perl | 5.26.2 | | +| merquryfk | 1.0.1 | | +| minimap2 + samtools | 2.24 + 1.14 | | +| minimap2_index | 2.24 | 2.28 | +| miniprot | 0.11--he4a0461_2 | | +| mummer | 3.23 | | +| paftools ( minimap2 + samtools ) | 2.24 + 1.14 | | +| pretextmap + samtools | 0.0.2 + 1.17 | 0.0.3 + 1.17 | +| python | 3.9 | - | +| - pandas | 1.5.2 | - | +| samtools | 1.18 | 1.21 | +| selfcomp_splitfasta ( perl-bioperl ) | 1.7.8-1 | | +| seqtk | 1.4 | | +| seqkit | ADDED | 2.9.0--h9ee0642_0 | +| tabix | 1.11 | | +| ucsc | 377 | 447 | +| windowmasker (blast) | 2.14.0 | 2.15.0 | - busco is currently pinned to v5.5.0 - Upgrading v5.7.1 would cause github actions to crash. Further investigation needed. diff --git a/modules/local/chunkfasta.nf b/modules/local/chunkfasta.nf deleted file mode 100755 index 0400df24..00000000 --- a/modules/local/chunkfasta.nf +++ /dev/null @@ -1,48 +0,0 @@ -process CHUNKFASTA { - tag "${meta.id}" - label 'process_low' - - conda "conda-forge::pyfasta=0.5.2-1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pyfasta:0.5.2--py_1' : - 'biocontainers/pyfasta:0.5.2--py_1' }" - - input: - tuple val(meta), path('input.fasta') - val(number_of_chunks) - - output: - tuple val(meta), path('*.fasta'), emit: fasta - path "versions.yml" , emit: versions - - script: - def VERSION = '0.5.2' // Tool does not report version - // This should be abstracted outside of the container to - // stop it spinning up in the first place, - // however dsl2 can't do comparisons with channels which makes it harder - """ - if [ $number_of_chunks -le 1 ]; then - mv input.fasta ${meta.id}_whole.fasta - else - pyfasta split -n $number_of_chunks input.fasta - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - pyfasta: $VERSION - END_VERSIONS - """ - - stub: - def VERSION = '0.5.2' // Tool does not report version - """ - touch ${meta.id}.fa - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - pyfasta: $VERSION - END_VERSIONS - """ -} diff --git a/subworkflows/local/selfcomp.nf b/subworkflows/local/selfcomp.nf index 9d1e0e95..8e702635 100755 --- a/subworkflows/local/selfcomp.nf +++ b/subworkflows/local/selfcomp.nf @@ -12,7 +12,7 @@ include { BEDTOOLS_SORT } from '../../modules/nf-core/bedtools/ include { SELFCOMP_SPLITFASTA } from '../../modules/local/selfcomp_splitfasta' include { SELFCOMP_MUMMER2BED } from '../../modules/local/selfcomp_mummer2bed' include { SELFCOMP_MAPIDS } from '../../modules/local/selfcomp_mapids' -include { CHUNKFASTA } from '../../modules/local/chunkfasta' +include { SEQKIT_SPLIT } from '../../modules/local/seqkit/split/main' include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' include { SELFCOMP_ALIGNMENTBLOCKS } from '../../modules/local/selfcomp_alignmentblocks' include { CONCATBLOCKS } from '../../modules/local/concatblocks' @@ -57,28 +57,28 @@ workflow SELFCOMP { // MODULE: SPLIT REFERENCE FILE INTO 
1GB CHUNKS // THIS IS THE QUERY, AND REFERENCE IF GENOME.size() > 1GB // - CHUNKFASTA( + SEQKIT_SPLIT( SELFCOMP_SPLITFASTA.out.fa, chunk_number ) - ch_versions = ch_versions.mix(CHUNKFASTA.out.versions) + ch_versions = ch_versions.mix(SEQKIT_SPLIT.out.versions) // // LOGIC: STRIP META FROM QUERY, AND COMBINE WITH REFERENCE FILE // THIS LEAVES US WITH n=( REFERENCE + QUERY) IF GENOME.SIZE() < 1GB // OR n=((REFERENCE / 1E9) * (REFENCE / 1E9)) IF GENOME.SIZE() > 1GB // - CHUNKFASTA.out.fasta + SEQKIT_SPLIT.out.fasta .map{meta, query -> query } - .collect() // Collect any output from CHUNKFASTA + .collect() // Collect any output from SEQKIT_SPLIT .map {it -> tuple( [ len: it.size() ], // Calc length of list it ) } - .set {len_ch} // tap out to preserve length of CHUNKFASTA list + .set {len_ch} // tap out to preserve length of SEQKIT_SPLIT list len_ch // tap swapped with set as tap stops pipeline completion .map {meta, files -> @@ -216,4 +216,3 @@ workflow SELFCOMP { ch_bigbed = UCSC_BEDTOBIGBED.out.bigbed versions = ch_versions.ifEmpty(null) } - From b268f62ce96365b3b811a610fcb0c12579e44615 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 26 Nov 2024 12:39:07 +0000 Subject: [PATCH 11/12] Random Space --- modules/local/seqkit/split/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/seqkit/split/main.nf b/modules/local/seqkit/split/main.nf index fa0dfdbf..e23dc29c 100755 --- a/modules/local/seqkit/split/main.nf +++ b/modules/local/seqkit/split/main.nf @@ -26,7 +26,7 @@ process SEQKIT_SPLIT { fi cat <<-END_VERSIONS > versions.yml - "${task.proce ss}": + "${task.process}": seqkit: \$(seqkit version | sed -e "s/seqkit v//g") END_VERSIONS """ From 8cf822c3417986bcb55cc2eb917bdbb2d29ad079 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 26 Nov 2024 12:58:01 +0000 Subject: [PATCH 12/12] Re-add the assignment --- modules/local/seqkit/split/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/modules/local/seqkit/split/main.nf b/modules/local/seqkit/split/main.nf index e23dc29c..d9266738 100755 --- a/modules/local/seqkit/split/main.nf +++ b/modules/local/seqkit/split/main.nf @@ -7,7 +7,7 @@ process SEQKIT_SPLIT { 'biocontainers/seqkit:2.9.0--h9ee0642_0' }" input: - tuple val(meta), path(fasta) + tuple val(meta), path('input.fasta') val(number_of_chunks) output: @@ -22,7 +22,7 @@ process SEQKIT_SPLIT { if [ $number_of_chunks -le 1 ]; then mv input.fasta ${meta.id}_whole.fa else - seqkit split ${fasta} -p $number_of_chunks -O ./ + seqkit split input.fasta -p $number_of_chunks -O ./ fi cat <<-END_VERSIONS > versions.yml