diff --git a/CHANGELOG.md b/CHANGELOG.md index 56007304..2ef54b3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,35 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[0.4.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.4.0)] – Buneary – [2024-03-28] + +The pipeline has now been validated on dozens of genomes, up to 11 Gbp. + +### Enhancements & fixes + +- Upgraded the version of `blobtools`, which enables a better reporting of + wrong accession numbers and a better handling of oddities in input files. +- Files in the output blobdir are now compressed. +- All modules handling blobdirs can now be cached. +- Large genomes supported, up to at least 11 Gbp. +- Allow all variations of FASTA and FASTQ extensions for input. +- More fields included in the trace files. +- All nf-core modules updated + +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. + +| Dependency | Old version | New version | +| ----------- | ------------- | ------------- | +| blobtoolkit | 4.3.3 | 4.3.9 | +| blast | 2.14.0 | 2.15.0 | +| multiqc | 1.17 and 1.18 | 1.20 and 1.21 | +| samtools | 1.18 | 1.19.2 | +| seqtk | 1.3 | 1.4 | + +> **NB:** Dependency has been **updated** if both old and new version information is present.
**NB:** Dependency has been **added** if just the new version information is present.
**NB:** Dependency has been **removed** if version information isn't present. + ## [[0.3.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.3.0)] – Poliwag – [2024-02-09] The pipeline has now been validated on five genomes, all under 100 Mbp: a @@ -33,6 +62,16 @@ sponge, a platyhelminth, and three fungi. > **NB:** Parameter has been **updated** if both old and new parameter information is present.
**NB:** Parameter has been **added** if just the new parameter information is present.
**NB:** Parameter has been **removed** if new parameter information isn't present. +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. + +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| blobtoolkit | 4.3.2 | 4.3.3 | + +> **NB:** Dependency has been **updated** if both old and new version information is present.
**NB:** Dependency has been **added** if just the new version information is present.
**NB:** Dependency has been **removed** if version information isn't present. + ## [[0.2.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.2.0)] – Pikachu – [2023-12-22] ### Enhancements & fixes diff --git a/README.md b/README.md index c2f2a9fc..31512bc0 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ ## Introduction -**sanger-tol/blobtoolkit** is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes. It takes a samplesheet and aligned CRAM files as input, calculates genome statistics, coverage and completeness information, combines them in a TSV file by window size to create a BlobDir dataset and static plots. +**sanger-tol/blobtoolkit** is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes. +It takes a samplesheet of BAM/CRAM/FASTQ/FASTA files as input, calculates genome statistics, coverage and completeness information, combines them in a TSV file by window size to create a BlobDir dataset and static plots. 1. Calculate genome statistics in windows ([`fastawindows`](https://github.com/tolkit/fasta_windows)) 2. 
Calculate Coverage ([`blobtk/depth`](https://github.com/blobtoolkit/blobtk)) diff --git a/assets/schema_input.json b/assets/schema_input.json index f08ccb89..26ed41cb 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -21,8 +21,8 @@ }, "datafile": { "type": "string", - "pattern": "^\\S+\\.cram$", - "errorMessage": "Data file for reads cannot contain spaces and must have extension 'cram'" + "pattern": "^\\S+\\.(bam|cram|fa|fa.gz|fasta|fasta.gz|fq|fq.gz|fastq|fastq.gz)$", + "errorMessage": "Data file for reads cannot contain spaces and must be BAM/CRAM/FASTQ/FASTA" } }, "required": ["datafile", "datatype", "sample"] diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index c63d06fe..6b5392bf 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -27,8 +27,14 @@ class RowChecker: VALID_FORMATS = ( ".cram", ".bam", + ".fq", + ".fq.gz", ".fastq", ".fastq.gz", + ".fa", + ".fa.gz", + ".fasta", + ".fasta.gz", ) VALID_DATATYPES = ( diff --git a/bin/update_versions.py b/bin/update_versions.py index 0978393c..9e014b46 100755 --- a/bin/update_versions.py +++ b/bin/update_versions.py @@ -12,9 +12,10 @@ def parse_args(args=None): Description = "Combine BED files to create window stats input file." 
parser = argparse.ArgumentParser(description=Description) - parser.add_argument("--meta", help="Input JSON file.", required=True) + parser.add_argument("--meta_in", help="Input JSON file.", required=True) + parser.add_argument("--meta_out", help="Output JSON file.", required=True) parser.add_argument("--software", help="Input YAML file.", required=True) - parser.add_argument("--version", action="version", version="%(prog)s 1.0.0") + parser.add_argument("--version", action="version", version="%(prog)s 1.1.0") return parser.parse_args(args) @@ -41,8 +42,8 @@ def update_meta(meta, software): def main(args=None): args = parse_args(args) - data = update_meta(args.meta, args.software) - with open(args.meta, "w") as fh: + data = update_meta(args.meta_in, args.software) + with open(args.meta_out, "w") as fh: json.dump(data, fh) diff --git a/conf/base.config b/conf/base.config index 6ebea12c..8f51f7f8 100644 --- a/conf/base.config +++ b/conf/base.config @@ -52,6 +52,58 @@ process { withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + + withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_CCS' { + cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } + time = { check_max( 4.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + } + + // Extrapolated from the HIFI settings on the basis of 1 ONT alignment. 
CLR assumed to behave the same way as ONT + withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_(CLR|ONT)' { + cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 30.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } + time = { check_max( 1.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + } + + // Temporarily the same settings as CCS + withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_(HIC|ILMN)' { + cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } + time = { check_max( 3.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + } + + withName: 'WINDOWSTATS_INPUT' { + cpus = { check_max( 1 , 'cpus' ) } + // 2 GB per 1 Gbp + memory = { check_max( 2.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000), 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withName: 'BLOBTOOLKIT_WINDOWSTATS' { + cpus = { check_max( 1 , 'cpus' ) } + // 3 GB per 1 Gbp + memory = { check_max( 3.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000), 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + + withName: 'FASTAWINDOWS' { + // 1 CPU per 1 Gbp + cpus = { check_max( Math.ceil(meta.genome_size / 1000000000), 'cpus' ) } + // 100 MB per 45 Mbp + memory = { check_max( 100.MB * task.attempt * Math.ceil(meta.genome_size / 45000000), 'memory' ) } + } + + withName: BUSCO { + // The formulas below are equivalent to these ranges: + // Gbp: [ 1, 2, 4, 8, 16] + // CPUs: [ 8, 12, 16, 20, 24] + // GB RAM: [16, 32, 64, 128, 256] + memory = { check_max( 1.GB * Math.pow(2, 3 + task.attempt + Math.ceil(positive_log(meta.genome_size/1000000000, 2))) 
, 'memory' ) } + cpus = { log_increase_cpus(4, 4*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) } + time = { check_max( 3.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/conf/modules.config b/conf/modules.config index 974728f5..439a77b3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -29,23 +29,23 @@ process { } withName: "MINIMAP2_HIC" { - ext.args = "-ax sr" + ext.args = { "-ax sr -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_ILMN" { - ext.args = "-ax sr" + ext.args = { "-ax sr -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_CCS" { - ext.args = "-ax map-hifi --cs=short" + ext.args = { "-ax map-hifi --cs=short -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_CLR" { - ext.args = "-ax map-pb" + ext.args = { "-ax map-pb -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_ONT" { - ext.args = "-ax map-ont" + ext.args = { "-ax map-ont -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "SAMTOOLS_VIEW" { @@ -67,6 +67,9 @@ process { // Note: BUSCO *must* see the double-quotes around the parameters '--force --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' : '--force' } + } + + withName: "RESTRUCTUREBUSCODIR" { publishDir = [ path: { "${params.outdir}/busco" }, mode: params.publish_dir_mode, @@ -98,22 +101,6 @@ process { ext.args = "--evalue 1.0e-25 --hit-count 10" } - withName: "BLOBTOOLKIT_SUMMARY" { - publishDir = [ - path: { "${params.outdir}/blobtoolkit/${blobdir.name}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals("versions.yml") ? null : filename } - ] - } - - withName: "BLOBTK_IMAGES" { - publishDir = [ - path: { "${params.outdir}/blobtoolkit/plots" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals("versions.yml") ? 
null : filename } - ] - } - withName: "BLOBTOOLKIT_CHUNK" { ext.args = "--chunk 100000 --overlap 0 --max-chunks 10 --min-length 1000" } @@ -138,7 +125,7 @@ process { ] } - withName: "BLOBTOOLKIT_UPDATEMETA" { + withName: "COMPRESSBLOBDIR" { publishDir = [ path: { "${params.outdir}/blobtoolkit" }, mode: params.publish_dir_mode, @@ -146,6 +133,14 @@ process { ] } + withName: "BLOBTK_IMAGES" { + publishDir = [ + path: { "${params.outdir}/blobtoolkit/plots" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/docs/output.md b/docs/output.md index e6efe8bc..18fe2b6d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -8,13 +8,13 @@ The directories listed below will be created in the results directory after the The directories comply with Tree of Life's canonical directory structure. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [BlobDir](#blobdir) - Output files from `blobtools` and `view` subworkflow +- [BlobDir](#blobdir) - Output files viewable on a [BlobToolKit viewer](https://github.com/blobtoolkit/blobtoolkit) +- [Static plots](#static-plots) - Static versions of the BlobToolKit plots +- [BUSCO](#busco) - BUSCO results - [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -25,14 +25,43 @@ The files in the BlobDir dataset which is used to create the online interactive
Output files -- `/` - - `*.json`: files generated from genome and alignment coverage statistics - - `*.png`: static plot images +- `blobtoolkit/` + - `/` + - `*.json.gz`: files generated from genome and alignment coverage statistics More information about visualising the data in the [BlobToolKit repository](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/viewer)
+### Static plots + +Images generated from the above blobdir using the [blobtk](https://github.com/blobtoolkit/blobtk) tool. + +
+Output files + +- `blobtoolkit/` + - `plots/` + - `*.png` or `*.svg`, depending on the selected output format: static versions of the BlobToolKit plots. + +
+ +### BUSCO + +BUSCO results generated by the pipeline (all BUSCO lineages that match the classification of the species). + +
+Output files + +- `blobtoolkit/` + - `busco/` + - `*.batch_summary.txt`: BUSCO scores as tab-separated files (1 file per lineage). + - `*.fasta.txt`: BUSCO scores as formatted text (1 file per lineage). + - `*.json`: BUSCO scores as JSON (1 file per lineage). + - `*/`: all output BUSCO files, including the coordinate and sequence files of the annotated genes. + +
+ ### MultiQC
diff --git a/docs/usage.md b/docs/usage.md index 84229b17..4789ff84 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -229,8 +229,8 @@ List of tools for any given dataset can be fetched from the API, for example htt | Dependency | Snakemake | Nextflow | | ----------------- | --------- | -------- | -| blobtoolkit | 4.3.2 | 4.3.2 | -| blast | 2.12.0 | 2.14.1 | +| blobtoolkit | 4.3.2 | 4.3.9 | +| blast | 2.12.0 | 2.15.0 | | blobtk | 0.5.0 | 0.5.1 | | busco | 5.3.2 | 5.5.0 | | diamond | 2.0.15 | 2.1.8 | @@ -240,8 +240,8 @@ List of tools for any given dataset can be fetched from the API, for example htt | ncbi-datasets-cli | 14.1.0 | | | nextflow | | 23.10.0 | | python | 3.9.13 | 3.12.0 | -| samtools | 1.15.1 | 1.18 | -| seqtk | 1.3 | | +| samtools | 1.15.1 | 1.19.2 | +| seqtk | 1.3 | 1.4 | | snakemake | 7.19.1 | | | windowmasker | 2.12.0 | 2.14.0 | diff --git a/modules.json b/modules.json index 7ba1a8db..667a4482 100644 --- a/modules.json +++ b/modules.json @@ -19,13 +19,12 @@ }, "cat/cat": { "branch": "master", - "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", - "installed_by": ["modules"], - "patch": "modules/nf-core/cat/cat/cat-cat.diff" + "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", + "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", "installed_by": ["modules"] }, "diamond/blastp": { @@ -41,7 +40,8 @@ "fastawindows": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/fastawindows/fastawindows.diff" }, "goat/taxonsearch": { "branch": "master", @@ -50,48 +50,55 @@ }, "gunzip": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": 
"a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": ["modules"] + "git_sha": "2c2d1cf80866dbd6dd0ea5d61ddd59533a72d41e", + "installed_by": ["modules"], + "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", - "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, "samtools/fasta": { "branch": "master", - "git_sha": "9b1071e19265cf9c0d06958a011cf7a9cfe37213", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"], "patch": "modules/nf-core/samtools/fasta/samtools-fasta.diff" }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["modules"] + }, "samtools/index": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", "installed_by": ["modules"] }, "seqtk/subseq": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502", + "installed_by": ["modules"], + "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "windowmasker/mkcounts": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", "installed_by": ["modules"] } } diff --git a/modules/local/blobtoolkit/chunk.nf b/modules/local/blobtoolkit/chunk.nf index 73f27532..7dad9182 100644 --- a/modules/local/blobtoolkit/chunk.nf +++ 
b/modules/local/blobtoolkit/chunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_CHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta) , path(fasta) diff --git a/modules/local/blobtoolkit/config.nf b/modules/local/blobtoolkit/config.nf index d93b85b4..32d4eacd 100644 --- a/modules/local/blobtoolkit/config.nf +++ b/modules/local/blobtoolkit/config.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CONFIG { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "GENERATE_CONFIG module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), val(reads) diff --git a/modules/local/blobtoolkit/countbuscos.nf b/modules/local/blobtoolkit/countbuscos.nf index 203633e1..1b415504 100644 --- a/modules/local/blobtoolkit/countbuscos.nf +++ b/modules/local/blobtoolkit/countbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_COUNTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_COUNTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(table, stageAs: 'dir??/*') diff --git a/modules/local/blobtoolkit/createblobdir.nf b/modules/local/blobtoolkit/createblobdir.nf index 2c8517ab..dfaddb7d 100644 --- a/modules/local/blobtoolkit/createblobdir.nf +++ b/modules/local/blobtoolkit/createblobdir.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(window, stageAs: 'windowstats/*') diff --git a/modules/local/blobtoolkit/extractbuscos.nf b/modules/local/blobtoolkit/extractbuscos.nf index 128780fe..1e4440cb 100644 --- a/modules/local/blobtoolkit/extractbuscos.nf +++ b/modules/local/blobtoolkit/extractbuscos.nf @@ -5,11 +5,11 @@ process BLOBTOOLKIT_EXTRACTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_EXTRACTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(fasta) - tuple val(metaseq), path(seq, stageAs: "lineage??/*") + path seq, stageAs: "lineage??/*" output: tuple val(meta), path("*_buscogenes.fasta"), emit: genes diff --git a/modules/local/blobtoolkit/metadata.nf b/modules/local/blobtoolkit/metadata.nf index 96948345..ffae2a8c 100644 --- a/modules/local/blobtoolkit/metadata.nf +++ b/modules/local/blobtoolkit/metadata.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_METADATA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_METADATA module does not support Conda. 
Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(yaml) diff --git a/modules/local/blobtoolkit/summary.nf b/modules/local/blobtoolkit/summary.nf index 45f0471a..9b1a262f 100644 --- a/modules/local/blobtoolkit/summary.nf +++ b/modules/local/blobtoolkit/summary.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_SUMMARY { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_SUMMARY module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(blobdir) diff --git a/modules/local/blobtoolkit/unchunk.nf b/modules/local/blobtoolkit/unchunk.nf index f9797178..5285b0dc 100644 --- a/modules/local/blobtoolkit/unchunk.nf +++ b/modules/local/blobtoolkit/unchunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UNCHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UNCHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(blast_table) diff --git a/modules/local/blobtoolkit/updateblobdir.nf b/modules/local/blobtoolkit/updateblobdir.nf index cbcdc7b5..50167f8b 100644 --- a/modules/local/blobtoolkit/updateblobdir.nf +++ b/modules/local/blobtoolkit/updateblobdir.nf @@ -5,10 +5,10 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: - tuple val(meta), path(input) + tuple val(meta), path(input, stageAs: "input_blobdir") tuple val(meta1), path(blastx, stageAs: "blastx.txt") tuple val(meta2), path(blastn, stageAs: "blastn.txt") path(taxdump) @@ -26,6 +26,9 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { def hits_blastx = blastx ? "--hits ${blastx}" : "" def hits_blastn = blastn ? "--hits ${blastn}" : "" """ + # In-place modifications are not great in Nextflow, so work on a copy of ${input} + mkdir ${prefix} + cp --preserve=timestamp ${input}/* ${prefix}/ blobtools replace \\ --taxdump ${taxdump} \\ --taxrule bestdistorder=buscoregions \\ @@ -33,7 +36,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { ${hits_blastn} \\ --threads ${task.cpus} \\ $args \\ - ${input} + ${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/blobtoolkit/updatemeta.nf b/modules/local/blobtoolkit/updatemeta.nf index 45df4209..de1313d5 100644 --- a/modules/local/blobtoolkit/updatemeta.nf +++ b/modules/local/blobtoolkit/updatemeta.nf @@ -12,8 +12,8 @@ process BLOBTOOLKIT_UPDATEMETA { path versions output: - tuple val(meta), path(prefix), emit: blobdir - path "versions.yml" , emit: versions + tuple val(meta), path("*.json"), emit: json + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,8 +24,9 @@ process BLOBTOOLKIT_UPDATEMETA { """ update_versions.py \\ ${args} \\ - --meta ${input}/meta.json \\ + --meta_in ${input}/meta.json \\ --software ${versions} \\ + --meta_out ${prefix}.meta.json cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/blobtoolkit/windowstats.nf b/modules/local/blobtoolkit/windowstats.nf index 26bd49f5..d432a8ff 100644 --- a/modules/local/blobtoolkit/windowstats.nf +++ b/modules/local/blobtoolkit/windowstats.nf @@ -1,11 +1,10 @@ process BLOBTOOLKIT_WINDOWSTATS { tag "$meta.id" - label 'process_single' if 
(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_WINDOWSTATS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(tsv) diff --git a/modules/local/compressblobdir.nf b/modules/local/compressblobdir.nf new file mode 100644 index 00000000..694d415a --- /dev/null +++ b/modules/local/compressblobdir.nf @@ -0,0 +1,37 @@ +process COMPRESSBLOBDIR { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::pigz=2.8" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(input, stageAs: "input_blobdir") + tuple val(meta1), path(summary_json) + tuple val(meta2), path(meta_json) + + output: + tuple val(meta), path(prefix), emit: blobdir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + cp ${input}/* ${prefix}/ + cp ${summary_json} ${prefix}/summary.json + cp ${meta_json} ${prefix}/meta.json + pigz --processes $task.cpus ${prefix}/*.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ +} diff --git a/modules/local/restructurebuscodir.nf b/modules/local/restructurebuscodir.nf new file mode 100644 index 00000000..4c58b0ed --- /dev/null +++ b/modules/local/restructurebuscodir.nf @@ -0,0 +1,44 @@ +process RESTRUCTUREBUSCODIR { + tag "${meta.id}_${lineage}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), val(lineage), path(batch_summary), path(short_summaries_txt), path(short_summaries_json), path(busco_dir) + + output: + tuple val(meta), path("${lineage}"), emit: clean_busco_dir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${lineage} + + cp --dereference ${batch_summary} ${lineage}/short_summary.tsv + [ -n "${short_summaries_txt}" ] && cp --dereference ${short_summaries_txt} ${lineage}/short_summary.txt + [ -n "${short_summaries_json}" ] && cp --dereference ${short_summaries_json} ${lineage}/short_summary.json + + # Should we compress these ? + [ -e ${busco_dir}/*/run_*/full_table.tsv ] && cp ${busco_dir}/*/run_*/full_table.tsv ${lineage}/ + [ -e ${busco_dir}/*/run_*/missing_busco_list.tsv ] && cp ${busco_dir}/*/run_*/missing_busco_list.tsv ${lineage}/ + + tar czf ${lineage}/single_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences single_copy_busco_sequences + tar czf ${lineage}/multi_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences multi_copy_busco_sequences + tar czf ${lineage}/fragmented_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences fragmented_busco_sequences + tar czf ${lineage}/hmmer_output.tar.gz --exclude=.checkpoint -C ${busco_dir}/*/run_* hmmer_output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tar: \$(tar --version| awk 'NR==1 {print \$3}' ) + END_VERSIONS + """ +} diff --git a/modules/local/windowstats_input.nf b/modules/local/windowstats_input.nf index 6fe537b1..4ed7c6d4 100644 --- a/modules/local/windowstats_input.nf +++ b/modules/local/windowstats_input.nf @@ -1,6 +1,5 @@ process WINDOWSTATS_INPUT { tag "$meta.id" - label 'process_single' conda "conda-forge::pandas=1.5.2" container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/busco/busco.diff b/modules/nf-core/busco/busco.diff index 0a402c4c..775788fb 100644 --- a/modules/nf-core/busco/busco.diff +++ b/modules/nf-core/busco/busco.diff @@ -1,14 +1,15 @@ Changes in module 'nf-core/busco' --- modules/nf-core/busco/main.nf +++ modules/nf-core/busco/main.nf -@@ -1,5 +1,5 @@ +@@ -1,6 +1,5 @@ process BUSCO { - tag "$meta.id" +- label 'process_medium' + tag "${meta.id}_${lineage}" - label 'process_medium' conda "${moduleDir}/environment.yml" -@@ -37,7 +37,7 @@ + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? +@@ -37,7 +36,7 @@ def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def busco_config = config_file ? "--config $config_file" : '' def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}" diff --git a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/main.nf index 867238cf..83d8eacd 100644 --- a/modules/nf-core/busco/main.nf +++ b/modules/nf-core/busco/main.nf @@ -1,6 +1,5 @@ process BUSCO { tag "${meta.id}_${lineage}" - label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test index aaae04f9..fcee2d19 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { [ [ id:'genome', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -45,8 +45,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -72,8 +72,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] ] """ @@ -83,7 +83,8 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} ) } } @@ -101,8 
+102,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] ] """ @@ -130,8 +131,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -141,7 +142,8 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} ) } } @@ -158,7 +160,7 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] """ @@ -174,4 +176,3 @@ nextflow_process { } } } - diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap index 0c9bfe8d..423571ba 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -1,4 +1,10 @@ { + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, 
"test_cat_unzipped_unzipped": { "content": [ { @@ -61,36 +67,31 @@ ], "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped": { + "test_cat_zipped_zipped_lines": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + "test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + 
"TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] ], - "timestamp": "2024-01-12T14:02:02.999254641" + "timestamp": "2023-10-16T14:33:08.038830506" }, "test_cat_one_file_unzipped_zipped_lines": { "content": [ @@ -105,41 +106,16 @@ ], "timestamp": "2023-10-16T14:33:21.39642399" }, - "test_cat_unzipped_zipped": { + "test_cat_zipped_zipped_size": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } + 78 ], - "timestamp": "2024-01-12T14:08:26.948048418" + "timestamp": "2023-10-16T14:32:33.641869244" }, "test_cat_one_file_unzipped_zipped_size": { "content": [ 374 ], - "timestamp": "2024-01-12T14:10:22.445700266" + "timestamp": "2023-10-16T14:33:21.4094373" } -} +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index f0c63f69..b48ced26 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 7685b33c..105f9265 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -4,8 +4,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { 
// Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test index eec1db10..b1e1630b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -31,7 +31,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } ) } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 4274ed57..5f59a936 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -1,27 +1,33 @@ { "Should run without failures": { "content": [ - { - "0": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ], - "1": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "2": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "versions": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "yml": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ] - } + [ + 
"versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] ], - "timestamp": "2023-11-03T14:43:22.157011" + "timestamp": "2024-01-09T23:01:18.710682" } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastawindows/fastawindows.diff b/modules/nf-core/fastawindows/fastawindows.diff new file mode 100644 index 00000000..12f809e6 --- /dev/null +++ b/modules/nf-core/fastawindows/fastawindows.diff @@ -0,0 +1,12 @@ +Changes in module 'nf-core/fastawindows' +--- modules/nf-core/fastawindows/main.nf ++++ modules/nf-core/fastawindows/main.nf +@@ -1,6 +1,5 @@ + process FASTAWINDOWS { + tag "$meta.id" +- label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + +************************************************************ diff --git a/modules/nf-core/fastawindows/main.nf b/modules/nf-core/fastawindows/main.nf index 03cc8c57..40b28436 100644 --- a/modules/nf-core/fastawindows/main.nf +++ b/modules/nf-core/fastawindows/main.nf @@ -1,6 +1,5 @@ process FASTAWINDOWS { tag "$meta.id" - label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index d0317922..6406008e 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -15,10 +15,11 @@ nextflow_process { } process { """ - input[0] = [ - [], - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) """ } } diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index de1f3811..cf6e775f 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -6,3 +6,4 @@ channels: dependencies: - bioconda::minimap2=2.24 - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 47cd420c..7030554d 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -1,6 +1,5 @@ process MINIMAP2_ALIGN { tag "$meta.id" - label 'process_medium' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" @@ -24,22 +23,35 @@ process MINIMAP2_ALIGN { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? 
"-L" : '' """ minimap2 \\ $args \\ -t $task.cpus \\ - "${reference ?: reads}" \\ - "$reads" \\ + ${reference ?: reads} \\ + $reads \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + """ + touch $output_file + cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff new file mode 100644 index 00000000..479818b3 --- /dev/null +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -0,0 +1,12 @@ +Changes in module 'nf-core/minimap2/align' +--- modules/nf-core/minimap2/align/main.nf ++++ modules/nf-core/minimap2/align/main.nf +@@ -1,6 +1,5 @@ + process MINIMAP2_ALIGN { + tag "$meta.id" +- label 'process_medium' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + +************************************************************ diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index b634468b..4d77e0d9 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -142,4 +142,38 @@ nextflow_process { } + test("sarscov2 - fastq, fasta, false, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + input[3] = false + input[4] = false + """ + } + 
} + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.paf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + } diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index a39a1697..ec99d13e 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -6,6 +6,10 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:06.01315354" }, "sarscov2 - fastq, fasta, true, false, false - stub": { @@ -15,8 +19,25 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:24.487175659" }, + "sarscov2 - fastq, fasta, false, false, false - stub": { + "content": [ + "test.paf", + [ + "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-01T11:06:54.090105" + }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ "test.bam", @@ -24,6 +45,10 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:12.50816279" }, "sarscov2 - fastq, [], true, false, false": { @@ -33,6 +58,10 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:18.414974788" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index bc0bdb5b..ca39fb67 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.18 + - 
bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 00cc48d2..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : - 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -43,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660e..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad217..f1c4242e 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -3,19 +3,17 @@ nextflow_process { name "Test Process MULTIQC" script "../main.nf" process "MULTIQC" + tag "modules" tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = [] input[2] = [] input[3] = [] @@ -26,23 +24,20 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } ) } } - test("MULTIQC: FASTQC and a config file") { + test("sarscov2 single-end [fastqc] [config]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] @@ -53,9 +48,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/fasta/environment.yml b/modules/nf-core/samtools/fasta/environment.yml index 05cb8a8e..14585013 100644 --- a/modules/nf-core/samtools/fasta/environment.yml +++ b/modules/nf-core/samtools/fasta/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/fasta/main.nf b/modules/nf-core/samtools/fasta/main.nf index 4b0cad9a..9aa03430 100644 --- a/modules/nf-core/samtools/fasta/main.nf +++ b/modules/nf-core/samtools/fasta/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FASTA { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 00000000..bd57cb54 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,8 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..eb5f5252 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..97991358 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,51 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 00000000..24c3c04b --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match("flagstat") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 00000000..a76fc27e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "flagstat": { + "content": [ + [ 
+ [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:31:37.783927" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,fd0030ce49ab3a92091ad80260226452" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:44.299617452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 00000000..2d2b7255 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 296ed99e..a5e50649 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8ad18fdc..dc14f98d 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index c76a9169..bb7756d1 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/index" - test("sarscov2 [BAI]") { + test("bai") { when { params { @@ -16,10 +16,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -28,12 +28,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot(process.out.bai).match("bai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("bai_versions") } ) } } - test("homo_sapiens [CRAI]") { + test("crai") { when { params { @@ -41,10 +41,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) """ } } @@ -53,12 +53,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert 
snapshot(process.out.crai).match("crai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("crai_versions") } ) } } - test("homo_sapiens [CSI]") { + test("csi") { config "./csi.nextflow.config" @@ -68,10 +68,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -80,7 +80,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("csi_versions") } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index b3baee7f..3dc8e7de 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,28 +1,74 @@ { + "crai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:00.324667957" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:07.885103162" + }, "crai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" ] ] ], - "timestamp": "2023-11-15T15:17:37.30801" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + 
"timestamp": "2024-02-12T18:41:38.446424" }, "bai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" ] ] ], - "timestamp": "2023-11-15T15:17:30.869234" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:51.641425452" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index 99aa69d0..b0676f33 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 0b5a2912..5a8989d6 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_VIEW { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input), path(index) diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test index 89ed3555..45a0defb 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -9,16 +9,16 @@ nextflow_process { tag "samtools" tag "samtools/view" - test("sarscov2 - [bam, []], [], []") { + test("bam") { when { process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), [] - ] + ]) input[1] = [[],[]] input[2] = [] """ @@ -28,34 +28,31 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - process.out.bai, - process.out.crai, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } ) } - } - test("homo_sapiens - [cram, crai], fasta, []") { + test("cram") { when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: 
true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = [] """ } @@ -64,36 +61,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.cram[0][1]).name, - process.out.bam, - process.out.sam, - process.out.bai, - process.out.crai, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } ) } - } - test("homo_sapiens - [cram, []], fasta, [] - bam output") { + test("cram_to_bam") { config "./bam.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ] - 
input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = [] """ } @@ -102,36 +96,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - process.out.bai, - process.out.crai, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } ) } - } - test("homo_sapiens - [cram, []], fasta, [] - bam & index output") { + test("cram_to_bam_index") { config "./bam_index.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = [] """ } @@ -140,36 +131,33 @@ nextflow_process { then { assertAll( { assert 
process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.bai, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } ) } - } - test("homo_sapiens - [cram, []], fasta, qname - bam & index output") { + test("cram_to_bam_index_qname") { config "./bam_index.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) """ } @@ -178,21 +166,18 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.bai, - process.out.versions - 
).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } ) } - } - test("sarscov2 - [bam, []], [], [] - stub") { + test("bam_stub") { options "-stub" config "./bam_index.config" @@ -200,11 +185,11 @@ nextflow_process { when { process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), [] - ] + ]) input[1] = [[],[]] input[2] = [] """ @@ -214,18 +199,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.bai, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } ) } - } - } diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap 
b/modules/nf-core/samtools/view/tests/main.nf.test.snap index 83427491..f55943a7 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -1,140 +1,488 @@ { - "homo_sapiens - [cram, []], fasta, [] - bam output": { + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ [ - - ], + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:09.713353823" + }, + "cram_to_bam_index_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ [ - ], - [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" ] ], - "timestamp": "2023-12-04T17:41:17.563069206" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" }, - "sarscov2 - [bam, []], [], []": { + "cram_to_bam_index_sam": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ [ - ], - [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" ] ], - "timestamp": "2023-12-04T17:41:03.206994564" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" }, - "homo_sapiens - [cram, []], fasta, qname - bam & index output": { + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "bam_csi": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ [ - ], - "test.bam.csi", + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ [ - - ], + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:03.935041046" + }, + "cram_to_bam_bam": { + 
"content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" ] ], - "timestamp": "2023-12-04T17:44:39.165289759" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:55.910685496" }, - "homo_sapiens - [cram, []], fasta, [] - bam & index output": { + "cram_to_bam_bai": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ [ - - ], - "test.bam.csi", + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:47.715221169" + }, + "cram_bam": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] ], - "timestamp": "2023-12-04T17:44:32.25731224" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" }, - "sarscov2 - [bam, []], [], [] - stub": { + "cram_to_bam_index_qname_crai": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ [ - ], - 
"test.csi", + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:31.692607421" + }, + "cram_to_bam_index_qname_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] ], - "timestamp": "2023-12-04T17:44:45.81037195" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" }, - "homo_sapiens - [cram, crai], fasta, []": { + "bam_cram": { "content": [ - "test.cram", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:39.913411036" + }, + "bam_sam": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + 
"timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] ], - "timestamp": "2023-12-04T17:41:10.730011823" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" } } \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml index 42c4e8af..7abe3644 100644 --- a/modules/nf-core/seqtk/subseq/environment.yml +++ b/modules/nf-core/seqtk/subseq/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::seqtk=1.3 + - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf index 91d2dff3..5edae0e8 100644 --- a/modules/nf-core/seqtk/subseq/main.nf +++ b/modules/nf-core/seqtk/subseq/main.nf @@ -4,24 +4,24 @@ process SEQTK_SUBSEQ { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : - 'biocontainers/seqtk:1.3--h5bf99c6_3' }" + 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" input: - path sequences + tuple val(meta), path(sequences) path filter_list output: - path "*.gz" , emit: sequences - path "versions.yml" , emit: versions + tuple val(meta), path("*.${ext}"), emit: sequences + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: '' - def ext = "fa" + def prefix = task.ext.prefix ?: "${meta.id}" + ext = "fa" if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { ext = "fq" } @@ -30,8 +30,22 @@ process SEQTK_SUBSEQ { subseq \\ $args \\ $sequences \\ - $filter_list | \\ - gzip --no-name > ${sequences}${prefix}.${ext}.gz + $filter_list > ${sequences}${prefix}.${ext} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + ext = "fa" + if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } + """ + touch ${sequences}${prefix}.${ext} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml index 2cb8858d..4e8ee19f 100644 --- a/modules/nf-core/seqtk/subseq/meta.yml +++ b/modules/nf-core/seqtk/subseq/meta.yml @@ -1,7 +1,9 @@ name: seqtk_subseq description: Select only sequences that match the filtering condition keywords: - - filtering,selection + - filtering + - selection + - fastx tools: - seqtk: description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format diff --git a/modules/nf-core/seqtk/subseq/seqtk-subseq.diff b/modules/nf-core/seqtk/subseq/seqtk-subseq.diff new file mode 100644 index 
00000000..5fc2af8a --- /dev/null +++ b/modules/nf-core/seqtk/subseq/seqtk-subseq.diff @@ -0,0 +1,48 @@ +Changes in module 'nf-core/seqtk/subseq' +--- modules/nf-core/seqtk/subseq/main.nf ++++ modules/nf-core/seqtk/subseq/main.nf +@@ -12,7 +12,7 @@ + path filter_list + + output: +- tuple val(meta), path("*.gz"), emit: sequences ++ tuple val(meta), path("*.${ext}"), emit: sequences + path "versions.yml", emit: versions + + when: +@@ -21,7 +21,7 @@ + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def ext = "fa" ++ ext = "fa" + if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } +@@ -30,8 +30,7 @@ + subseq \\ + $args \\ + $sequences \\ +- $filter_list | \\ +- gzip --no-name > ${sequences}${prefix}.${ext}.gz ++ $filter_list > ${sequences}${prefix}.${ext} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -41,12 +40,12 @@ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" +- def ext = "fa" ++ ext = "fa" + if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } + """ +- echo "" | gzip > ${sequences}${prefix}.${ext}.gz ++ touch ${sequences}${prefix}.${ext} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test new file mode 100644 index 00000000..be5602e3 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SEQTK_SUBSEQ" + script "modules/nf-core/seqtk/subseq/main.nf" + process "SEQTK_SUBSEQ" + config "./standard.config" + + tag "modules" + tag "modules_nfcore" + tag "seqtk" + tag "seqtk/subseq" + + test("sarscov2_subseq_fa") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = 
file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2_subseq_fa_stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap new file mode 100644 index 00000000..75b3793e --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2_subseq_fa": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "1": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:36.155954" + }, + "sarscov2_subseq_fa_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:44.222329" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/standard.config 
b/modules/nf-core/seqtk/subseq/tests/standard.config new file mode 100644 index 00000000..e8d7dc30 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/standard.config @@ -0,0 +1,5 @@ +process { + withName: SEQTK_SUBSEQ { + ext.prefix = { ".filtered" } + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/tags.yml b/modules/nf-core/seqtk/subseq/tests/tags.yml new file mode 100644 index 00000000..74056bab --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/subseq: + - "modules/nf-core/seqtk/subseq/**" diff --git a/modules/nf-core/windowmasker/mkcounts/environment.yml b/modules/nf-core/windowmasker/mkcounts/environment.yml index 15887425..e4d72108 100644 --- a/modules/nf-core/windowmasker/mkcounts/environment.yml +++ b/modules/nf-core/windowmasker/mkcounts/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::blast=2.14.0 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/mkcounts/main.nf b/modules/nf-core/windowmasker/mkcounts/main.nf index 6bfd175e..406f7761 100644 --- a/modules/nf-core/windowmasker/mkcounts/main.nf +++ b/modules/nf-core/windowmasker/mkcounts/main.nf @@ -4,8 +4,8 @@ process WINDOWMASKER_MKCOUNTS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': - 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta), path(ref) @@ -21,11 +21,11 @@ process WINDOWMASKER_MKCOUNTS { def args = task.ext.args ?: "" def prefix = task.ext.prefix ?: "${meta.id}" - def memory = 3072 + def memory = 3072 if (!task.memory) { log.info '[WINDOWMASKER: MK_COUNTS] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this.' } else { - memory = (task.memory.toMega()).intValue() + memory = (task.memory.toMega()).intValue() } """ diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test new file mode 100644 index 00000000..18c4977c --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process WINDOWMASKER_MKCOUNTS" + script "../main.nf" + process "WINDOWMASKER_MKCOUNTS" + + tag "modules" + tag "modules_nfcore" + tag "windowmasker" + tag "windowmasker/mkcounts" + + test("sarscov2_fasta") { + + when { + params { + // define parameters here. Example: + // outdir = "tests/results" + } + process { + """ + input[0] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + test("sarscov2_fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap new file mode 100644 index 00000000..cae2d306 --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt:md5,5f5d7e926fdf13b0c57651f962cc1253" + ] + ], + "1": [ + "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ], + "counts": [ + [ + { + "id": "test" + }, + "test.txt:md5,5f5d7e926fdf13b0c57651f962cc1253" + ] + ], + "versions": [ + 
"versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ] + } + ], + "timestamp": "2024-02-15T13:29:58.837482" + }, + "sarscov2_fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ], + "counts": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ] + } + ], + "timestamp": "2024-02-15T13:30:07.618636" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config b/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config new file mode 100644 index 00000000..65fc1910 --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: WINDOWMASKER_MKCOUNTS { + ext.args = "" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/mkcounts/tests/tags.yml b/modules/nf-core/windowmasker/mkcounts/tests/tags.yml new file mode 100644 index 00000000..95c67635 --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/tags.yml @@ -0,0 +1,2 @@ +windowmasker/mkcounts: + - "modules/nf-core/windowmasker/mkcounts/**" diff --git a/modules/nf-core/windowmasker/ustat/environment.yml b/modules/nf-core/windowmasker/ustat/environment.yml index a97fdd9d..b83d82e5 100644 --- a/modules/nf-core/windowmasker/ustat/environment.yml +++ b/modules/nf-core/windowmasker/ustat/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::blast=2.14.0 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/ustat/main.nf b/modules/nf-core/windowmasker/ustat/main.nf index 2cc3df63..7a7d29f6 100644 --- a/modules/nf-core/windowmasker/ustat/main.nf +++ b/modules/nf-core/windowmasker/ustat/main.nf @@ -4,8 +4,8 @@ process WINDOWMASKER_USTAT { conda "${moduleDir}/environment.yml" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': - 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta) , path(counts) diff --git a/modules/nf-core/windowmasker/ustat/tests/main.nf.test b/modules/nf-core/windowmasker/ustat/tests/main.nf.test new file mode 100644 index 00000000..58d91b13 --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process WINDOWMASKER_USTAT" + script "../main.nf" + process "WINDOWMASKER_USTAT" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "windowmasker" + tag "windowmasker/ustat" + tag "windowmasker/mkcounts" + + + setup { + run("WINDOWMASKER_MKCOUNTS") { + script "../../mkcounts/main.nf" + process { + """ + input[0] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + } + + test("sarscov2_fasta") { + when { + process { + """ + input[0] = WINDOWMASKER_MKCOUNTS.out.counts + input[1] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + then { + assert process.success + assert snapshot(process.out).match() + } + } + + test("sarscov2_fasta_stub") { + when { + process { + """ + input[0] = WINDOWMASKER_MKCOUNTS.out.counts + input[1] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap b/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap new file mode 100644 index 00000000..79d3d82d --- /dev/null +++ 
b/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "1": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ], + "intervals": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "versions": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ] + } + ], + "timestamp": "2024-02-15T14:19:12.033774" + }, + "sarscov2_fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "1": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ], + "intervals": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "versions": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ] + } + ], + "timestamp": "2024-02-15T14:19:21.850526" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/ustat/tests/nextflow.config b/modules/nf-core/windowmasker/ustat/tests/nextflow.config new file mode 100644 index 00000000..00b63c45 --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'test_windowmasker_ustat:WINDOWMASKER_USTAT' { + ext.args = "-dust true -outfmt interval" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/ustat/tests/tags.yml b/modules/nf-core/windowmasker/ustat/tests/tags.yml new file mode 100644 index 00000000..28c74ca9 --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/tests/tags.yml @@ -0,0 +1,2 @@ +windowmasker/ustat: + - "modules/nf-core/windowmasker/ustat/**" diff --git a/nextflow.config b/nextflow.config index 6c9fadf8..83aaaafc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -234,6 +234,7 @@ report { trace { enabled = true file = 
"${params.outdir}/pipeline_info/blobtoolkit/execution_trace_${trace_timestamp}.txt" + fields = 'task_id,hash,native_id,process,tag,status,exit,cpus,memory,time,attempt,submit,start,complete,duration,%cpu,%mem,peak_rss,rchar,wchar' } dag { enabled = true @@ -247,7 +248,7 @@ manifest { description = """Quality assessment of genome assemblies""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.3.0' + version = '0.4.0' doi = '10.5281/zenodo.7949058' } @@ -286,3 +287,31 @@ def check_max(obj, type) { } } } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Increasing the number of CPUs often gives diminishing returns, so we increase it + following a logarithm curve: + - 0 < value <= 1 : start + step + - 1 < value <= base : start + 2*step + - base < value <= base^2: start + 3*step + - base^2 < value <= base^3: start + 4*step + - etc + In order to support re-runs, start and/or step may be increased by a function of the + attempt number prior to calling this function. 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Modified logarithm function that doesn't return negative numbers +def positive_log(value, base) { + if (value <= 1) { + return 0 + } else { + return Math.log(value)/Math.log(base) + } +} + +def log_increase_cpus(start, step, value, base) { + return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus') +} + diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index a43b26dd..c3ebe104 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -6,6 +6,7 @@ include { GOAT_TAXONSEARCH } from '../../modules/nf-core/goat/taxonsear include { BUSCO } from '../../modules/nf-core/busco/main' include { BLOBTOOLKIT_EXTRACTBUSCOS } from '../../modules/local/blobtoolkit/extractbuscos' include { DIAMOND_BLASTP } from '../../modules/nf-core/diamond/blastp/main' +include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebuscodir' workflow BUSCO_DIAMOND { @@ -42,31 +43,61 @@ workflow BUSCO_DIAMOND { // - // Run BUSCO search + // Prepare the BUSCO lineages // + // 0. Initialise some variables + basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ] + def lineage_position = 0 + // 1. Parse the GOAT_TAXONSEARCH output GOAT_TAXONSEARCH.out.taxonsearch | map { meta, csv -> csv.splitCsv(header:true, sep:'\t', strip:true) } | map { row -> row.odb10_lineage.findAll { it != "" } } - | set { ch_ancestral_lineages } - - - // Add the basal lineages to the list (excluding duplicates) - basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ] - ch_ancestral_lineages + // 2. Add the (missing) basal lineages | map { lineages -> (lineages + basal_lineages).unique() } | flatten () - | set { ch_lineages } + // 3. Add a (0-based) index to record the original order (i.e. 
by age) + | map { lineage_name -> [lineage_name, lineage_position++] } + // 4. Move the lineage information to `meta` to be able to distinguish the BUSCO jobs and group their outputs later + | combine ( fasta ) + | map { lineage_name, lineage_index, meta, genome -> [meta + [lineage_name: lineage_name, lineage_index: lineage_index], genome] } + | set { ch_fasta_with_lineage } + - BUSCO ( fasta, "genome", ch_lineages, busco_db.collect().ifEmpty([]), [] ) + // + // Run BUSCO search + // + BUSCO ( + ch_fasta_with_lineage, + "genome", + ch_fasta_with_lineage.map { it[0].lineage_name }, + busco_db.collect().ifEmpty([]), + [], + ) ch_versions = ch_versions.mix ( BUSCO.out.versions.first() ) + // + // Tidy up the BUSCO output directories before publication + // + RESTRUCTUREBUSCODIR( + BUSCO.out.seq_dir + | map { meta, seq -> [meta, meta.lineage_name] } + | join ( BUSCO.out.batch_summary ) + | join ( BUSCO.out.short_summaries_txt, remainder: true ) + | join ( BUSCO.out.short_summaries_json, remainder: true ) + | join ( BUSCO.out.busco_dir ) + | map { meta, lineage, batch_summary, short_summaries_txt, short_summaries_json, busco_dir -> [meta, lineage, batch_summary, short_summaries_txt ?: [], short_summaries_json ?: [], busco_dir] } + ) + ch_versions = ch_versions.mix ( RESTRUCTUREBUSCODIR.out.versions.first() ) + + // // Select input for BLOBTOOLKIT_EXTRACTBUSCOS // BUSCO.out.seq_dir - | filter { meta, seq -> basal_lineages.contains(seq.parent.baseName.minus("run_")) } - | groupTuple() + | filter { meta, seq -> basal_lineages.contains(meta.lineage_name) } + | map { meta, seq -> seq } + | collect | set { ch_basal_buscos } @@ -86,20 +117,14 @@ workflow BUSCO_DIAMOND { ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() ) - // Index the lineages in the taxonomic order - def lineage_position = 0 - ch_lineages - | map { lineage -> [lineage, lineage_position++] } - | set { ch_ordered_lineages } - - - // Order BUSCO results according to ch_ordered_lineages + // 
Order BUSCO results according to the lineage index BUSCO.out.full_table - | map { meta, table -> [table.parent.baseName.minus("run_"), meta, table] } - | join ( ch_ordered_lineages ) - | map { lineage, meta, table, index -> [meta, table, index] } + // 1. Restore the original meta map, and pull the index as an extra tuple element + | map { meta, table -> [meta.findAll { it.key != "lineage_name" && it.key != "lineage_index" }, [table, meta.lineage_index]] } + // 2. Turn to a single-element channel that has the (one and only) meta map, and all the pairs (table, lineage index) concatenated as a list | groupTuple() - | map { meta, tables, positions -> [ meta, tables.withIndex().sort { a, b -> positions[a[1]] <=> positions[b[1]] } . collect { table, i -> table } ] } + // 3. Sort the pairs and discard the index + | map { meta, table_positions -> [ meta, table_positions.sort { a, b -> a[1] <=> b[1] } . collect { table, lineage_index -> table } ] } | set { ch_indexed_buscos } diff --git a/subworkflows/local/finalise_blobdir.nf b/subworkflows/local/finalise_blobdir.nf new file mode 100644 index 00000000..ffbbd534 --- /dev/null +++ b/subworkflows/local/finalise_blobdir.nf @@ -0,0 +1,33 @@ +// +// Final edits to the blobdir +// + +include { BLOBTOOLKIT_UPDATEMETA } from '../../modules/local/blobtoolkit/updatemeta' +include { COMPRESSBLOBDIR } from '../../modules/local/compressblobdir' + +workflow FINALISE_BLOBDIR { + take: + blobdir // channel: [ val(meta), path(blobdir) ] + software // channel: [ val(meta), path(software_yml) ] + summary // channel: [ val(meta), path(summary_json) ] + + + main: + ch_versions = Channel.empty() + + // + // MODULE: Update meta json file + // + BLOBTOOLKIT_UPDATEMETA ( blobdir, software ) + + // + // MODULE: Compress all the json files + // + COMPRESSBLOBDIR ( blobdir, summary, BLOBTOOLKIT_UPDATEMETA.out.json ) + ch_versions = ch_versions.mix ( COMPRESSBLOBDIR.out.versions.first() ) + + + emit: + blobdir = COMPRESSBLOBDIR.out.blobdir // channel: 
[ val(meta), path(json) ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 5b028911..da522ca8 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -3,6 +3,7 @@ // include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' include { FETCHNGSSAMPLESHEET_CHECK } from '../../modules/local/fetchngssamplesheet_check' include { BLOBTOOLKIT_CONFIG } from '../../modules/local/blobtoolkit/config' @@ -35,27 +36,37 @@ workflow INPUT_CHECK { | map { meta, file -> meta.row + [fastq_1: file] } | mix ( reads_pairedness.not_paired ) | map { create_data_channels_from_fetchngs(it) } - | set { aln } + | set { read_files } } else { SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) .map { create_data_channels(it) } - .set { aln } + .set { read_files } ch_versions = ch_versions.mix ( SAMPLESHEET_CHECK.out.versions.first() ) } + // Extract the read counts + SAMTOOLS_FLAGSTAT ( read_files.map { meta, datafile -> [meta, datafile, []] } ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) + + read_files + | join( SAMTOOLS_FLAGSTAT.out.flagstat ) + | map { meta, datafile, stats -> [meta + get_read_counts(stats), datafile] } + | set { reads } + + if ( !params.yaml ) { - aln + read_files | map { meta, data -> meta.id.split("_")[0..-2].join("_") } | combine ( fasta ) | map { sample, meta, fasta -> [ meta, sample ] } | groupTuple() - | set { reads } + | set { grouped_reads } - BLOBTOOLKIT_CONFIG ( reads, fasta ) + BLOBTOOLKIT_CONFIG ( grouped_reads, fasta ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CONFIG.out.versions.first() ) ch_config = BLOBTOOLKIT_CONFIG.out.yaml } else { @@ -63,7 +74,7 @@ workflow INPUT_CHECK { } emit: - aln // channel: [ val(meta), 
path(datafile) ] + reads // channel: [ val(meta), path(datafile) ] config = ch_config // channel: [ val(meta), path(yaml) ] versions = ch_versions // channel: [ versions.yml ] } @@ -79,8 +90,8 @@ def create_data_channels(LinkedHashMap row) { // add path(s) of the read file(s) to the meta map def data_meta = [] - if ( !params.align && (row.datafile.endsWith(".fastq") || row.datafile.endsWith(".fastq.gz")) ) { - exit 1, "ERROR: Please check input samplesheet and pipeline parameters -> Data file is in FastQ format but --align is not set!\n${row.datafile}" + if ( !params.align && !row.datafile.endsWith(".bam") && !row.datafile.endsWith(".cram") ) { + exit 1, "ERROR: Please check input samplesheet and pipeline parameters -> Data file is in FastA/FastQ format but --align is not set!\n${row.datafile}" } if ( !file(row.datafile).exists() ) { @@ -127,3 +138,20 @@ def create_data_channels_from_fetchngs(LinkedHashMap row) { return data_meta } +// Function to get the read counts from a samtools flagstat file +def get_read_counts ( stats ) { + // create meta map + def read_count_meta = [:] + + // Read the first line of the flagstat file + // 3127898040 + 0 in total (QC-passed reads + QC-failed reads) + // and make the sum of both integers + stats.withReader { + line = it.readLine() + def lspl = line.split() + def read_count = lspl[0].toLong() + lspl[2].toLong() + read_count_meta.read_count = read_count + } + + return read_count_meta +} diff --git a/subworkflows/local/minimap_alignment.nf b/subworkflows/local/minimap_alignment.nf index e0b479bc..1d6263b3 100644 --- a/subworkflows/local/minimap_alignment.nf +++ b/subworkflows/local/minimap_alignment.nf @@ -24,6 +24,7 @@ workflow MINIMAP2_ALIGNMENT { input | branch { meta, reads -> + fasta: reads.toString().endsWith(".fasta") || reads.toString().endsWith(".fasta.gz") || reads.toString().endsWith(".fa") || reads.toString().endsWith(".fa.gz") fastq: reads.toString().endsWith(".fastq") || reads.toString().endsWith(".fastq.gz") || 
reads.toString().endsWith(".fq") || reads.toString().endsWith(".fq.gz") bamcram: true } @@ -35,6 +36,7 @@ workflow MINIMAP2_ALIGNMENT { // Branch input by sequencing type SAMTOOLS_FASTA.out.interleaved + | mix ( ch_reads_by_type.fasta ) | mix ( ch_reads_by_type.fastq ) | branch { meta, reads -> diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index d1e31a72..0b426fae 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -20,12 +20,18 @@ workflow PREPARE_GENOME { // MODULE: Decompress FASTA file if needed // if ( params.fasta.endsWith('.gz') ) { - ch_genome = GUNZIP ( fasta ).gunzip + ch_unzipped = GUNZIP ( fasta ).gunzip ch_versions = ch_versions.mix ( GUNZIP.out.versions ) } else { - ch_genome = fasta + ch_unzipped = fasta } + // + // LOGIC: Extract the genome size for decision making downstream + // + ch_unzipped + | map { meta, fa -> [ meta + [genome_size: fa.size()], fa] } + | set { ch_genome } // // MODULES: Mask the genome if needed @@ -46,4 +52,4 @@ workflow PREPARE_GENOME { emit: genome = ch_fasta // channel: [ meta, path(genome) ] versions = ch_versions // channel: [ versions.yml ] -} \ No newline at end of file +} diff --git a/subworkflows/local/run_blastn.nf b/subworkflows/local/run_blastn.nf index 5e3c913f..cc1fa6c5 100644 --- a/subworkflows/local/run_blastn.nf +++ b/subworkflows/local/run_blastn.nf @@ -5,7 +5,6 @@ include { NOHIT_LIST } from '../../modules/local/nohit_list' include { SEQTK_SUBSEQ } from '../../modules/nf-core/seqtk/subseq/main' -include { GUNZIP } from '../../modules/nf-core/gunzip/main' include { BLOBTOOLKIT_CHUNK } from '../../modules/local/blobtoolkit/chunk' include { BLAST_BLASTN as BLASTN_TAXON } from '../../modules/nf-core/blast/blastn/main' include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn/main' @@ -29,23 +28,17 @@ workflow RUN_BLASTN { NOHIT_LIST ( blast_table, fasta ) ch_versions = ch_versions.mix ( NOHIT_LIST.out.versions.first() 
) - // Subset of sequences with no hits (meta is not propagated in this step) + // Subset of sequences with no hits SEQTK_SUBSEQ ( - fasta.map { meta, genome -> genome }, + fasta, NOHIT_LIST.out.nohitlist.map { meta, nohit -> nohit } ) ch_versions = ch_versions.mix ( SEQTK_SUBSEQ.out.versions.first() ) // Split long contigs into chunks - // add meta to fasta subset channel: [ val(meta), path(compressed_fasta) ] - ch_gz = fasta.combine(SEQTK_SUBSEQ.out.sequences).map { meta, genome, seq -> [ meta, seq ] } - - // uncompress fasta - GUNZIP ( ch_gz ) - // create chunks - BLOBTOOLKIT_CHUNK ( GUNZIP.out.gunzip, [[],[]] ) + BLOBTOOLKIT_CHUNK ( SEQTK_SUBSEQ.out.sequences, [[],[]] ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CHUNK.out.versions.first() ) diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index 944ccc4c..f25da1eb 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -55,7 +55,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // MODULE: Loaded from modules/local/ // include { BLOBTOOLKIT_CONFIG } from '../modules/local/blobtoolkit/config' -include { BLOBTOOLKIT_UPDATEMETA } from '../modules/local/blobtoolkit/updatemeta' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -70,6 +69,7 @@ include { RUN_BLASTN } from '../subworkflows/local/run_blastn' include { COLLATE_STATS } from '../subworkflows/local/collate_stats' include { BLOBTOOLS } from '../subworkflows/local/blobtools' include { VIEW } from '../subworkflows/local/view' +include { FINALISE_BLOBDIR } from '../subworkflows/local/finalise_blobdir' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -105,18 +105,18 @@ workflow BLOBTOOLKIT { // // SUBWORKFLOW: Check samplesheet and create channels for downstream analysis // - INPUT_CHECK ( ch_input, ch_fasta, ch_yaml ) + INPUT_CHECK ( ch_input, PREPARE_GENOME.out.genome, ch_yaml ) ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) // // SUBWORKFLOW: Optional read alignment // if ( params.align ) { - MINIMAP2_ALIGNMENT ( INPUT_CHECK.out.aln, PREPARE_GENOME.out.genome ) + MINIMAP2_ALIGNMENT ( INPUT_CHECK.out.reads, PREPARE_GENOME.out.genome ) ch_versions = ch_versions.mix ( MINIMAP2_ALIGNMENT.out.versions ) ch_aligned = MINIMAP2_ALIGNMENT.out.aln } else { - ch_aligned = INPUT_CHECK.out.aln + ch_aligned = INPUT_CHECK.out.reads } // @@ -130,9 +130,9 @@ workflow BLOBTOOLKIT { // if (params.taxa_file) { ch_taxa = Channel.from(params.taxa_file) - ch_taxon_taxa = ch_fasta.combine(ch_taxon).combine(ch_taxa).map { meta, fasta, taxon, taxa -> [ meta, taxon, taxa ] } + ch_taxon_taxa = PREPARE_GENOME.out.genome.combine(ch_taxon).combine(ch_taxa).map { meta, fasta, taxon, taxa -> [ meta, taxon, taxa ] } } else { - ch_taxon_taxa = ch_fasta.combine(ch_taxon).map { meta, fasta, taxon -> [ meta, taxon, [] ] } + ch_taxon_taxa = PREPARE_GENOME.out.genome.combine(ch_taxon).map { meta, fasta, taxon -> [ meta, taxon, [] ] } } 
BUSCO_DIAMOND ( @@ -208,9 +208,14 @@ workflow BLOBTOOLKIT { ) // - // MODULE: Update meta json file + // SUBWORKFLOW: Finalise and publish the blobdir // - BLOBTOOLKIT_UPDATEMETA ( BLOBTOOLS.out.blobdir, CUSTOM_DUMPSOFTWAREVERSIONS.out.yml ) + FINALISE_BLOBDIR ( + BLOBTOOLS.out.blobdir, + CUSTOM_DUMPSOFTWAREVERSIONS.out.yml, + VIEW.out.summary + ) + // Don't update ch_versions because it's already been consumed by now //
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow