From 196427afedd7b7c447a6ae60a1f8e9918fe75e70 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Feb 2024 12:54:13 +0000 Subject: [PATCH 01/36] Upgraded the BTK container version to enable better reporting of a wrong accession being used --- modules/local/blobtoolkit/chunk.nf | 2 +- modules/local/blobtoolkit/config.nf | 2 +- modules/local/blobtoolkit/countbuscos.nf | 2 +- modules/local/blobtoolkit/createblobdir.nf | 2 +- modules/local/blobtoolkit/extractbuscos.nf | 2 +- modules/local/blobtoolkit/metadata.nf | 2 +- modules/local/blobtoolkit/summary.nf | 2 +- modules/local/blobtoolkit/unchunk.nf | 2 +- modules/local/blobtoolkit/updateblobdir.nf | 2 +- modules/local/blobtoolkit/windowstats.nf | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/local/blobtoolkit/chunk.nf b/modules/local/blobtoolkit/chunk.nf index 73f27532..28ec6886 100644 --- a/modules/local/blobtoolkit/chunk.nf +++ b/modules/local/blobtoolkit/chunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_CHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta) , path(fasta) diff --git a/modules/local/blobtoolkit/config.nf b/modules/local/blobtoolkit/config.nf index d93b85b4..38affea1 100644 --- a/modules/local/blobtoolkit/config.nf +++ b/modules/local/blobtoolkit/config.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CONFIG { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "GENERATE_CONFIG module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), val(reads) diff --git a/modules/local/blobtoolkit/countbuscos.nf b/modules/local/blobtoolkit/countbuscos.nf index 203633e1..6446f089 100644 --- a/modules/local/blobtoolkit/countbuscos.nf +++ b/modules/local/blobtoolkit/countbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_COUNTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_COUNTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(table, stageAs: 'dir??/*') diff --git a/modules/local/blobtoolkit/createblobdir.nf b/modules/local/blobtoolkit/createblobdir.nf index 2c8517ab..c27d7898 100644 --- a/modules/local/blobtoolkit/createblobdir.nf +++ b/modules/local/blobtoolkit/createblobdir.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(window, stageAs: 'windowstats/*') diff --git a/modules/local/blobtoolkit/extractbuscos.nf b/modules/local/blobtoolkit/extractbuscos.nf index 128780fe..9ac93f25 100644 --- a/modules/local/blobtoolkit/extractbuscos.nf +++ b/modules/local/blobtoolkit/extractbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_EXTRACTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_EXTRACTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(fasta) diff --git a/modules/local/blobtoolkit/metadata.nf b/modules/local/blobtoolkit/metadata.nf index 96948345..6d29be8a 100644 --- a/modules/local/blobtoolkit/metadata.nf +++ b/modules/local/blobtoolkit/metadata.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_METADATA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_METADATA module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(yaml) diff --git a/modules/local/blobtoolkit/summary.nf b/modules/local/blobtoolkit/summary.nf index 45f0471a..32fac238 100644 --- a/modules/local/blobtoolkit/summary.nf +++ b/modules/local/blobtoolkit/summary.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_SUMMARY { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_SUMMARY module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(blobdir) diff --git a/modules/local/blobtoolkit/unchunk.nf b/modules/local/blobtoolkit/unchunk.nf index f9797178..2e47854f 100644 --- a/modules/local/blobtoolkit/unchunk.nf +++ b/modules/local/blobtoolkit/unchunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UNCHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UNCHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(blast_table) diff --git a/modules/local/blobtoolkit/updateblobdir.nf b/modules/local/blobtoolkit/updateblobdir.nf index cbcdc7b5..6bf975b9 100644 --- a/modules/local/blobtoolkit/updateblobdir.nf +++ b/modules/local/blobtoolkit/updateblobdir.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(input) diff --git a/modules/local/blobtoolkit/windowstats.nf b/modules/local/blobtoolkit/windowstats.nf index 26bd49f5..f3013701 100644 --- a/modules/local/blobtoolkit/windowstats.nf +++ b/modules/local/blobtoolkit/windowstats.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_WINDOWSTATS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_WINDOWSTATS module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.3" + container "docker.io/genomehubs/blobtoolkit:4.3.4" input: tuple val(meta), path(tsv) From b65e399a7e9f1bbdc8601437615fc0904e912af4 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Feb 2024 13:38:52 +0000 Subject: [PATCH 02/36] Updated the minimap2/align module --- modules.json | 2 +- modules/nf-core/minimap2/align/environment.yml | 1 + modules/nf-core/minimap2/align/main.nf | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/modules.json b/modules.json index 7ba1a8db..c458f840 100644 --- a/modules.json +++ b/modules.json @@ -55,7 +55,7 @@ }, "minimap2/align": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "efbf86bb487f288ac30660282709d9620dd6048e", "installed_by": ["modules"] }, "multiqc": { diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index de1f3811..cf6e775f 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -6,3 +6,4 @@ channels: dependencies: - bioconda::minimap2=2.24 - bioconda::samtools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 47cd420c..07a32158 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -24,9 +24,10 @@ process MINIMAP2_ALIGN { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? 
"-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' """ From 0f509b491af2b82a791d51bfd5342d7d00ddd893 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 26 Feb 2024 09:09:26 +0000 Subject: [PATCH 03/36] Same format for the trace files as the readmapping and genomenote pipelines --- nextflow.config | 1 + 1 file changed, 1 insertion(+) diff --git a/nextflow.config b/nextflow.config index 6c9fadf8..43d996ce 100644 --- a/nextflow.config +++ b/nextflow.config @@ -234,6 +234,7 @@ report { trace { enabled = true file = "${params.outdir}/pipeline_info/blobtoolkit/execution_trace_${trace_timestamp}.txt" + fields = 'task_id,hash,native_id,process,tag,status,exit,cpus,memory,time,attempt,submit,start,complete,duration,%cpu,%mem,peak_rss,rchar,wchar' } dag { enabled = true From 0e3a4a02452d2e6f23e2430425e8ead8fe01a810 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 15 Mar 2024 13:28:48 +0000 Subject: [PATCH 04/36] bugfix: put the summary.json in the blobdir so that it is published too --- modules/local/blobtoolkit/summary.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/blobtoolkit/summary.nf b/modules/local/blobtoolkit/summary.nf index 32fac238..5cd6714f 100644 --- a/modules/local/blobtoolkit/summary.nf +++ b/modules/local/blobtoolkit/summary.nf @@ -23,7 +23,7 @@ process BLOBTOOLKIT_SUMMARY { """ blobtools filter \\ ${args} \\ - --summary ${prefix}.summary.json ${blobdir} + --summary ${blobdir}/${prefix}.summary.json ${blobdir} cat <<-END_VERSIONS > versions.yml "${task.process}": From fc7c576171f4490c69767f18d4c02db8e02e15cf Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 14:18:23 +0000 Subject: [PATCH 05/36] Upgraded the BTK container version to handle None in CSV files --- modules/local/blobtoolkit/chunk.nf | 2 +- modules/local/blobtoolkit/config.nf | 2 +- modules/local/blobtoolkit/countbuscos.nf | 2 +- modules/local/blobtoolkit/createblobdir.nf | 2 +- 
modules/local/blobtoolkit/extractbuscos.nf | 2 +- modules/local/blobtoolkit/metadata.nf | 2 +- modules/local/blobtoolkit/summary.nf | 2 +- modules/local/blobtoolkit/unchunk.nf | 2 +- modules/local/blobtoolkit/updateblobdir.nf | 2 +- modules/local/blobtoolkit/windowstats.nf | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/modules/local/blobtoolkit/chunk.nf b/modules/local/blobtoolkit/chunk.nf index 28ec6886..7dad9182 100644 --- a/modules/local/blobtoolkit/chunk.nf +++ b/modules/local/blobtoolkit/chunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_CHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta) , path(fasta) diff --git a/modules/local/blobtoolkit/config.nf b/modules/local/blobtoolkit/config.nf index 38affea1..32d4eacd 100644 --- a/modules/local/blobtoolkit/config.nf +++ b/modules/local/blobtoolkit/config.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CONFIG { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "GENERATE_CONFIG module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), val(reads) diff --git a/modules/local/blobtoolkit/countbuscos.nf b/modules/local/blobtoolkit/countbuscos.nf index 6446f089..1b415504 100644 --- a/modules/local/blobtoolkit/countbuscos.nf +++ b/modules/local/blobtoolkit/countbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_COUNTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_COUNTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(table, stageAs: 'dir??/*') diff --git a/modules/local/blobtoolkit/createblobdir.nf b/modules/local/blobtoolkit/createblobdir.nf index c27d7898..dfaddb7d 100644 --- a/modules/local/blobtoolkit/createblobdir.nf +++ b/modules/local/blobtoolkit/createblobdir.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(window, stageAs: 'windowstats/*') diff --git a/modules/local/blobtoolkit/extractbuscos.nf b/modules/local/blobtoolkit/extractbuscos.nf index 9ac93f25..a54a9437 100644 --- a/modules/local/blobtoolkit/extractbuscos.nf +++ b/modules/local/blobtoolkit/extractbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_EXTRACTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_EXTRACTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(fasta) diff --git a/modules/local/blobtoolkit/metadata.nf b/modules/local/blobtoolkit/metadata.nf index 6d29be8a..ffae2a8c 100644 --- a/modules/local/blobtoolkit/metadata.nf +++ b/modules/local/blobtoolkit/metadata.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_METADATA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_METADATA module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(yaml) diff --git a/modules/local/blobtoolkit/summary.nf b/modules/local/blobtoolkit/summary.nf index 5cd6714f..429f69e0 100644 --- a/modules/local/blobtoolkit/summary.nf +++ b/modules/local/blobtoolkit/summary.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_SUMMARY { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_SUMMARY module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(blobdir) diff --git a/modules/local/blobtoolkit/unchunk.nf b/modules/local/blobtoolkit/unchunk.nf index 2e47854f..5285b0dc 100644 --- a/modules/local/blobtoolkit/unchunk.nf +++ b/modules/local/blobtoolkit/unchunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UNCHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UNCHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(blast_table) diff --git a/modules/local/blobtoolkit/updateblobdir.nf b/modules/local/blobtoolkit/updateblobdir.nf index 6bf975b9..c829ba76 100644 --- a/modules/local/blobtoolkit/updateblobdir.nf +++ b/modules/local/blobtoolkit/updateblobdir.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(input) diff --git a/modules/local/blobtoolkit/windowstats.nf b/modules/local/blobtoolkit/windowstats.nf index f3013701..b3b6c257 100644 --- a/modules/local/blobtoolkit/windowstats.nf +++ b/modules/local/blobtoolkit/windowstats.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_WINDOWSTATS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_WINDOWSTATS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.4" + container "docker.io/genomehubs/blobtoolkit:4.3.9" input: tuple val(meta), path(tsv) From f04b749ad375c2b508c3bcbbfb82367c3c15cc93 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 14:42:10 +0000 Subject: [PATCH 06/36] Make minimap2 support large genomes --- conf/modules.config | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 974728f5..184b98e1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -29,23 +29,23 @@ process { } withName: "MINIMAP2_HIC" { - ext.args = "-ax sr" + ext.args = { "-ax sr -I" + Math.ceil(reference.size()/1e9) + 'G' } } withName: "MINIMAP2_ILMN" { - ext.args = "-ax sr" + ext.args = { "-ax sr -I" + Math.ceil(reference.size()/1e9) + 'G' } } withName: "MINIMAP2_CCS" { - ext.args = "-ax map-hifi --cs=short" + ext.args = { "-ax map-hifi --cs=short -I" + Math.ceil(reference.size()/1e9) + 'G' } } withName: "MINIMAP2_CLR" { - ext.args = "-ax map-pb" + ext.args = { "-ax map-pb -I" + Math.ceil(reference.size()/1e9) + 'G' } } withName: "MINIMAP2_ONT" { - ext.args = "-ax map-ont" + ext.args = { "-ax map-ont -I" + Math.ceil(reference.size()/1e9) + 'G' } } withName: "SAMTOOLS_VIEW" { From 40fbde5167714a5d16676286eafae0b8a13e5940 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 
2024 14:51:50 +0000 Subject: [PATCH 07/36] Pull the genome size as a meta field to allow usage in every process --- conf/modules.config | 10 +++++----- subworkflows/local/prepare_genome.nf | 12 +++++++++--- workflows/blobtoolkit.nf | 6 +++--- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 184b98e1..0e64dd7e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -29,23 +29,23 @@ process { } withName: "MINIMAP2_HIC" { - ext.args = { "-ax sr -I" + Math.ceil(reference.size()/1e9) + 'G' } + ext.args = { "-ax sr -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_ILMN" { - ext.args = { "-ax sr -I" + Math.ceil(reference.size()/1e9) + 'G' } + ext.args = { "-ax sr -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_CCS" { - ext.args = { "-ax map-hifi --cs=short -I" + Math.ceil(reference.size()/1e9) + 'G' } + ext.args = { "-ax map-hifi --cs=short -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_CLR" { - ext.args = { "-ax map-pb -I" + Math.ceil(reference.size()/1e9) + 'G' } + ext.args = { "-ax map-pb -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "MINIMAP2_ONT" { - ext.args = { "-ax map-ont -I" + Math.ceil(reference.size()/1e9) + 'G' } + ext.args = { "-ax map-ont -I" + Math.ceil(meta2.genome_size/1e9) + 'G' } } withName: "SAMTOOLS_VIEW" { diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index d1e31a72..0b426fae 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -20,12 +20,18 @@ workflow PREPARE_GENOME { // MODULE: Decompress FASTA file if needed // if ( params.fasta.endsWith('.gz') ) { - ch_genome = GUNZIP ( fasta ).gunzip + ch_unzipped = GUNZIP ( fasta ).gunzip ch_versions = ch_versions.mix ( GUNZIP.out.versions ) } else { - ch_genome = fasta + ch_unzipped = fasta } + // + // LOGIC: Extract the genome size for decision making downstream + // + ch_unzipped 
+ | map { meta, fa -> [ meta + [genome_size: fa.size()], fa] } + | set { ch_genome } // // MODULES: Mask the genome if needed @@ -46,4 +52,4 @@ workflow PREPARE_GENOME { emit: genome = ch_fasta // channel: [ meta, path(genome) ] versions = ch_versions // channel: [ versions.yml ] -} \ No newline at end of file +} diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index 944ccc4c..8b383ded 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -105,7 +105,7 @@ workflow BLOBTOOLKIT { // // SUBWORKFLOW: Check samplesheet and create channels for downstream analysis // - INPUT_CHECK ( ch_input, ch_fasta, ch_yaml ) + INPUT_CHECK ( ch_input, PREPARE_GENOME.out.genome, ch_yaml ) ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) // @@ -130,9 +130,9 @@ workflow BLOBTOOLKIT { // if (params.taxa_file) { ch_taxa = Channel.from(params.taxa_file) - ch_taxon_taxa = ch_fasta.combine(ch_taxon).combine(ch_taxa).map { meta, fasta, taxon, taxa -> [ meta, taxon, taxa ] } + ch_taxon_taxa = PREPARE_GENOME.out.genome.combine(ch_taxon).combine(ch_taxa).map { meta, fasta, taxon, taxa -> [ meta, taxon, taxa ] } } else { - ch_taxon_taxa = ch_fasta.combine(ch_taxon).map { meta, fasta, taxon -> [ meta, taxon, [] ] } + ch_taxon_taxa = PREPARE_GENOME.out.genome.combine(ch_taxon).map { meta, fasta, taxon -> [ meta, taxon, [] ] } } BUSCO_DIAMOND ( From 46d231dfea3fa59bbc1ec1ccb32d12c950a26ae7 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 15:37:59 +0000 Subject: [PATCH 08/36] Don't modify the blobdir in place --- bin/update_versions.py | 9 ++++---- conf/modules.config | 26 ++++++++-------------- modules/local/blobtoolkit/summary.nf | 2 +- modules/local/blobtoolkit/updateblobdir.nf | 7 ++++-- modules/local/blobtoolkit/updatemeta.nf | 7 +++--- 5 files changed, 24 insertions(+), 27 deletions(-) diff --git a/bin/update_versions.py b/bin/update_versions.py index 0978393c..9e014b46 100755 --- a/bin/update_versions.py +++ 
b/bin/update_versions.py @@ -12,9 +12,10 @@ def parse_args(args=None): Description = "Combine BED files to create window stats input file." parser = argparse.ArgumentParser(description=Description) - parser.add_argument("--meta", help="Input JSON file.", required=True) + parser.add_argument("--meta_in", help="Input JSON file.", required=True) + parser.add_argument("--meta_out", help="Output JSON file.", required=True) parser.add_argument("--software", help="Input YAML file.", required=True) - parser.add_argument("--version", action="version", version="%(prog)s 1.0.0") + parser.add_argument("--version", action="version", version="%(prog)s 1.1.0") return parser.parse_args(args) @@ -41,8 +42,8 @@ def update_meta(meta, software): def main(args=None): args = parse_args(args) - data = update_meta(args.meta, args.software) - with open(args.meta, "w") as fh: + data = update_meta(args.meta_in, args.software) + with open(args.meta_out, "w") as fh: json.dump(data, fh) diff --git a/conf/modules.config b/conf/modules.config index 0e64dd7e..25c24cdc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -98,22 +98,6 @@ process { ext.args = "--evalue 1.0e-25 --hit-count 10" } - withName: "BLOBTOOLKIT_SUMMARY" { - publishDir = [ - path: { "${params.outdir}/blobtoolkit/${blobdir.name}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals("versions.yml") ? null : filename } - ] - } - - withName: "BLOBTK_IMAGES" { - publishDir = [ - path: { "${params.outdir}/blobtoolkit/plots" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals("versions.yml") ? 
null : filename } - ] - } - withName: "BLOBTOOLKIT_CHUNK" { ext.args = "--chunk 100000 --overlap 0 --max-chunks 10 --min-length 1000" } @@ -138,7 +122,7 @@ process { ] } - withName: "BLOBTOOLKIT_UPDATEMETA" { + withName: "BLOBTOOLKIT_UPDATEBLOBDIR" { publishDir = [ path: { "${params.outdir}/blobtoolkit" }, mode: params.publish_dir_mode, @@ -146,6 +130,14 @@ process { ] } + withName: "BLOBTK_IMAGES" { + publishDir = [ + path: { "${params.outdir}/blobtoolkit/plots" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] + } + withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ diff --git a/modules/local/blobtoolkit/summary.nf b/modules/local/blobtoolkit/summary.nf index 429f69e0..9b1a262f 100644 --- a/modules/local/blobtoolkit/summary.nf +++ b/modules/local/blobtoolkit/summary.nf @@ -23,7 +23,7 @@ process BLOBTOOLKIT_SUMMARY { """ blobtools filter \\ ${args} \\ - --summary ${blobdir}/${prefix}.summary.json ${blobdir} + --summary ${prefix}.summary.json ${blobdir} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/blobtoolkit/updateblobdir.nf b/modules/local/blobtoolkit/updateblobdir.nf index c829ba76..50167f8b 100644 --- a/modules/local/blobtoolkit/updateblobdir.nf +++ b/modules/local/blobtoolkit/updateblobdir.nf @@ -8,7 +8,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { container "docker.io/genomehubs/blobtoolkit:4.3.9" input: - tuple val(meta), path(input) + tuple val(meta), path(input, stageAs: "input_blobdir") tuple val(meta1), path(blastx, stageAs: "blastx.txt") tuple val(meta2), path(blastn, stageAs: "blastn.txt") path(taxdump) @@ -26,6 +26,9 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { def hits_blastx = blastx ? "--hits ${blastx}" : "" def hits_blastn = blastn ? 
"--hits ${blastn}" : "" """ + # In-place modifications are not great in Nextflow, so work on a copy of ${input} + mkdir ${prefix} + cp --preserve=timestamp ${input}/* ${prefix}/ blobtools replace \\ --taxdump ${taxdump} \\ --taxrule bestdistorder=buscoregions \\ @@ -33,7 +36,7 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { ${hits_blastn} \\ --threads ${task.cpus} \\ $args \\ - ${input} + ${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/blobtoolkit/updatemeta.nf b/modules/local/blobtoolkit/updatemeta.nf index 45df4209..de1313d5 100644 --- a/modules/local/blobtoolkit/updatemeta.nf +++ b/modules/local/blobtoolkit/updatemeta.nf @@ -12,8 +12,8 @@ process BLOBTOOLKIT_UPDATEMETA { path versions output: - tuple val(meta), path(prefix), emit: blobdir - path "versions.yml" , emit: versions + tuple val(meta), path("*.json"), emit: json + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,8 +24,9 @@ process BLOBTOOLKIT_UPDATEMETA { """ update_versions.py \\ ${args} \\ - --meta ${input}/meta.json \\ + --meta_in ${input}/meta.json \\ --software ${versions} \\ + --meta_out ${prefix}.meta.json cat <<-END_VERSIONS > versions.yml "${task.process}": From bb428b2575bc26479df970b4d96c854127d53d5e Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 16:22:42 +0000 Subject: [PATCH 09/36] Compress the blobdir --- conf/modules.config | 2 +- modules/local/compressblobdir.nf | 37 ++++++++++++++++++++++++++ subworkflows/local/finalise_blobdir.nf | 33 +++++++++++++++++++++++ workflows/blobtoolkit.nf | 11 +++++--- 4 files changed, 79 insertions(+), 4 deletions(-) create mode 100644 modules/local/compressblobdir.nf create mode 100644 subworkflows/local/finalise_blobdir.nf diff --git a/conf/modules.config b/conf/modules.config index 25c24cdc..2ab3ba99 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -122,7 +122,7 @@ process { ] } - withName: "BLOBTOOLKIT_UPDATEBLOBDIR" { + withName: 
"COMPRESSBLOBDIR" { publishDir = [ path: { "${params.outdir}/blobtoolkit" }, mode: params.publish_dir_mode, diff --git a/modules/local/compressblobdir.nf b/modules/local/compressblobdir.nf new file mode 100644 index 00000000..efb1b7a5 --- /dev/null +++ b/modules/local/compressblobdir.nf @@ -0,0 +1,37 @@ +process COMPRESSBLOBDIR { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::pigz=2.8" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/pigz:2.8': + 'biocontainers/pigz:2.8' }" + + input: + tuple val(meta), path(input, stageAs: "input_blobdir") + tuple val(meta1), path(summary_json) + tuple val(meta2), path(meta_json) + + output: + tuple val(meta), path(prefix), emit: blobdir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + cp ${input}/* ${prefix}/ + cp ${summary_json} ${prefix}/summary.json + cp ${meta_json} ${prefix}/meta.json + pigz --processes $task.cpus ${prefix}/*.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + END_VERSIONS + """ +} diff --git a/subworkflows/local/finalise_blobdir.nf b/subworkflows/local/finalise_blobdir.nf new file mode 100644 index 00000000..ffbbd534 --- /dev/null +++ b/subworkflows/local/finalise_blobdir.nf @@ -0,0 +1,33 @@ +// +// Final edits to the blobdir +// + +include { BLOBTOOLKIT_UPDATEMETA } from '../../modules/local/blobtoolkit/updatemeta' +include { COMPRESSBLOBDIR } from '../../modules/local/compressblobdir' + +workflow FINALISE_BLOBDIR { + take: + blobdir // channel: [ val(meta), path(blobdir) ] + software // channel: [ val(meta), path(software_yml) ] + summary // channel: [ val(meta), path(summary_json) ] + + + main: + ch_versions = Channel.empty() + + // + // MODULE: Update 
meta json file + // + BLOBTOOLKIT_UPDATEMETA ( blobdir, software ) + + // + // MODULE: Compress all the json files + // + COMPRESSBLOBDIR ( blobdir, summary, BLOBTOOLKIT_UPDATEMETA.out.json ) + ch_versions = ch_versions.mix ( COMPRESSBLOBDIR.out.versions.first() ) + + + emit: + blobdir = COMPRESSBLOBDIR.out.blobdir // channel: [ val(meta), path(json) ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index 8b383ded..f1917a08 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -55,7 +55,6 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // MODULE: Loaded from modules/local/ // include { BLOBTOOLKIT_CONFIG } from '../modules/local/blobtoolkit/config' -include { BLOBTOOLKIT_UPDATEMETA } from '../modules/local/blobtoolkit/updatemeta' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -70,6 +69,7 @@ include { RUN_BLASTN } from '../subworkflows/local/run_blastn' include { COLLATE_STATS } from '../subworkflows/local/collate_stats' include { BLOBTOOLS } from '../subworkflows/local/blobtools' include { VIEW } from '../subworkflows/local/view' +include { FINALISE_BLOBDIR } from '../subworkflows/local/finalise_blobdir' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -208,9 +208,14 @@ workflow BLOBTOOLKIT { ) // - // MODULE: Update meta json file + // SUBWORKFLOW: Finalise and publish the blobdir // - BLOBTOOLKIT_UPDATEMETA ( BLOBTOOLS.out.blobdir, CUSTOM_DUMPSOFTWAREVERSIONS.out.yml ) + FINALISE_BLOBDIR ( + BLOBTOOLS.out.blobdir, + CUSTOM_DUMPSOFTWAREVERSIONS.out.yml, + VIEW.out.summary + ) + // Don't update ch_versions because it's already been consumed by now // From 9ee35c0fcb6c15b012437a94768c62c343604d26 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 16:46:05 +0000 Subject: [PATCH 10/36] Updated the changelog --- CHANGELOG.md | 33 
+++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56007304..d439b19e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,29 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[0.4.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.4.0)] – Buneary – [2024-03-28] + +The pipeline has now been validated on dozens of genomes, up to 11 Gbp. + +### Enhancements & fixes + +- Upgraded the version of `blobtools`, which enables a better reporting of + wrong accession numbers and a better handling of oddities in input files. +- Files in the output blobdir are now compressed. +- All modules handling blobdirs can now be cached. +- Large genomes supported, up to at least 11 Gbp. +- More fields included in the trace files. + +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. + +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| blobtoolkit | 4.3.3 | 4.3.9 | + +> **NB:** Dependency has been **updated** if both old and new version information is present.
**NB:** Dependency has been **added** if just the new version information is present.
**NB:** Dependency has been **removed** if version information isn't present. + ## [[0.3.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.3.0)] – Poliwag – [2024-02-09] The pipeline has now been validated on five genomes, all under 100 Mbp: a @@ -33,6 +56,16 @@ sponge, a platyhelminth, and three fungi. > **NB:** Parameter has been **updated** if both old and new parameter information is present.
**NB:** Parameter has been **added** if just the new parameter information is present.
**NB:** Parameter has been **removed** if new parameter information isn't present. +### Software dependencies + +Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. + +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| blobtoolkit | 4.3.2 | 4.3.3 | + +> **NB:** Dependency has been **updated** if both old and new version information is present.
**NB:** Dependency has been **added** if just the new version information is present.
**NB:** Dependency has been **removed** if version information isn't present. + ## [[0.2.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.2.0)] – Pikachu – [2023-12-22] ### Enhancements & fixes From 2c1bd1f1c0a67ba606a41724bdd3221f797d8159 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 16:59:20 +0000 Subject: [PATCH 11/36] Updated the documentation --- docs/output.md | 41 +++++++++++++++++++++++++++++++++++------ docs/usage.md | 2 +- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/docs/output.md b/docs/output.md index e6efe8bc..18fe2b6d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -8,13 +8,13 @@ The directories listed below will be created in the results directory after the The directories comply with Tree of Life's canonical directory structure. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [BlobDir](#blobdir) - Output files from `blobtools` and `view` subworkflow +- [BlobDir](#blobdir) - Output files viewable on a [BlobToolKit viewer](https://github.com/blobtoolkit/blobtoolkit) +- [Static plots](#static-plots) - Static versions of the BlobToolKit plots +- [BUSCO](#busco) - BUSCO results - [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution @@ -25,14 +25,43 @@ The files in the BlobDir dataset which is used to create the online interactive
Output files -- `/` - - `*.json`: files generated from genome and alignment coverage statistics - - `*.png`: static plot images +- `blobtoolkit/` + - `/` + - `*.json.gz`: files generated from genome and alignment coverage statistics More information about visualising the data in the [BlobToolKit repository](https://github.com/blobtoolkit/blobtoolkit/tree/main/src/viewer)
+### Static plots + +Images generated from the above blobdir using the [blobtk](https://github.com/blobtoolkit/blobtk) tool. + +
+Output files + +- `blobtoolkit/` + - `plots/` + - `*.png` or `*.svg`, depending on the selected output format: static versions of the BlobToolKit plots. + +
+ +### BUSCO + +BUSCO results generated by the pipeline (all BUSCO lineages that match the classification of the species). + +
+Output files + +- `blobtoolkit/` + - `busco/` + - `*.batch_summary.txt`: BUSCO scores as tab-separated files (1 file per lineage). + - `*.fasta.txt`: BUSCO scores as formatted text (1 file per lineage). + - `*.json`: BUSCO scores as JSON (1 file per lineage). + - `*/`: all output BUSCO files, including the coordinate and sequence files of the annotated genes. + +
+ ### MultiQC
diff --git a/docs/usage.md b/docs/usage.md index 84229b17..0578c2bb 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -229,7 +229,7 @@ List of tools for any given dataset can be fetched from the API, for example htt | Dependency | Snakemake | Nextflow | | ----------------- | --------- | -------- | -| blobtoolkit | 4.3.2 | 4.3.2 | +| blobtoolkit | 4.3.2 | 4.3.9 | | blast | 2.12.0 | 2.14.1 | | blobtk | 0.5.0 | 0.5.1 | | busco | 5.3.2 | 5.5.0 | From d7fa22846a6b7346d0f607b98070a5d73f05d246 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 16:59:49 +0000 Subject: [PATCH 12/36] Version bump --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 43d996ce..e65d05a9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -248,7 +248,7 @@ manifest { description = """Quality assessment of genome assemblies""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '0.3.0' + version = '0.4.0' doi = '10.5281/zenodo.7949058' } From b53b497ad8648e005ff034cf6fdd3e8fb9c4a994 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 17:17:42 +0000 Subject: [PATCH 13/36] Updated the resource requirements of fasta_windows --- conf/base.config | 8 ++++++++ modules.json | 3 ++- modules/nf-core/fastawindows/fastawindows.diff | 12 ++++++++++++ modules/nf-core/fastawindows/main.nf | 1 - 4 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/fastawindows/fastawindows.diff diff --git a/conf/base.config b/conf/base.config index 6ebea12c..e52cb324 100644 --- a/conf/base.config +++ b/conf/base.config @@ -52,6 +52,14 @@ process { withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + + withName: 'FASTAWINDOWS' { + // 1 CPU per 1 Gbp + cpus = { check_max( Math.ceil(meta.genome_size / 1000000000), 'cpus' ) } + // 100 MB per 45 Mbp + memory = { check_max( 100.MB * task.attempt * Math.ceil(meta.genome_size / 45000000), 
'memory' ) } + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/modules.json b/modules.json index c458f840..b8d3701b 100644 --- a/modules.json +++ b/modules.json @@ -41,7 +41,8 @@ "fastawindows": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/fastawindows/fastawindows.diff" }, "goat/taxonsearch": { "branch": "master", diff --git a/modules/nf-core/fastawindows/fastawindows.diff b/modules/nf-core/fastawindows/fastawindows.diff new file mode 100644 index 00000000..12f809e6 --- /dev/null +++ b/modules/nf-core/fastawindows/fastawindows.diff @@ -0,0 +1,12 @@ +Changes in module 'nf-core/fastawindows' +--- modules/nf-core/fastawindows/main.nf ++++ modules/nf-core/fastawindows/main.nf +@@ -1,6 +1,5 @@ + process FASTAWINDOWS { + tag "$meta.id" +- label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + +************************************************************ diff --git a/modules/nf-core/fastawindows/main.nf b/modules/nf-core/fastawindows/main.nf index 03cc8c57..40b28436 100644 --- a/modules/nf-core/fastawindows/main.nf +++ b/modules/nf-core/fastawindows/main.nf @@ -1,6 +1,5 @@ process FASTAWINDOWS { tag "$meta.id" - label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
From f99b54224bad4ffddd9f4f617b30b6bed93dd330 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 17:23:45 +0000 Subject: [PATCH 14/36] Updated the resource requirements of BUSCO --- conf/base.config | 14 ++++++++++++++ modules/nf-core/busco/busco.diff | 7 ++++--- modules/nf-core/busco/main.nf | 1 - nextflow.config | 27 +++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) diff --git a/conf/base.config b/conf/base.config index e52cb324..e2bc8fc5 100644 --- a/conf/base.config +++ b/conf/base.config @@ -60,6 +60,20 @@ process { memory = { check_max( 100.MB * task.attempt * Math.ceil(meta.genome_size / 45000000), 'memory' ) } } + withName: BUSCO { + // No straightforward formula, so using ranges instead. + // The memory is increased by half of the base value at every attempt. + memory = { check_max( ( + meta.genome_size < 100000000 ? 4.GB : + meta.genome_size < 500000000 ? 8.GB : + meta.genome_size < 1000000000 ? 16.GB : + meta.genome_size < 2000000000 ? 32.GB : + meta.genome_size < 5000000000 ? 64.GB : 192.GB + ) * ((task.attempt+1)/2) , 'memory' ) } + cpus = { log_increase_cpus(4, 2*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) } + time = { check_max( 2.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/modules/nf-core/busco/busco.diff b/modules/nf-core/busco/busco.diff index 0a402c4c..775788fb 100644 --- a/modules/nf-core/busco/busco.diff +++ b/modules/nf-core/busco/busco.diff @@ -1,14 +1,15 @@ Changes in module 'nf-core/busco' --- modules/nf-core/busco/main.nf +++ modules/nf-core/busco/main.nf -@@ -1,5 +1,5 @@ +@@ -1,6 +1,5 @@ process BUSCO { - tag "$meta.id" +- label 'process_medium' + tag "${meta.id}_${lineage}" - label 'process_medium' conda "${moduleDir}/environment.yml" -@@ -37,7 +37,7 @@ + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+@@ -37,7 +36,7 @@ def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" def busco_config = config_file ? "--config $config_file" : '' def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}" diff --git a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/main.nf index 867238cf..83d8eacd 100644 --- a/modules/nf-core/busco/main.nf +++ b/modules/nf-core/busco/main.nf @@ -1,6 +1,5 @@ process BUSCO { tag "${meta.id}_${lineage}" - label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/nextflow.config b/nextflow.config index e65d05a9..adc40371 100644 --- a/nextflow.config +++ b/nextflow.config @@ -287,3 +287,30 @@ def check_max(obj, type) { } } } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Increasing the number of CPUs often gives diminishing returns, so we increase it + following a logarithm curve. Example: + - 0 < value <= 1: start + step + - 1 < value <= 2: start + 2*step + - 2 < value <= 4: start + 3*step + - 4 < value <= 8: start + 4*step + In order to support re-runs, the step increase may be multiplied by the attempt + number prior to calling this function. 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Modified logarithm function that doesn't return negative numbers +def positive_log(value, base) { + if (value <= 1) { + return 0 + } else { + return Math.log(value)/Math.log(base) + } +} + +def log_increase_cpus(start, step, value, base) { + return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus') +} + From 72a639b2d58d0da6cbd0bfcd71ae87d5ccae5e7a Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 18:08:48 +0000 Subject: [PATCH 15/36] Updated the resource requirements of WINDOWSTATS_INPUT --- conf/base.config | 7 +++++++ modules/local/windowstats_input.nf | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index e2bc8fc5..281e0fef 100644 --- a/conf/base.config +++ b/conf/base.config @@ -53,6 +53,13 @@ process { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + withName: 'WINDOWSTATS_INPUT' { + cpus = { check_max( 1 , 'cpus' ) } + // 2 GB per 1 Gbp + memory = { check_max( 2.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000), 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withName: 'FASTAWINDOWS' { // 1 CPU per 1 Gbp cpus = { check_max( Math.ceil(meta.genome_size / 1000000000), 'cpus' ) } diff --git a/modules/local/windowstats_input.nf b/modules/local/windowstats_input.nf index 6fe537b1..4ed7c6d4 100644 --- a/modules/local/windowstats_input.nf +++ b/modules/local/windowstats_input.nf @@ -1,6 +1,5 @@ process WINDOWSTATS_INPUT { tag "$meta.id" - label 'process_single' conda "conda-forge::pandas=1.5.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
From 54862de2558344eccef5337dfe52ba1011f6db5b Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 18:12:21 +0000 Subject: [PATCH 16/36] Updated the resource requirements of BLOBTOOLKIT_WINDOWSTATS --- conf/base.config | 7 +++++++ modules/local/blobtoolkit/windowstats.nf | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 281e0fef..8764d360 100644 --- a/conf/base.config +++ b/conf/base.config @@ -60,6 +60,13 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } } + withName: 'BLOBTOOLKIT_WINDOWSTATS' { + cpus = { check_max( 1 , 'cpus' ) } + // 3 GB per 1 Gbp + memory = { check_max( 3.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000), 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withName: 'FASTAWINDOWS' { // 1 CPU per 1 Gbp cpus = { check_max( Math.ceil(meta.genome_size / 1000000000), 'cpus' ) } diff --git a/modules/local/blobtoolkit/windowstats.nf b/modules/local/blobtoolkit/windowstats.nf index b3b6c257..d432a8ff 100644 --- a/modules/local/blobtoolkit/windowstats.nf +++ b/modules/local/blobtoolkit/windowstats.nf @@ -1,6 +1,5 @@ process BLOBTOOLKIT_WINDOWSTATS { tag "$meta.id" - label 'process_single' if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_WINDOWSTATS module does not support Conda. Please use Docker / Singularity / Podman instead." 
From 424d7e0a093524755747fc22c6c7ac9d47f8653a Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 18:23:51 +0000 Subject: [PATCH 17/36] Added missing FastQ extensions --- bin/check_samplesheet.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index c63d06fe..067f954d 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -27,6 +27,8 @@ class RowChecker: VALID_FORMATS = ( ".cram", ".bam", + ".fq", + ".fq.gz", ".fastq", ".fastq.gz", ) From f8c3cd0067500115b6d002a8a4d4b82e49e7154e Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 18:24:25 +0000 Subject: [PATCH 18/36] Also support FastA files --- bin/check_samplesheet.py | 4 ++++ subworkflows/local/minimap_alignment.nf | 2 ++ 2 files changed, 6 insertions(+) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 067f954d..6b5392bf 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -31,6 +31,10 @@ class RowChecker: ".fq.gz", ".fastq", ".fastq.gz", + ".fa", + ".fa.gz", + ".fasta", + ".fasta.gz", ) VALID_DATATYPES = ( diff --git a/subworkflows/local/minimap_alignment.nf b/subworkflows/local/minimap_alignment.nf index e0b479bc..1d6263b3 100644 --- a/subworkflows/local/minimap_alignment.nf +++ b/subworkflows/local/minimap_alignment.nf @@ -24,6 +24,7 @@ workflow MINIMAP2_ALIGNMENT { input | branch { meta, reads -> + fasta: reads.toString().endsWith(".fasta") || reads.toString().endsWith(".fasta.gz") || reads.toString().endsWith(".fa") || reads.toString().endsWith(".fa.gz") fastq: reads.toString().endsWith(".fastq") || reads.toString().endsWith(".fastq.gz") || reads.toString().endsWith(".fq") || reads.toString().endsWith(".fq.gz") bamcram: true } @@ -35,6 +36,7 @@ workflow MINIMAP2_ALIGNMENT { // Branch input by sequencing type SAMTOOLS_FASTA.out.interleaved + | mix ( ch_reads_by_type.fasta ) | mix ( ch_reads_by_type.fastq ) | branch { meta, reads -> From 
8f7e029f27313c257dc96a01213c05a29faebf5e Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 18:28:17 +0000 Subject: [PATCH 19/36] Updated the input JSON --- assets/schema_input.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index f08ccb89..26ed41cb 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -21,8 +21,8 @@ }, "datafile": { "type": "string", - "pattern": "^\\S+\\.cram$", - "errorMessage": "Data file for reads cannot contain spaces and must have extension 'cram'" + "pattern": "^\\S+\\.(bam|cram|fa|fa.gz|fasta|fasta.gz|fq|fq.gz|fastq|fastq.gz)$", + "errorMessage": "Data file for reads cannot contain spaces and must be BAM/CRAM/FASTQ/FASTA" } }, "required": ["datafile", "datatype", "sample"] From 75bbc2165ad69cfef83548b1f4cd40875b81f56a Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 18:30:07 +0000 Subject: [PATCH 20/36] Updated the CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d439b19e..83962ddf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The pipeline has now been validated on dozens of genomes, up to 11 Gbp. - Files in the output blobdir are now compressed. - All modules handling blobdirs can now be cached. - Large genomes supported, up to at least 11 Gbp. +- Allow all variations of FASTA and FASTQ extensions for input. - More fields included in the trace files. 
### Software dependencies From 7dd24b39d94213e7f93f88f0e755e1ef04e8a7ff Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 18:30:40 +0000 Subject: [PATCH 21/36] Only BAM and CRAM are accepted for aligned reads --- subworkflows/local/input_check.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 5b028911..fbb397f0 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -79,8 +79,8 @@ def create_data_channels(LinkedHashMap row) { // add path(s) of the read file(s) to the meta map def data_meta = [] - if ( !params.align && (row.datafile.endsWith(".fastq") || row.datafile.endsWith(".fastq.gz")) ) { - exit 1, "ERROR: Please check input samplesheet and pipeline parameters -> Data file is in FastQ format but --align is not set!\n${row.datafile}" + if ( !params.align && !row.datafile.endsWith(".bam") && !row.datafile.endsWith(".cram") ) { + exit 1, "ERROR: Please check input samplesheet and pipeline parameters -> Data file is in FastA/FastQ format but --align is not set!\n${row.datafile}" } if ( !file(row.datafile).exists() ) { From a71b75966522976105d7cf6fed3ea2ac86f6101e Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 19:05:14 +0000 Subject: [PATCH 22/36] There is no patch for cat/cat --- modules.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules.json b/modules.json index b8d3701b..6470fda1 100644 --- a/modules.json +++ b/modules.json @@ -20,8 +20,7 @@ "cat/cat": { "branch": "master", "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", - "installed_by": ["modules"], - "patch": "modules/nf-core/cat/cat/cat-cat.diff" + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", From 857d0940984f36b04c026e590ac6c55d61598616 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 19:18:57 +0000 Subject: [PATCH 23/36] Updated all 
nf-core modules --- CHANGELOG.md | 11 +- modules.json | 22 +- modules/nf-core/cat/cat/tests/main.nf.test | 29 +- .../nf-core/cat/cat/tests/main.nf.test.snap | 92 ++-- .../dumpsoftwareversions/environment.yml | 2 +- .../custom/dumpsoftwareversions/main.nf | 4 +- .../dumpsoftwareversions/tests/main.nf.test | 7 +- .../tests/main.nf.test.snap | 50 +- modules/nf-core/gunzip/tests/main.nf.test | 9 +- modules/nf-core/minimap2/align/main.nf | 16 +- .../nf-core/minimap2/align/tests/main.nf.test | 34 ++ .../minimap2/align/tests/main.nf.test.snap | 29 ++ modules/nf-core/multiqc/environment.yml | 2 +- modules/nf-core/multiqc/main.nf | 6 +- modules/nf-core/multiqc/meta.yml | 1 - modules/nf-core/multiqc/tests/main.nf.test | 53 +- .../nf-core/multiqc/tests/main.nf.test.snap | 41 ++ .../nf-core/samtools/fasta/environment.yml | 3 +- modules/nf-core/samtools/fasta/main.nf | 4 +- .../nf-core/samtools/index/environment.yml | 3 +- modules/nf-core/samtools/index/main.nf | 4 +- .../nf-core/samtools/index/tests/main.nf.test | 36 +- .../samtools/index/tests/main.nf.test.snap | 54 +- modules/nf-core/samtools/view/environment.yml | 3 +- modules/nf-core/samtools/view/main.nf | 4 +- .../nf-core/samtools/view/tests/main.nf.test | 193 ++++---- .../samtools/view/tests/main.nf.test.snap | 466 +++++++++++++++--- modules/nf-core/seqtk/subseq/environment.yml | 2 +- modules/nf-core/seqtk/subseq/main.nf | 27 +- modules/nf-core/seqtk/subseq/meta.yml | 4 +- .../nf-core/seqtk/subseq/tests/main.nf.test | 59 +++ .../seqtk/subseq/tests/main.nf.test.snap | 60 +++ .../seqtk/subseq/tests/standard.config | 5 + modules/nf-core/seqtk/subseq/tests/tags.yml | 2 + .../windowmasker/mkcounts/environment.yml | 2 +- modules/nf-core/windowmasker/mkcounts/main.nf | 8 +- .../windowmasker/mkcounts/tests/main.nf.test | 59 +++ .../mkcounts/tests/main.nf.test.snap | 60 +++ .../mkcounts/tests/nextflow.config | 5 + .../windowmasker/mkcounts/tests/tags.yml | 2 + .../windowmasker/ustat/environment.yml | 2 +- 
modules/nf-core/windowmasker/ustat/main.nf | 4 +- .../windowmasker/ustat/tests/main.nf.test | 66 +++ .../ustat/tests/main.nf.test.snap | 60 +++ .../windowmasker/ustat/tests/nextflow.config | 5 + .../nf-core/windowmasker/ustat/tests/tags.yml | 2 + subworkflows/local/run_blastn.nf | 9 +- 47 files changed, 1264 insertions(+), 357 deletions(-) create mode 100644 modules/nf-core/multiqc/tests/main.nf.test.snap create mode 100644 modules/nf-core/seqtk/subseq/tests/main.nf.test create mode 100644 modules/nf-core/seqtk/subseq/tests/main.nf.test.snap create mode 100644 modules/nf-core/seqtk/subseq/tests/standard.config create mode 100644 modules/nf-core/seqtk/subseq/tests/tags.yml create mode 100644 modules/nf-core/windowmasker/mkcounts/tests/main.nf.test create mode 100644 modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap create mode 100644 modules/nf-core/windowmasker/mkcounts/tests/nextflow.config create mode 100644 modules/nf-core/windowmasker/mkcounts/tests/tags.yml create mode 100644 modules/nf-core/windowmasker/ustat/tests/main.nf.test create mode 100644 modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap create mode 100644 modules/nf-core/windowmasker/ustat/tests/nextflow.config create mode 100644 modules/nf-core/windowmasker/ustat/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 83962ddf..2ef54b3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,14 +16,19 @@ The pipeline has now been validated on dozens of genomes, up to 11 Gbp. - Large genomes supported, up to at least 11 Gbp. - Allow all variations of FASTA and FASTQ extensions for input. - More fields included in the trace files. +- All nf-core modules updated ### Software dependencies Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. 
However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported. -| Dependency | Old version | New version | -| ----------- | ----------- | ----------- | -| blobtoolkit | 4.3.3 | 4.3.9 | +| Dependency | Old version | New version | +| ----------- | ------------- | ------------- | +| blobtoolkit | 4.3.3 | 4.3.9 | +| blast | 2.14.0 | 2.15.0 | +| multiqc | 1.17 and 1.18 | 1.20 and 1.21 | +| samtools | 1.18 | 1.19.2 | +| seqtk | 1.3 | 1.4 | > **NB:** Dependency has been **updated** if both old and new version information is present.
**NB:** Dependency has been **added** if just the new version information is present.
**NB:** Dependency has been **removed** if version information isn't present. diff --git a/modules.json b/modules.json index 6470fda1..e255f552 100644 --- a/modules.json +++ b/modules.json @@ -19,12 +19,12 @@ }, "cat/cat": { "branch": "master", - "git_sha": "81f27e75847087865299cc46605deb3b09b4e0a2", + "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", + "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", "installed_by": ["modules"] }, "diamond/blastp": { @@ -50,48 +50,48 @@ }, "gunzip": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "efbf86bb487f288ac30660282709d9620dd6048e", + "git_sha": "2c2d1cf80866dbd6dd0ea5d61ddd59533a72d41e", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "4ab13872435962dadc239979554d13709e20bf29", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, "samtools/fasta": { "branch": "master", - "git_sha": "9b1071e19265cf9c0d06958a011cf7a9cfe37213", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"], "patch": "modules/nf-core/samtools/fasta/samtools-fasta.diff" }, "samtools/index": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", + "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", "installed_by": ["modules"] }, "seqtk/subseq": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502", "installed_by": ["modules"] }, "windowmasker/mkcounts": { 
"branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", "installed_by": ["modules"] } } diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test index aaae04f9..fcee2d19 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -19,8 +19,8 @@ nextflow_process { [ [ id:'genome', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -45,8 +45,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -72,8 +72,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) 
] ] """ @@ -83,7 +83,8 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} ) } } @@ -101,8 +102,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true) ] ] """ @@ -130,8 +131,8 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), - file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true) ] ] """ @@ -141,7 +142,8 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, + { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} ) } } @@ -158,7 +160,7 @@ nextflow_process { [ [ id:'test', single_end:true ], [ - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] ] """ @@ -174,4 +176,3 @@ nextflow_process { } } } - diff --git 
a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap index 0c9bfe8d..423571ba 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -1,4 +1,10 @@ { + "test_cat_unzipped_zipped_size": { + "content": [ + 375 + ], + "timestamp": "2023-10-16T14:33:08.049445686" + }, "test_cat_unzipped_unzipped": { "content": [ { @@ -61,36 +67,31 @@ ], "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped": { + "test_cat_zipped_zipped_lines": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "test.gff3.gz:md5,c439d3b60e7bc03e8802a451a0d9a5d9" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } + [ + "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", + "MT192765.1\tGenbank\tgene\t259\t21548\t.\t+\t.\tParent=unknown_transcript_1", + "MT192765.1\tGenbank\tCDS\t259\t13461\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", + "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", + "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ] + ], + "timestamp": "2023-10-16T14:32:33.629048645" + }, + 
"test_cat_unzipped_zipped_lines": { + "content": [ + [ + ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", + "GTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGT", + "GTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAG", + "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", + "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", + "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ] ], - "timestamp": "2024-01-12T14:02:02.999254641" + "timestamp": "2023-10-16T14:33:08.038830506" }, "test_cat_one_file_unzipped_zipped_lines": { "content": [ @@ -105,41 +106,16 @@ ], "timestamp": "2023-10-16T14:33:21.39642399" }, - "test_cat_unzipped_zipped": { + "test_cat_zipped_zipped_size": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "1": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ], - "file_out": [ - [ - { - "id": "test", - "single_end": true - }, - "cat.txt.gz:md5,f44b33a0e441ad58b2d3700270e2dbe2" - ] - ], - "versions": [ - "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" - ] - } + 78 ], - "timestamp": "2024-01-12T14:08:26.948048418" + "timestamp": "2023-10-16T14:32:33.641869244" }, "test_cat_one_file_unzipped_zipped_size": { "content": [ 374 ], - "timestamp": "2024-01-12T14:10:22.445700266" + "timestamp": "2023-10-16T14:33:21.4094373" } -} +} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml index f0c63f69..b48ced26 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - 
bioconda::multiqc=1.17 + - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 7685b33c..105f9265 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -4,8 +4,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : + 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test index eec1db10..b1e1630b 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test @@ -31,7 +31,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.versions, + file(process.out.mqc_yml[0]).readLines()[0..10], + file(process.out.yml[0]).readLines()[0..7] + ).match() + } ) } } diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap index 4274ed57..5f59a936 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap @@ -1,27 +1,33 @@ { "Should run without failures": { "content": [ - { - "0": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ], - "1": [ - 
"software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "2": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "versions": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "yml": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ] - } + [ + "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" + ], + [ + "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", + " \\n\\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n \\n \\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\", + " \\ \\n\\n\\n\\n\\n\\n \\n\\", + " \\ \\n \\n\\n\\n\\n\\", + " \\n\\n \\n \\n\\" + ], + [ + "CUSTOM_DUMPSOFTWAREVERSIONS:", + " python: 3.11.7", + " yaml: 5.4.1", + "TOOL1:", + " tool1: 0.11.9", + "TOOL2:", + " tool2: '1.9'", + "Workflow:" + ] ], - "timestamp": "2023-11-03T14:43:22.157011" + "timestamp": "2024-01-09T23:01:18.710682" } -} +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index d0317922..6406008e 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -15,10 +15,11 @@ nextflow_process { } process { """ - input[0] = [ - [], - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) """ } } diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 07a32158..661bd23d 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -34,13 +34,25 @@ process MINIMAP2_ALIGN { minimap2 \\ $args \\ -t $task.cpus \\ - "${reference ?: reads}" \\ - "$reads" \\ + ${reference ?: reads} \\ + $reads \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output + cat <<-END_VERSIONS > versions.yml 
+ "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + """ + touch $output_file + cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index b634468b..4d77e0d9 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -142,4 +142,38 @@ nextflow_process { } + test("sarscov2 - fastq, fasta, false, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.paf[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + } diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index a39a1697..ec99d13e 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -6,6 +6,10 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:06.01315354" }, "sarscov2 - fastq, fasta, true, false, false - stub": { @@ -15,8 +19,25 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:24.487175659" }, + "sarscov2 - fastq, 
fasta, false, false, false - stub": { + "content": [ + "test.paf", + [ + "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-01T11:06:54.090105" + }, "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { "content": [ "test.bam", @@ -24,6 +45,10 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:12.50816279" }, "sarscov2 - fastq, [], true, false, false": { @@ -33,6 +58,10 @@ "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, "timestamp": "2023-12-04T12:07:18.414974788" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index bc0bdb5b..ca39fb67 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.18 + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 00cc48d2..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.18--pyhdfd78af_0' : - 'biocontainers/multiqc:1.18--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -43,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660e..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index c2dad217..f1c4242e 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -3,19 +3,17 @@ nextflow_process { name "Test Process MULTIQC" script "../main.nf" process "MULTIQC" + tag "modules" tag "modules_nfcore" tag "multiqc" - test("MULTIQC: FASTQC") { + test("sarscov2 single-end [fastqc]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = [] input[2] = [] input[3] = [] @@ -26,23 +24,20 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] 
==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } ) } } - test("MULTIQC: FASTQC and a config file") { + test("sarscov2 single-end [fastqc] [config]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = Channel.of([file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz_fastqc_zip'], checkIfExists: true)]) + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] @@ -53,9 +48,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.report.get(0)).exists() }, - { assert path(process.out.data.get(0)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("multiqc") } + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null 
+++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/fasta/environment.yml b/modules/nf-core/samtools/fasta/environment.yml index 05cb8a8e..14585013 100644 --- a/modules/nf-core/samtools/fasta/environment.yml +++ b/modules/nf-core/samtools/fasta/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/fasta/main.nf b/modules/nf-core/samtools/fasta/main.nf index 4b0cad9a..9aa03430 100644 --- a/modules/nf-core/samtools/fasta/main.nf +++ b/modules/nf-core/samtools/fasta/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FASTA { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 296ed99e..a5e50649 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 8ad18fdc..dc14f98d 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index c76a9169..bb7756d1 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/index" - test("sarscov2 [BAI]") { + test("bai") { when { params { @@ -16,10 +16,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -28,12 +28,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot(process.out.bai).match("bai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("bai_versions") } ) } } - test("homo_sapiens [CRAI]") { + test("crai") { when { params { @@ -41,10 +41,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) """ } } @@ -53,12 +53,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert 
snapshot(process.out.crai).match("crai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("crai_versions") } ) } } - test("homo_sapiens [CSI]") { + test("csi") { config "./csi.nextflow.config" @@ -68,10 +68,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -80,7 +80,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("csi_versions") } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index b3baee7f..3dc8e7de 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,28 +1,74 @@ { + "crai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:00.324667957" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:07.885103162" + }, "crai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" ] ] ], - "timestamp": "2023-11-15T15:17:37.30801" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + 
"timestamp": "2024-02-12T18:41:38.446424" }, "bai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" ] ] ], - "timestamp": "2023-11-15T15:17:30.869234" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:51.641425452" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index 99aa69d0..b0676f33 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 0b5a2912..5a8989d6 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_VIEW { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input), path(index) diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test index 89ed3555..45a0defb 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -9,16 +9,16 @@ nextflow_process { tag "samtools" tag "samtools/view" - test("sarscov2 - [bam, []], [], []") { + test("bam") { when { process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), [] - ] + ]) input[1] = [[],[]] input[2] = [] """ @@ -28,34 +28,31 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - process.out.bai, - process.out.crai, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } ) } - } - test("homo_sapiens - [cram, crai], fasta, []") { + test("cram") { when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: 
true), - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true) - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = [] """ } @@ -64,36 +61,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.cram[0][1]).name, - process.out.bam, - process.out.sam, - process.out.bai, - process.out.crai, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } ) } - } - test("homo_sapiens - [cram, []], fasta, [] - bam output") { + test("cram_to_bam") { config "./bam.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ] - 
input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = [] """ } @@ -102,36 +96,33 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - process.out.bai, - process.out.crai, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } ) } - } - test("homo_sapiens - [cram, []], fasta, [] - bam & index output") { + test("cram_to_bam_index") { config "./bam_index.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = [] """ } @@ -140,36 +131,33 @@ nextflow_process { then { assertAll( { assert 
process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.bai, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_versions") } ) } - } - test("homo_sapiens - [cram, []], fasta, qname - bam & index output") { + test("cram_to_bam_index_qname") { config "./bam_index.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), [] - ] - input[1] = [ - [ id:'genome' ], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) """ } @@ -178,21 +166,18 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.bai, - process.out.versions - 
).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } ) } - } - test("sarscov2 - [bam, []], [], [] - stub") { + test("bam_stub") { options "-stub" config "./bam_index.config" @@ -200,11 +185,11 @@ nextflow_process { when { process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), [] - ] + ]) input[1] = [[],[]] input[2] = [] """ @@ -214,18 +199,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.sam, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.bai, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } ) } - } - } diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap 
b/modules/nf-core/samtools/view/tests/main.nf.test.snap index 83427491..f55943a7 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -1,140 +1,488 @@ { - "homo_sapiens - [cram, []], fasta, [] - bam output": { + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ [ - - ], + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:09.713353823" + }, + "cram_to_bam_index_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ [ - ], - [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" ] ], - "timestamp": "2023-12-04T17:41:17.563069206" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" }, - "sarscov2 - [bam, []], [], []": { + "cram_to_bam_index_sam": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ [ - ], - [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" ] ], - "timestamp": "2023-12-04T17:41:03.206994564" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" }, - "homo_sapiens - [cram, []], fasta, qname - bam & index output": { + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "bam_csi": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ [ - ], - "test.bam.csi", + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ [ - - ], + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:03.935041046" + }, + "cram_to_bam_bam": { + 
"content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + "cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" ] ], - "timestamp": "2023-12-04T17:44:39.165289759" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:55.910685496" }, - "homo_sapiens - [cram, []], fasta, [] - bam & index output": { + "cram_to_bam_bai": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ [ - - ], - "test.bam.csi", + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:47.715221169" + }, + "cram_bam": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] ], - "timestamp": "2023-12-04T17:44:32.25731224" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" }, - "sarscov2 - [bam, []], [], [] - stub": { + "cram_to_bam_index_qname_crai": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ [ - ], - 
"test.csi", + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:31.692607421" + }, + "cram_to_bam_index_qname_cram": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] ], - "timestamp": "2023-12-04T17:44:45.81037195" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" }, - "homo_sapiens - [cram, crai], fasta, []": { + "bam_cram": { "content": [ - "test.cram", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:39.913411036" + }, + "bam_sam": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + 
"timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ [ - "versions.yml:md5,06b9049228b111e7bed5c52fe8a98d9b" + ] ], - "timestamp": "2023-12-04T17:41:10.730011823" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" } } \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml index 42c4e8af..7abe3644 100644 --- a/modules/nf-core/seqtk/subseq/environment.yml +++ b/modules/nf-core/seqtk/subseq/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::seqtk=1.3 + - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf index 91d2dff3..d5caebc3 100644 --- a/modules/nf-core/seqtk/subseq/main.nf +++ b/modules/nf-core/seqtk/subseq/main.nf @@ -4,23 +4,23 @@ process SEQTK_SUBSEQ { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/seqtk:1.3--h5bf99c6_3' : - 'biocontainers/seqtk:1.3--h5bf99c6_3' }" + 'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' : + 'biocontainers/seqtk:1.4--he4a0461_1' }" input: - path sequences + tuple val(meta), path(sequences) path filter_list output: - path "*.gz" , emit: sequences - path "versions.yml" , emit: versions + tuple val(meta), path("*.gz"), emit: sequences + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def ext = "fa" if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { ext = "fq" @@ -38,4 +38,19 @@ process SEQTK_SUBSEQ { seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def ext = "fa" + if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } + """ + echo "" | gzip > ${sequences}${prefix}.${ext}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml index 2cb8858d..4e8ee19f 100644 --- a/modules/nf-core/seqtk/subseq/meta.yml +++ b/modules/nf-core/seqtk/subseq/meta.yml @@ -1,7 +1,9 @@ name: seqtk_subseq description: Select only sequences that match the filtering condition keywords: - - filtering,selection + - filtering + - selection + - fastx tools: - seqtk: description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test new file mode 100644 index 00000000..be5602e3 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + 
name "Test Process SEQTK_SUBSEQ" + script "modules/nf-core/seqtk/subseq/main.nf" + process "SEQTK_SUBSEQ" + config "./standard.config" + + tag "modules" + tag "modules_nfcore" + tag "seqtk" + tag "seqtk/subseq" + + test("sarscov2_subseq_fa") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2_subseq_fa_stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap new file mode 100644 index 00000000..75b3793e --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2_subseq_fa": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "1": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,31c95c4d686526cf002f6119bc55b2b2" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:36.155954" + }, + "sarscov2_subseq_fa_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + 
"versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ], + "sequences": [ + [ + { + "id": "test" + }, + "genome.fasta.filtered.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,cd7682f4da748ef6d083c4a4656cc1e2" + ] + } + ], + "timestamp": "2024-02-22T15:56:44.222329" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/standard.config b/modules/nf-core/seqtk/subseq/tests/standard.config new file mode 100644 index 00000000..e8d7dc30 --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/standard.config @@ -0,0 +1,5 @@ +process { + withName: SEQTK_SUBSEQ { + ext.prefix = { ".filtered" } + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/subseq/tests/tags.yml b/modules/nf-core/seqtk/subseq/tests/tags.yml new file mode 100644 index 00000000..74056bab --- /dev/null +++ b/modules/nf-core/seqtk/subseq/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/subseq: + - "modules/nf-core/seqtk/subseq/**" diff --git a/modules/nf-core/windowmasker/mkcounts/environment.yml b/modules/nf-core/windowmasker/mkcounts/environment.yml index 15887425..e4d72108 100644 --- a/modules/nf-core/windowmasker/mkcounts/environment.yml +++ b/modules/nf-core/windowmasker/mkcounts/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::blast=2.14.0 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/mkcounts/main.nf b/modules/nf-core/windowmasker/mkcounts/main.nf index 6bfd175e..406f7761 100644 --- a/modules/nf-core/windowmasker/mkcounts/main.nf +++ b/modules/nf-core/windowmasker/mkcounts/main.nf @@ -4,8 +4,8 @@ process WINDOWMASKER_MKCOUNTS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': - 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta), path(ref) @@ -21,11 +21,11 @@ process WINDOWMASKER_MKCOUNTS { def args = task.ext.args ?: "" def prefix = task.ext.prefix ?: "${meta.id}" - def memory = 3072 + def memory = 3072 if (!task.memory) { log.info '[WINDOWMASKER: MK_COUNTS] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - memory = (task.memory.toMega()).intValue() + memory = (task.memory.toMega()).intValue() } """ diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test new file mode 100644 index 00000000..18c4977c --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process WINDOWMASKER_MKCOUNTS" + script "../main.nf" + process "WINDOWMASKER_MKCOUNTS" + + tag "modules" + tag "modules_nfcore" + tag "windowmasker" + tag "windowmasker/mkcounts" + + test("sarscov2_fasta") { + + when { + params { + // define parameters here. 
Example: + // outdir = "tests/results" + } + process { + """ + input[0] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + test("sarscov2_fasta_stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap new file mode 100644 index 00000000..cae2d306 --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + "sarscov2_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt:md5,5f5d7e926fdf13b0c57651f962cc1253" + ] + ], + "1": [ + "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ], + "counts": [ + [ + { + "id": "test" + }, + "test.txt:md5,5f5d7e926fdf13b0c57651f962cc1253" + ] + ], + "versions": [ + "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ] + } + ], + "timestamp": "2024-02-15T13:29:58.837482" + }, + "sarscov2_fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ], + "counts": [ + [ + { + "id": "test" + }, + "test.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,57ae356f69298e25eb5d070900865cf2" + ] + } + ], + "timestamp": "2024-02-15T13:30:07.618636" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config b/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config new file mode 100644 
index 00000000..65fc1910 --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: WINDOWMASKER_MKCOUNTS { + ext.args = "" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/mkcounts/tests/tags.yml b/modules/nf-core/windowmasker/mkcounts/tests/tags.yml new file mode 100644 index 00000000..95c67635 --- /dev/null +++ b/modules/nf-core/windowmasker/mkcounts/tests/tags.yml @@ -0,0 +1,2 @@ +windowmasker/mkcounts: + - "modules/nf-core/windowmasker/mkcounts/**" diff --git a/modules/nf-core/windowmasker/ustat/environment.yml b/modules/nf-core/windowmasker/ustat/environment.yml index a97fdd9d..b83d82e5 100644 --- a/modules/nf-core/windowmasker/ustat/environment.yml +++ b/modules/nf-core/windowmasker/ustat/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::blast=2.14.0 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/ustat/main.nf b/modules/nf-core/windowmasker/ustat/main.nf index 2cc3df63..7a7d29f6 100644 --- a/modules/nf-core/windowmasker/ustat/main.nf +++ b/modules/nf-core/windowmasker/ustat/main.nf @@ -4,8 +4,8 @@ process WINDOWMASKER_USTAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/blast:2.14.0--h7d5a4b4_1': - 'biocontainers/blast:2.14.0--h7d5a4b4_1' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta) , path(counts) diff --git a/modules/nf-core/windowmasker/ustat/tests/main.nf.test b/modules/nf-core/windowmasker/ustat/tests/main.nf.test new file mode 100644 index 00000000..58d91b13 --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + + name "Test Process WINDOWMASKER_USTAT" + script "../main.nf" + process "WINDOWMASKER_USTAT" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "windowmasker" + tag "windowmasker/ustat" + tag "windowmasker/mkcounts" + + + setup { + run("WINDOWMASKER_MKCOUNTS") { + script "../../mkcounts/main.nf" + process { + """ + input[0] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + } + + test("sarscov2_fasta") { + when { + process { + """ + input[0] = WINDOWMASKER_MKCOUNTS.out.counts + input[1] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + then { + assert process.success + assert snapshot(process.out).match() + } + } + + test("sarscov2_fasta_stub") { + when { + process { + """ + input[0] = WINDOWMASKER_MKCOUNTS.out.counts + input[1] = [ + [id: "test" ], + [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + ] + """ + } + } + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap b/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap new file mode 100644 index 00000000..79d3d82d --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/tests/main.nf.test.snap @@ -0,0 +1,60 @@ +{ + 
"sarscov2_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "1": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ], + "intervals": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "versions": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ] + } + ], + "timestamp": "2024-02-15T14:19:12.033774" + }, + "sarscov2_fasta_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "1": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ], + "intervals": [ + [ + { + "id": "test" + }, + "test.interval:md5,c91346601564ab88cbb0f913881d05e2" + ] + ], + "versions": [ + "versions.yml:md5,d43f04bb181ac80da9ec79d9b49131cf" + ] + } + ], + "timestamp": "2024-02-15T14:19:21.850526" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/ustat/tests/nextflow.config b/modules/nf-core/windowmasker/ustat/tests/nextflow.config new file mode 100644 index 00000000..00b63c45 --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'test_windowmasker_ustat:WINDOWMASKER_USTAT' { + ext.args = "-dust true -outfmt interval" + } +} \ No newline at end of file diff --git a/modules/nf-core/windowmasker/ustat/tests/tags.yml b/modules/nf-core/windowmasker/ustat/tests/tags.yml new file mode 100644 index 00000000..28c74ca9 --- /dev/null +++ b/modules/nf-core/windowmasker/ustat/tests/tags.yml @@ -0,0 +1,2 @@ +windowmasker/ustat: + - "modules/nf-core/windowmasker/ustat/**" diff --git a/subworkflows/local/run_blastn.nf b/subworkflows/local/run_blastn.nf index 5e3c913f..a034fe96 100644 --- a/subworkflows/local/run_blastn.nf +++ b/subworkflows/local/run_blastn.nf @@ -29,20 +29,17 @@ workflow RUN_BLASTN { NOHIT_LIST ( blast_table, fasta ) ch_versions = ch_versions.mix ( NOHIT_LIST.out.versions.first() ) - 
// Subset of sequences with no hits (meta is not propagated in this step) + // Subset of sequences with no hits SEQTK_SUBSEQ ( - fasta.map { meta, genome -> genome }, + fasta, NOHIT_LIST.out.nohitlist.map { meta, nohit -> nohit } ) ch_versions = ch_versions.mix ( SEQTK_SUBSEQ.out.versions.first() ) // Split long contigs into chunks - // add meta to fasta subset channel: [ val(meta), path(compressed_fasta) ] - ch_gz = fasta.combine(SEQTK_SUBSEQ.out.sequences).map { meta, genome, seq -> [ meta, seq ] } - // uncompress fasta - GUNZIP ( ch_gz ) + GUNZIP ( SEQTK_SUBSEQ.out.sequences ) // create chunks BLOBTOOLKIT_CHUNK ( GUNZIP.out.gunzip, [[],[]] ) From ff43207c1b8b2cf5984a705384a1a82bb9fc53c3 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 19:32:53 +0000 Subject: [PATCH 24/36] No need to compress the file if we need to decompress it right after --- modules.json | 3 +- modules/nf-core/seqtk/subseq/main.nf | 11 ++--- .../nf-core/seqtk/subseq/seqtk-subseq.diff | 48 +++++++++++++++++++ subworkflows/local/run_blastn.nf | 6 +-- 4 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 modules/nf-core/seqtk/subseq/seqtk-subseq.diff diff --git a/modules.json b/modules.json index e255f552..7192d14a 100644 --- a/modules.json +++ b/modules.json @@ -82,7 +82,8 @@ "seqtk/subseq": { "branch": "master", "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, "windowmasker/mkcounts": { "branch": "master", diff --git a/modules/nf-core/seqtk/subseq/main.nf b/modules/nf-core/seqtk/subseq/main.nf index d5caebc3..5edae0e8 100644 --- a/modules/nf-core/seqtk/subseq/main.nf +++ b/modules/nf-core/seqtk/subseq/main.nf @@ -12,7 +12,7 @@ process SEQTK_SUBSEQ { path filter_list output: - tuple val(meta), path("*.gz"), emit: sequences + tuple val(meta), path("*.${ext}"), emit: sequences path "versions.yml", emit: versions when: @@ -21,7 +21,7 
@@ process SEQTK_SUBSEQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" + ext = "fa" if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { ext = "fq" } @@ -30,8 +30,7 @@ process SEQTK_SUBSEQ { subseq \\ $args \\ $sequences \\ - $filter_list | \\ - gzip --no-name > ${sequences}${prefix}.${ext}.gz + $filter_list > ${sequences}${prefix}.${ext} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -41,12 +40,12 @@ process SEQTK_SUBSEQ { stub: def prefix = task.ext.prefix ?: "${meta.id}" - def ext = "fa" + ext = "fa" if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { ext = "fq" } """ - echo "" | gzip > ${sequences}${prefix}.${ext}.gz + touch ${sequences}${prefix}.${ext} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/seqtk/subseq/seqtk-subseq.diff b/modules/nf-core/seqtk/subseq/seqtk-subseq.diff new file mode 100644 index 00000000..5fc2af8a --- /dev/null +++ b/modules/nf-core/seqtk/subseq/seqtk-subseq.diff @@ -0,0 +1,48 @@ +Changes in module 'nf-core/seqtk/subseq' +--- modules/nf-core/seqtk/subseq/main.nf ++++ modules/nf-core/seqtk/subseq/main.nf +@@ -12,7 +12,7 @@ + path filter_list + + output: +- tuple val(meta), path("*.gz"), emit: sequences ++ tuple val(meta), path("*.${ext}"), emit: sequences + path "versions.yml", emit: versions + + when: +@@ -21,7 +21,7 @@ + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def ext = "fa" ++ ext = "fa" + if ("$sequences" ==~ /.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } +@@ -30,8 +30,7 @@ + subseq \\ + $args \\ + $sequences \\ +- $filter_list | \\ +- gzip --no-name > ${sequences}${prefix}.${ext}.gz ++ $filter_list > ${sequences}${prefix}.${ext} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -41,12 +40,12 @@ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" +- def ext = "fa" ++ ext = "fa" + if ("$sequences" ==~ 
/.+\.fq|.+\.fq.gz|.+\.fastq|.+\.fastq.gz/) { + ext = "fq" + } + """ +- echo "" | gzip > ${sequences}${prefix}.${ext}.gz ++ touch ${sequences}${prefix}.${ext} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +************************************************************ diff --git a/subworkflows/local/run_blastn.nf b/subworkflows/local/run_blastn.nf index a034fe96..cc1fa6c5 100644 --- a/subworkflows/local/run_blastn.nf +++ b/subworkflows/local/run_blastn.nf @@ -5,7 +5,6 @@ include { NOHIT_LIST } from '../../modules/local/nohit_list' include { SEQTK_SUBSEQ } from '../../modules/nf-core/seqtk/subseq/main' -include { GUNZIP } from '../../modules/nf-core/gunzip/main' include { BLOBTOOLKIT_CHUNK } from '../../modules/local/blobtoolkit/chunk' include { BLAST_BLASTN as BLASTN_TAXON } from '../../modules/nf-core/blast/blastn/main' include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn/main' @@ -38,11 +37,8 @@ workflow RUN_BLASTN { // Split long contigs into chunks - // uncompress fasta - GUNZIP ( SEQTK_SUBSEQ.out.sequences ) - // create chunks - BLOBTOOLKIT_CHUNK ( GUNZIP.out.gunzip, [[],[]] ) + BLOBTOOLKIT_CHUNK ( SEQTK_SUBSEQ.out.sequences, [[],[]] ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CHUNK.out.versions.first() ) From b967b512928364796c7d0e7f198b89e165d0ef63 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 20:11:35 +0000 Subject: [PATCH 25/36] Count the number of reads in each input file --- modules.json | 5 ++ .../nf-core/samtools/flagstat/environment.yml | 8 +++ modules/nf-core/samtools/flagstat/main.nf | 46 +++++++++++++++++ modules/nf-core/samtools/flagstat/meta.yml | 51 +++++++++++++++++++ .../samtools/flagstat/tests/main.nf.test | 36 +++++++++++++ .../samtools/flagstat/tests/main.nf.test.snap | 32 ++++++++++++ .../nf-core/samtools/flagstat/tests/tags.yml | 2 + subworkflows/local/input_check.nf | 40 ++++++++++++--- workflows/blobtoolkit.nf | 4 +- 9 files changed, 216 insertions(+), 8 deletions(-) create mode 
100644 modules/nf-core/samtools/flagstat/environment.yml create mode 100644 modules/nf-core/samtools/flagstat/main.nf create mode 100644 modules/nf-core/samtools/flagstat/meta.yml create mode 100644 modules/nf-core/samtools/flagstat/tests/main.nf.test create mode 100644 modules/nf-core/samtools/flagstat/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/flagstat/tests/tags.yml diff --git a/modules.json b/modules.json index 7192d14a..b5ad891c 100644 --- a/modules.json +++ b/modules.json @@ -69,6 +69,11 @@ "installed_by": ["modules"], "patch": "modules/nf-core/samtools/fasta/samtools-fasta.diff" }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["modules"] + }, "samtools/index": { "branch": "master", "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 00000000..bd57cb54 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,8 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..eb5f5252 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..97991358 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,51 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 00000000..24c3c04b --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match("flagstat") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 00000000..a76fc27e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "flagstat": { + "content": [ + [ 
+ [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:31:37.783927" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,fd0030ce49ab3a92091ad80260226452" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:44.299617452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 00000000..2d2b7255 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index fbb397f0..da522ca8 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -3,6 +3,7 @@ // include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' +include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' include { FETCHNGSSAMPLESHEET_CHECK } from '../../modules/local/fetchngssamplesheet_check' include { BLOBTOOLKIT_CONFIG } from '../../modules/local/blobtoolkit/config' @@ -35,27 +36,37 @@ workflow INPUT_CHECK { | map { meta, file -> meta.row + [fastq_1: file] } | mix ( reads_pairedness.not_paired ) | map { create_data_channels_from_fetchngs(it) } - | set { aln } + | set { read_files } } else { SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) .map { create_data_channels(it) } - .set { aln } + .set { read_files } ch_versions = ch_versions.mix ( SAMPLESHEET_CHECK.out.versions.first() ) } + // Extract the read counts + SAMTOOLS_FLAGSTAT ( read_files.map { meta, datafile -> [meta, datafile, []] } ) + ch_versions = 
ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) + + read_files + | join( SAMTOOLS_FLAGSTAT.out.flagstat ) + | map { meta, datafile, stats -> [meta + get_read_counts(stats), datafile] } + | set { reads } + + if ( !params.yaml ) { - aln + read_files | map { meta, data -> meta.id.split("_")[0..-2].join("_") } | combine ( fasta ) | map { sample, meta, fasta -> [ meta, sample ] } | groupTuple() - | set { reads } + | set { grouped_reads } - BLOBTOOLKIT_CONFIG ( reads, fasta ) + BLOBTOOLKIT_CONFIG ( grouped_reads, fasta ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CONFIG.out.versions.first() ) ch_config = BLOBTOOLKIT_CONFIG.out.yaml } else { @@ -63,7 +74,7 @@ workflow INPUT_CHECK { } emit: - aln // channel: [ val(meta), path(datafile) ] + reads // channel: [ val(meta), path(datafile) ] config = ch_config // channel: [ val(meta), path(yaml) ] versions = ch_versions // channel: [ versions.yml ] } @@ -127,3 +138,20 @@ def create_data_channels_from_fetchngs(LinkedHashMap row) { return data_meta } +// Function to get the read counts from a samtools flagstat file +def get_read_counts ( stats ) { + // create meta map + def read_count_meta = [:] + + // Read the first line of the flagstat file + // 3127898040 + 0 in total (QC-passed reads + QC-failed reads) + // and make the sum of both integers + stats.withReader { + line = it.readLine() + def lspl = line.split() + def read_count = lspl[0].toLong() + lspl[2].toLong() + read_count_meta.read_count = read_count + } + + return read_count_meta +} diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index f1917a08..f25da1eb 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -112,11 +112,11 @@ workflow BLOBTOOLKIT { // SUBWORKFLOW: Optional read alignment // if ( params.align ) { - MINIMAP2_ALIGNMENT ( INPUT_CHECK.out.aln, PREPARE_GENOME.out.genome ) + MINIMAP2_ALIGNMENT ( INPUT_CHECK.out.reads, PREPARE_GENOME.out.genome ) ch_versions = ch_versions.mix ( MINIMAP2_ALIGNMENT.out.versions ) 
ch_aligned = MINIMAP2_ALIGNMENT.out.aln } else { - ch_aligned = INPUT_CHECK.out.aln + ch_aligned = INPUT_CHECK.out.reads } // From bf8251134bf0f11353fac41c4200f30565ffc9c1 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 22 Mar 2024 20:48:59 +0000 Subject: [PATCH 26/36] Optimised settings for minimap2, taken from the read-mapping pipeline --- conf/base.config | 20 +++++++++++++++++++ modules.json | 3 ++- modules/nf-core/minimap2/align/main.nf | 1 - .../minimap2/align/minimap2-align.diff | 12 +++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/minimap2/align/minimap2-align.diff diff --git a/conf/base.config b/conf/base.config index 8764d360..b654fe04 100644 --- a/conf/base.config +++ b/conf/base.config @@ -53,6 +53,26 @@ process { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } + withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_CCS' { + cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } + time = { check_max( 3.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + } + + // Extrapolated from the HIFI settings on the basis of 1 ONT alignment. 
CLR assumed to behave the same way as ONT + withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_(CLR|ONT)' { + cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 30.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } + time = { check_max( 1.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + } + + // Temporarily the same settings as CCS + withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_(HIC|ILMN)' { + cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } + memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } + time = { check_max( 3.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + } + withName: 'WINDOWSTATS_INPUT' { cpus = { check_max( 1 , 'cpus' ) } // 2 GB per 1 Gbp diff --git a/modules.json b/modules.json index b5ad891c..667a4482 100644 --- a/modules.json +++ b/modules.json @@ -56,7 +56,8 @@ "minimap2/align": { "branch": "master", "git_sha": "2c2d1cf80866dbd6dd0ea5d61ddd59533a72d41e", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 661bd23d..7030554d 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -1,6 +1,5 @@ process MINIMAP2_ALIGN { tag "$meta.id" - label 'process_medium' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff new file mode 100644 index 
00000000..479818b3 --- /dev/null +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -0,0 +1,12 @@ +Changes in module 'nf-core/minimap2/align' +--- modules/nf-core/minimap2/align/main.nf ++++ modules/nf-core/minimap2/align/main.nf +@@ -1,6 +1,5 @@ + process MINIMAP2_ALIGN { + tag "$meta.id" +- label 'process_medium' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + +************************************************************ From fd86c4e9da26927949a5f7444028c4f5c4d8ee89 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 26 Mar 2024 22:20:04 +0000 Subject: [PATCH 27/36] Updated these versions too --- docs/usage.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 0578c2bb..4789ff84 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -230,7 +230,7 @@ List of tools for any given dataset can be fetched from the API, for example htt | Dependency | Snakemake | Nextflow | | ----------------- | --------- | -------- | | blobtoolkit | 4.3.2 | 4.3.9 | -| blast | 2.12.0 | 2.14.1 | +| blast | 2.12.0 | 2.15.0 | | blobtk | 0.5.0 | 0.5.1 | | busco | 5.3.2 | 5.5.0 | | diamond | 2.0.15 | 2.1.8 | @@ -240,8 +240,8 @@ List of tools for any given dataset can be fetched from the API, for example htt | ncbi-datasets-cli | 14.1.0 | | | nextflow | | 23.10.0 | | python | 3.9.13 | 3.12.0 | -| samtools | 1.15.1 | 1.18 | -| seqtk | 1.3 | | +| samtools | 1.15.1 | 1.19.2 | +| seqtk | 1.3 | 1.4 | | snakemake | 7.19.1 | | | windowmasker | 2.12.0 | 2.14.0 | From c8253f528fd89f5ee37bf4716ba280c3efa5d526 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 26 Mar 2024 23:01:34 +0000 Subject: [PATCH 28/36] Slightly increased the runtime This should not affect the normal/long assignment much --- conf/base.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index 
b654fe04..0f6fdaba 100644 --- a/conf/base.config +++ b/conf/base.config @@ -56,7 +56,7 @@ process { withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_CCS' { cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } - time = { check_max( 3.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + time = { check_max( 4.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } } // Extrapolated from the HIFI settings on the basis of 1 ONT alignment. CLR assumed to behave the same way as ONT @@ -105,7 +105,7 @@ process { meta.genome_size < 5000000000 ? 64.GB : 192.GB ) * ((task.attempt+1)/2) , 'memory' ) } cpus = { log_increase_cpus(4, 2*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) } - time = { check_max( 2.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } + time = { check_max( 4.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } } withName:CUSTOM_DUMPSOFTWAREVERSIONS { From eecf569e9c2f99f496d7a50315f66e6d9ad6a667 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Thu, 28 Mar 2024 08:43:12 +0000 Subject: [PATCH 29/36] Missing space --- modules/local/compressblobdir.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/compressblobdir.nf b/modules/local/compressblobdir.nf index efb1b7a5..694d415a 100644 --- a/modules/local/compressblobdir.nf +++ b/modules/local/compressblobdir.nf @@ -31,7 +31,7 @@ process COMPRESSBLOBDIR { cat <<-END_VERSIONS > versions.yml "${task.process}": - pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ } From 4c723c797ee1577e9e9386458e5d6ec22139ab21 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 2 Apr 2024 09:22:06 +0000 Subject: 
[PATCH 30/36] Tidy up the busco directory before publication --- conf/modules.config | 3 ++ modules/local/restructurebuscodir.nf | 44 ++++++++++++++++++++++ subworkflows/local/busco_diamond_blastp.nf | 15 ++++++++ 3 files changed, 62 insertions(+) create mode 100644 modules/local/restructurebuscodir.nf diff --git a/conf/modules.config b/conf/modules.config index 2ab3ba99..439a77b3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -67,6 +67,9 @@ process { // Note: BUSCO *must* see the double-quotes around the parameters '--force --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' : '--force' } + } + + withName: "RESTRUCTUREBUSCODIR" { publishDir = [ path: { "${params.outdir}/busco" }, mode: params.publish_dir_mode, diff --git a/modules/local/restructurebuscodir.nf b/modules/local/restructurebuscodir.nf new file mode 100644 index 00000000..c2cdf3ee --- /dev/null +++ b/modules/local/restructurebuscodir.nf @@ -0,0 +1,44 @@ +process RESTRUCTUREBUSCODIR { + tag "${meta.id}_${lineage}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), val(lineage), path(batch_summary), path(short_summaries_txt), path(short_summaries_json), path(busco_dir) + + output: + tuple val(meta), path("${lineage}"), emit: clean_busco_dir + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${lineage} + + cp --dereference ${batch_summary} ${lineage}/short_summary.tsv + cp --dereference ${short_summaries_txt} ${lineage}/short_summary.txt + cp --dereference ${short_summaries_json} ${lineage}/short_summary.json + + # Should we compress these ? 
+ cp ${busco_dir}/*/run_*/full_table.tsv ${lineage}/ + cp ${busco_dir}/*/run_*/missing_busco_list.tsv ${lineage}/ + + tar czf ${lineage}/single_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences single_copy_busco_sequences + tar czf ${lineage}/multi_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences multi_copy_busco_sequences + tar czf ${lineage}/fragmented_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences fragmented_busco_sequences + tar czf ${lineage}/hmmer_output.tar.gz --exclude=.checkpoint -C ${busco_dir}/*/run_* hmmer_output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tar: \$(tar --version| awk 'NR==1 {print \$3}' ) + END_VERSIONS + """ +} diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index a43b26dd..fa98c6be 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -6,6 +6,7 @@ include { GOAT_TAXONSEARCH } from '../../modules/nf-core/goat/taxonsear include { BUSCO } from '../../modules/nf-core/busco/main' include { BLOBTOOLKIT_EXTRACTBUSCOS } from '../../modules/local/blobtoolkit/extractbuscos' include { DIAMOND_BLASTP } from '../../modules/nf-core/diamond/blastp/main' +include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebuscodir' workflow BUSCO_DIAMOND { @@ -61,6 +62,20 @@ workflow BUSCO_DIAMOND { ch_versions = ch_versions.mix ( BUSCO.out.versions.first() ) + // + // Tidy up the BUSCO output directories before publication + // + RESTRUCTUREBUSCODIR( + BUSCO.out.seq_dir + | map { meta, seq -> [meta, seq.parent.baseName.minus("run_")] } + | join ( BUSCO.out.batch_summary ) + | join ( BUSCO.out.short_summaries_txt ) + | join ( BUSCO.out.short_summaries_json ) + | join ( BUSCO.out.busco_dir ) + ) + ch_versions = ch_versions.mix ( RESTRUCTUREBUSCODIR.out.versions.first() ) + + // // Select input for BLOBTOOLKIT_EXTRACTBUSCOS // From e273433ea12d9663e1886bd1f655e7065bd3c9b1 Mon 
Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 2 Apr 2024 17:25:39 +0000 Subject: [PATCH 31/36] Actually need more resources for BUSCO --- conf/base.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/base.config b/conf/base.config index 0f6fdaba..48d8f30d 100644 --- a/conf/base.config +++ b/conf/base.config @@ -102,10 +102,10 @@ process { meta.genome_size < 500000000 ? 8.GB : meta.genome_size < 1000000000 ? 16.GB : meta.genome_size < 2000000000 ? 32.GB : - meta.genome_size < 5000000000 ? 64.GB : 192.GB + meta.genome_size < 5000000000 ? 96.GB : 192.GB ) * ((task.attempt+1)/2) , 'memory' ) } cpus = { log_increase_cpus(4, 2*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) } - time = { check_max( 4.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } + time = { check_max( 6.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } } withName:CUSTOM_DUMPSOFTWAREVERSIONS { From cf1dc9815e277aa6a757672ef0b9049a85b30bda Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 2 Apr 2024 17:34:36 +0000 Subject: [PATCH 32/36] bugfix: meta needs to have the lineage information so that the join can pair all outputs from the same genome --- modules/local/blobtoolkit/extractbuscos.nf | 2 +- subworkflows/local/busco_diamond_blastp.nf | 57 +++++++++++++--------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/modules/local/blobtoolkit/extractbuscos.nf b/modules/local/blobtoolkit/extractbuscos.nf index a54a9437..1e4440cb 100644 --- a/modules/local/blobtoolkit/extractbuscos.nf +++ b/modules/local/blobtoolkit/extractbuscos.nf @@ -9,7 +9,7 @@ process BLOBTOOLKIT_EXTRACTBUSCOS { input: tuple val(meta), path(fasta) - tuple val(metaseq), path(seq, stageAs: "lineage??/*") + path seq, stageAs: "lineage??/*" output: tuple val(meta), path("*_buscogenes.fasta"), emit: genes diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index fa98c6be..f2bb2340 
100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -43,22 +43,36 @@ workflow BUSCO_DIAMOND { // - // Run BUSCO search + // Prepare the BUSCO lineages // + // 0. Initialise some variables + basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ] + def lineage_position = 0 + // 1. Parse the GOAT_TAXONSEARCH output GOAT_TAXONSEARCH.out.taxonsearch | map { meta, csv -> csv.splitCsv(header:true, sep:'\t', strip:true) } | map { row -> row.odb10_lineage.findAll { it != "" } } - | set { ch_ancestral_lineages } - - - // Add the basal lineages to the list (excluding duplicates) - basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ] - ch_ancestral_lineages + // 2. Add the (missing) basal lineages | map { lineages -> (lineages + basal_lineages).unique() } | flatten () - | set { ch_lineages } + // 3. Add a (0-based) index to record the original order (i.e. by age) + | map { lineage_name -> [lineage_name, lineage_position++] } + // 4.
Move the lineage information to `meta` to be able to distinguish the BUSCO jobs and group their outputs later + | combine ( fasta ) + | map { lineage_name, lineage_index, meta, genome -> [meta + [lineage_name: lineage_name, lineage_index: lineage_index], genome] } + | set { ch_fasta_with_lineage } - BUSCO ( fasta, "genome", ch_lineages, busco_db.collect().ifEmpty([]), [] ) + + // + // Run BUSCO search + // + BUSCO ( + ch_fasta_with_lineage, + "genome", + ch_fasta_with_lineage.map { it[0].lineage_name }, + busco_db.collect().ifEmpty([]), + [], + ) ch_versions = ch_versions.mix ( BUSCO.out.versions.first() ) @@ -67,7 +81,7 @@ workflow BUSCO_DIAMOND { // RESTRUCTUREBUSCODIR( BUSCO.out.seq_dir - | map { meta, seq -> [meta, seq.parent.baseName.minus("run_")] } + | map { meta, seq -> [meta, meta.lineage_name] } | join ( BUSCO.out.batch_summary ) | join ( BUSCO.out.short_summaries_txt ) | join ( BUSCO.out.short_summaries_json ) @@ -80,8 +94,9 @@ workflow BUSCO_DIAMOND { // Select input for BLOBTOOLKIT_EXTRACTBUSCOS // BUSCO.out.seq_dir - | filter { meta, seq -> basal_lineages.contains(seq.parent.baseName.minus("run_")) } - | groupTuple() + | filter { meta, seq -> basal_lineages.contains(meta.lineage_name) } + | map { meta, seq -> seq } + | collect | set { ch_basal_buscos } @@ -101,20 +116,14 @@ workflow BUSCO_DIAMOND { ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() ) - // Index the lineages in the taxonomic order - def lineage_position = 0 - ch_lineages - | map { lineage -> [lineage, lineage_position++] } - | set { ch_ordered_lineages } - - - // Order BUSCO results according to ch_ordered_lineages + // Order BUSCO results according to the lineage index BUSCO.out.full_table - | map { meta, table -> [table.parent.baseName.minus("run_"), meta, table] } - | join ( ch_ordered_lineages ) - | map { lineage, meta, table, index -> [meta, table, index] } + // 1. 
Restore the original meta map, and pull the index as an extra tuple element + | map { meta, table -> [meta.findAll { it.key != "lineage_name" && it.key != "lineage_index" }, [table, meta.lineage_index]] } + // 2. Turn to a single-element channel that has the (one and only) meta map, and all the pairs (table, lineage index) concatenated as a list | groupTuple() - | map { meta, tables, positions -> [ meta, tables.withIndex().sort { a, b -> positions[a[1]] <=> positions[b[1]] } . collect { table, i -> table } ] } + // 3. Sort the pairs and discard the index + | map { meta, table_positions -> [ meta, table_positions.sort { a, b -> a[1] <=> b[1] } . collect { table, lineage_index -> table } ] } | set { ch_indexed_buscos } From 328e905a5a0c79fcea66ec3b073e0f9d11e5eecd Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 2 Apr 2024 17:33:21 +0000 Subject: [PATCH 33/36] bugfix: these files can be missing if no gene is found --- modules/local/restructurebuscodir.nf | 8 ++++---- subworkflows/local/busco_diamond_blastp.nf | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/local/restructurebuscodir.nf b/modules/local/restructurebuscodir.nf index c2cdf3ee..4c58b0ed 100644 --- a/modules/local/restructurebuscodir.nf +++ b/modules/local/restructurebuscodir.nf @@ -24,12 +24,12 @@ process RESTRUCTUREBUSCODIR { mkdir ${lineage} cp --dereference ${batch_summary} ${lineage}/short_summary.tsv - cp --dereference ${short_summaries_txt} ${lineage}/short_summary.txt - cp --dereference ${short_summaries_json} ${lineage}/short_summary.json + [ -n "${short_summaries_txt}" ] && cp --dereference ${short_summaries_txt} ${lineage}/short_summary.txt + [ -n "${short_summaries_json}" ] && cp --dereference ${short_summaries_json} ${lineage}/short_summary.json # Should we compress these ? 
- cp ${busco_dir}/*/run_*/full_table.tsv ${lineage}/ - cp ${busco_dir}/*/run_*/missing_busco_list.tsv ${lineage}/ + [ -e ${busco_dir}/*/run_*/full_table.tsv ] && cp ${busco_dir}/*/run_*/full_table.tsv ${lineage}/ + [ -e ${busco_dir}/*/run_*/missing_busco_list.tsv ] && cp ${busco_dir}/*/run_*/missing_busco_list.tsv ${lineage}/ tar czf ${lineage}/single_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences single_copy_busco_sequences tar czf ${lineage}/multi_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences multi_copy_busco_sequences diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index f2bb2340..c3ebe104 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -83,9 +83,10 @@ workflow BUSCO_DIAMOND { BUSCO.out.seq_dir | map { meta, seq -> [meta, meta.lineage_name] } | join ( BUSCO.out.batch_summary ) - | join ( BUSCO.out.short_summaries_txt ) - | join ( BUSCO.out.short_summaries_json ) + | join ( BUSCO.out.short_summaries_txt, remainder: true ) + | join ( BUSCO.out.short_summaries_json, remainder: true ) | join ( BUSCO.out.busco_dir ) + | map { meta, lineage, batch_summary, short_summaries_txt, short_summaries_json, busco_dir -> [meta, lineage, batch_summary, short_summaries_txt ?: [], short_summaries_json ?: [], busco_dir] } ) ch_versions = ch_versions.mix ( RESTRUCTUREBUSCODIR.out.versions.first() ) From f4e236ff4bfb702ad806ef1b6f11e6196020c12b Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 8 Apr 2024 18:28:28 +0000 Subject: [PATCH 34/36] Better explanation --- nextflow.config | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index adc40371..83aaaafc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -291,13 +291,14 @@ def check_max(obj, type) { /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Increasing the 
number of CPUs often gives diminishing returns, so we increase it - following a logarithm curve. Example: - - 0 < value <= 1: start + step - - 1 < value <= 2: start + 2*step - - 2 < value <= 4: start + 3*step - - 4 < value <= 8: start + 4*step - In order to support re-runs, the step increase may be multiplied by the attempt - number prior to calling this function. + following a logarithm curve: + - 0 < value <= 1 : start + step + - 1 < value <= base : start + 2*step + - base < value <= base^2: start + 3*step + - base^2 < value <= base^3: start + 4*step + - etc + In order to support re-runs, start and/or step may be increased by a function of the + attempt number prior to calling this function. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ From 7417c5b3ebb78331b2942244e47f8969d37221c6 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 9 Apr 2024 08:28:53 +0000 Subject: [PATCH 35/36] Updated the BUSCO resources --- conf/base.config | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/conf/base.config b/conf/base.config index 48d8f30d..8f51f7f8 100644 --- a/conf/base.config +++ b/conf/base.config @@ -95,17 +95,13 @@ process { } withName: BUSCO { - // No straightforward formula, so using ranges instead. - // The memory is increased by half of the base value at every attempt. - memory = { check_max( ( - meta.genome_size < 100000000 ? 4.GB : - meta.genome_size < 500000000 ? 8.GB : - meta.genome_size < 1000000000 ? 16.GB : - meta.genome_size < 2000000000 ? 32.GB : - meta.genome_size < 5000000000 ? 
96.GB : 192.GB - ) * ((task.attempt+1)/2) , 'memory' ) } - cpus = { log_increase_cpus(4, 2*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) } - time = { check_max( 6.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } + // The formulas below are equivalent to these ranges: + // Gbp: [ 1, 2, 4, 8, 16] + // CPUs: [ 8, 12, 16, 20, 24] + // GB RAM: [16, 32, 64, 128, 256] + memory = { check_max( 1.GB * Math.pow(2, 3 + task.attempt + Math.ceil(positive_log(meta.genome_size/1000000000, 2))) , 'memory' ) } + cpus = { log_increase_cpus(4, 4*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) } + time = { check_max( 3.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } } withName:CUSTOM_DUMPSOFTWAREVERSIONS { From 8af7fa838716341512881b3e5f43cdf9ae8c3a95 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 17 Apr 2024 09:21:21 +0000 Subject: [PATCH 36/36] Updated the README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c2f2a9fc..31512bc0 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ ## Introduction -**sanger-tol/blobtoolkit** is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes. It takes a samplesheet and aligned CRAM files as input, calculates genome statistics, coverage and completeness information, combines them in a TSV file by window size to create a BlobDir dataset and static plots. +**sanger-tol/blobtoolkit** is a bioinformatics pipeline that can be used to identify and analyse non-target DNA for eukaryotic genomes. +It takes a samplesheet of BAM/CRAM/FASTQ/FASTA files as input, calculates genome statistics, coverage and completeness information, combines them in a TSV file by window size to create a BlobDir dataset and static plots. 1. Calculate genome statistics in windows ([`fastawindows`](https://github.com/tolkit/fasta_windows)) 2. 
Calculate Coverage ([`blobtk/depth`](https://github.com/blobtoolkit/blobtk))
Process Name \\", + " \\ Software Version
CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
yaml5.4.1
TOOL1tool10.11.9
TOOL2tool21.9
WorkflowNextflow