Add nf-core module seqtk/cutn and run it on the target and query genomes.

Installs seqtk/cutn (seqtk 1.4) under modules/nf-core and registers it in
modules.json. The process runs `seqtk cutN -g` on a FASTA file and writes
the result to <prefix>.bed. PAIRGENOMEALIGN invokes it twice, as
SEQTK_CUTN_TARGET on ch_targetgenome and as SEQTK_CUTN_QUERY on
ch_samplesheet, and adds each run's versions.yml to ch_versions.

Note: the blob ids on the "index" lines are carried over from the original
commit; they do not reflect the wording fixes in meta.yml or the two
ch_versions lines added to workflows/pairgenomealign.nf.

diff --git a/modules.json b/modules.json
index 8b6b55c..d504f8d 100644
--- a/modules.json
+++ b/modules.json
@@ -49,6 +49,11 @@
                         "branch": "master",
                         "git_sha": "8f2062e7b4185590fb9f43c275381a31a6544fc0",
                         "installed_by": ["modules"]
+                    },
+                    "seqtk/cutn": {
+                        "branch": "master",
+                        "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502",
+                        "installed_by": ["modules"]
                     }
                 }
             },
diff --git a/modules/nf-core/seqtk/cutn/environment.yml b/modules/nf-core/seqtk/cutn/environment.yml
new file mode 100644
index 0000000..a57afbb
--- /dev/null
+++ b/modules/nf-core/seqtk/cutn/environment.yml
@@ -0,0 +1,7 @@
+name: seqtk_cutn
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::seqtk=1.4
diff --git a/modules/nf-core/seqtk/cutn/main.nf b/modules/nf-core/seqtk/cutn/main.nf
new file mode 100644
index 0000000..c2344a8
--- /dev/null
+++ b/modules/nf-core/seqtk/cutn/main.nf
@@ -0,0 +1,49 @@
+process SEQTK_CUTN {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/seqtk:1.4--he4a0461_1' :
+        'biocontainers/seqtk:1.4--he4a0461_1' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("*.bed") , emit: bed
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    seqtk \\
+        cutN \\
+        $args \\
+        -g $fasta \\
+        > ${prefix}.bed
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    touch ${prefix}.bed
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+    END_VERSIONS
+    """
+
+}
diff --git a/modules/nf-core/seqtk/cutn/meta.yml b/modules/nf-core/seqtk/cutn/meta.yml
new file mode 100644
index 0000000..1082867
--- /dev/null
+++ b/modules/nf-core/seqtk/cutn/meta.yml
@@ -0,0 +1,41 @@
+name: seqtk_cutn
+description: Generates a BED file containing the genomic locations of runs of N bases (gaps) in a FASTA file.
+keywords:
+  - cut
+  - fasta
+  - seqtk
+tools:
+  - seqtk:
+      description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. The seqtk cutN command cuts sequences at long N tracts; with -g it prints only the gap coordinates, without sequence.
+      homepage: https://github.com/lh3/seqtk
+      documentation: https://docs.csc.fi/apps/seqtk/
+      tool_dev_url: https://github.com/lh3/seqtk
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: A single FASTA file to be scanned for N tracts.
+      pattern: "*.{fasta}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bed:
+      type: file
+      description: The output BED file summarising the locations of the N tracts (cuts).
+      pattern: "*.{bed}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@DLBPointon"
+maintainers:
+  - "@DLBPointon"
diff --git a/modules/nf-core/seqtk/cutn/tests/main.nf.test b/modules/nf-core/seqtk/cutn/tests/main.nf.test
new file mode 100644
index 0000000..a38ed41
--- /dev/null
+++ b/modules/nf-core/seqtk/cutn/tests/main.nf.test
@@ -0,0 +1,57 @@
+nextflow_process {
+
+    name "Test Process SEQTK_CUTN"
+    script "../main.nf"
+    process "SEQTK_CUTN"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "seqtk"
+    tag "seqtk/cutn"
+
+    test("homo_21_cut") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ],
+                    file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() },
+                { assert snapshot(
+                    file(process.out.bed[0][1]).name
+                    ).match("genome_cut")
+                }
+            )
+        }
+    }
+
+    test("homo_21_cut_stub") {
+        options "-stub"
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ],
+                    file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/seqtk/cutn/tests/main.nf.test.snap b/modules/nf-core/seqtk/cutn/tests/main.nf.test.snap
new file mode 100644
index 0000000..998beda
--- /dev/null
+++ b/modules/nf-core/seqtk/cutn/tests/main.nf.test.snap
@@ -0,0 +1,70 @@
+{
+    "genome_cut": {
+        "content": [
+            "test.bed"
+        ],
+        "timestamp": "2024-02-22T16:02:14.744148"
+    },
+    "homo_21_cut_stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428"
+                ],
+                "bed": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bed:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428"
+                ]
+            }
+        ],
+        "timestamp": "2024-02-22T16:02:23.596389"
+    },
+    "homo_21_cut": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bed:md5,16cbba84e3a4bdbb52217afb5051f948"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428"
+                ],
+                "bed": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test.bed:md5,16cbba84e3a4bdbb52217afb5051f948"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,3da8ed2738f3c093d1e62d796fd76428"
+                ]
+            }
+        ],
+        "timestamp": "2024-02-22T16:02:14.695205"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/seqtk/cutn/tests/tags.yml b/modules/nf-core/seqtk/cutn/tests/tags.yml
new file mode 100644
index 0000000..13c64cc
--- /dev/null
+++ b/modules/nf-core/seqtk/cutn/tests/tags.yml
@@ -0,0 +1,2 @@
+seqtk/cutn:
+  - "modules/nf-core/seqtk/cutn/**"
diff --git a/workflows/pairgenomealign.nf b/workflows/pairgenomealign.nf
index e1f6eda..bc76f6a 100644
--- a/workflows/pairgenomealign.nf
+++ b/workflows/pairgenomealign.nf
@@ -6,6 +6,8 @@
 
 include { ASSEMBLYSCAN } from '../modules/nf-core/assemblyscan/main'
 include { PAIRALIGN_M2M } from '../subworkflows/local/pairalign_m2m/main'
+include { SEQTK_CUTN as SEQTK_CUTN_TARGET } from '../modules/nf-core/seqtk/cutn/main'
+include { SEQTK_CUTN as SEQTK_CUTN_QUERY } from '../modules/nf-core/seqtk/cutn/main'
 include { PAIRALIGN_M2O } from '../subworkflows/local/pairalign_m2o/main'
 include { MULTIQC } from '../modules/nf-core/multiqc/main'
 include { paramsSummaryMap } from 'plugin/nf-validation'
@@ -30,6 +32,22 @@ workflow PAIRGENOMEALIGN {
     ch_versions = Channel.empty()
     ch_multiqc_files = Channel.empty()
 
+    //
+    // MODULE: seqtk_cutn_target
+    //
+    SEQTK_CUTN_TARGET (
+        ch_targetgenome
+    )
+    ch_versions = ch_versions.mix(SEQTK_CUTN_TARGET.out.versions.first())
+
+    //
+    // MODULE: seqtk_cutn_query
+    //
+    SEQTK_CUTN_QUERY (
+        ch_samplesheet
+    )
+    ch_versions = ch_versions.mix(SEQTK_CUTN_QUERY.out.versions.first())
+
     //
     // MODULE: assembly-scan
     //