From 7527e7a9b9215148fb91f2d7661ecad4ca19f43a Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 12 Jan 2024 14:25:50 +0000 Subject: [PATCH] Updated CAT_CAT module that preserves the file extension This means that .fastq.gz will remain .fastq.gz and can then match the condition to bypass the SAMTOOLS_FASTA process. There is a pull-request for this, https://github.com/nf-core/modules/pull/4230 --- modules.json | 3 ++- modules/nf-core/cat/cat/cat-cat.diff | 34 ++++++++++++++++++++++++++++ modules/nf-core/cat/cat/main.nf | 11 ++++++++- 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 modules/nf-core/cat/cat/cat-cat.diff diff --git a/modules.json b/modules.json index 2efbb154..d77a8341 100644 --- a/modules.json +++ b/modules.json @@ -20,7 +20,8 @@ "cat/cat": { "branch": "master", "git_sha": "d593e8f6b7d1bbbb2acf43a4b9efeeac8d6720f2", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/cat/cat/cat-cat.diff" }, "custom/dumpsoftwareversions": { "branch": "master", diff --git a/modules/nf-core/cat/cat/cat-cat.diff b/modules/nf-core/cat/cat/cat-cat.diff new file mode 100644 index 00000000..4d2fedae --- /dev/null +++ b/modules/nf-core/cat/cat/cat-cat.diff @@ -0,0 +1,34 @@ +Changes in module 'nf-core/cat/cat' +--- modules/nf-core/cat/cat/main.nf ++++ modules/nf-core/cat/cat/main.nf +@@ -22,6 +22,8 @@ + def args2 = task.ext.args2 ?: '' + def file_list = files_in.collect { it.toString() } + ++ // choose appropriate concatenation tool depending on input and output format ++ + // | input | output | command1 | command2 | + // |-----------|------------|----------|----------| + // | gzipped | gzipped | cat | | +@@ -30,7 +32,7 @@ + // | ungzipped | gzipped | cat | pigz | + + // Use input file ending as default +- prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" ++ prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" + out_zip = prefix.endsWith('.gz') + in_zip = file_list[0].endsWith('.gz') + command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' +@@ -68,3 +70,10 @@ + END_VERSIONS + """ + } ++ ++// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz ++def getFileSuffix(filename) { ++ def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ ++ return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) ++} ++ + +************************************************************ diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index 970ab760..adbdbd7b 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -22,6 +22,8 @@ process CAT_CAT { def args2 = task.ext.args2 ?: '' def file_list = files_in.collect { it.toString() } + // choose appropriate concatenation tool depending on input and output format + // | input | output | command1 | command2 | // |-----------|------------|----------|----------| // | gzipped | gzipped | cat | | @@ -30,7 +32,7 @@ process CAT_CAT { // | ungzipped | gzipped | cat | pigz | // Use input file ending as default - prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}" + prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}" out_zip = prefix.endsWith('.gz') in_zip = file_list[0].endsWith('.gz') command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' @@ -68,3 +70,10 @@ process CAT_CAT { END_VERSIONS """ } + +// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz +def getFileSuffix(filename) { + def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ + return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) +} +