From c2af30ebe99d1b626f265fd5c87d7a0c0a31794b Mon Sep 17 00:00:00 2001 From: Jim Downie Date: Fri, 29 Nov 2024 15:40:24 +0000 Subject: [PATCH] Fix taxonomy WF, add resource requirements for GTDB --- conf/base.config | 15 +++++++++++++-- subworkflows/local/bin_taxonomy.nf | 9 ++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/conf/base.config b/conf/base.config index 22c5ffc..ed45d3e 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { 1 * task.attempt } memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } @@ -24,39 +23,51 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } } + withLabel:process_low { cpus = { 2 * task.attempt } memory = { 12.GB * task.attempt } time = { 4.h * task.attempt } } + withLabel:process_medium { cpus = { 6 * task.attempt } memory = { 36.GB * task.attempt } time = { 8.h * task.attempt } } + withLabel:process_high { cpus = { 12 * task.attempt } memory = { 72.GB * task.attempt } time = { 16.h * task.attempt } } + withLabel:process_long { time = { 20.h * task.attempt } } + withLabel:process_high_memory { memory = { 200.GB * task.attempt } } + withLabel:error_ignore { errorStrategy = 'ignore' } + withLabel:error_retry { errorStrategy = 'retry' maxRetries = 2 } + + withName: GTDBTK_CLASSIFYWF { + cpus = { 10 * task.attempt } + memory = { 128.GB * task.attempt } + time = { 12.h * task.attempt } + } } diff --git a/subworkflows/local/bin_taxonomy.nf b/subworkflows/local/bin_taxonomy.nf index 2e5cf1d..c0e1d19 100644 --- a/subworkflows/local/bin_taxonomy.nf +++ b/subworkflows/local/bin_taxonomy.nf @@ -11,10 +11,10 @@ workflow BIN_TAXONOMY { if(checkm2_summary) { ch_bin_scores = checkm2_summary | splitCsv(header: true, sep: '\t') - | map { row -> + | map { meta, row -> def completeness = Double.parseDouble(row.'Completeness') def contamination = Double.parseDouble(row.'Contamination') - [row.'Bin Id' + ".fa", completeness, contamination] + [row.'Name' + ".fa", completeness, contamination] } ch_filtered_bins = bins @@ -31,9 +31,8 @@ workflow BIN_TAXONOMY { } if(params.enable_gtdbtk && params.gtdbtk_db) { - gtdbtk_db = Channel.of( - [ "GTDBTk", file(params.gtdbtk_db, checkIfExists: true) ] - ) + gtdbtk_db = Channel.of(file(params.gtdbtk_db, checkIfExists: true).listFiles()) + | collect | map { ["gtdb" ,it] } gtdbtk_mash = params.gtdbtk_mash_db ? file(params.gtdbtk_mash_db, checkIfExists: true) : [] GTDBTK_CLASSIFYWF(