From e9d3a645442b0c747c452e25f375e202de856390 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Thu, 22 Aug 2024 03:34:14 +0100 Subject: [PATCH] Added an option to skip filtering hits from the same species --- CHANGELOG.md | 11 +++++--- nextflow.config | 1 + nextflow_schema.json | 5 ++++ subworkflows/local/input_check.nf | 3 +- subworkflows/local/run_blastn.nf | 46 ++++++++++++++++++++----------- 5 files changed, 45 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6330537..ff57d235 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,13 +15,16 @@ The pipeline has now been validated for draft (unpublished) assemblies. automatically selecting based on the taxonomy. - All parameters are now passed the regular Nextflow way. There is no support for the original Yaml configuration files of the Snakemake version. +- New option `--skip_taxon_filtering` to skip the taxon filtering in blast searches. + Mostly relevant for draft assemblies. ### Parameters -| Old parameter | New parameter | -| ------------- | ---------------- | -| --yaml | | -| | --busco_lineages | +| Old parameter | New parameter | +| ------------- | ---------------------- | +| --yaml | | +| | --busco_lineages | +| | --skip_taxon_filtering | > **NB:** Parameter has been **updated** if both old and new parameter information is present.
**NB:** Parameter has been **added** if just the new parameter information is present.
**NB:** Parameter has been **removed** if new parameter information isn't present. diff --git a/nextflow.config b/nextflow.config index a1ea3ae9..dc8cc891 100644 --- a/nextflow.config +++ b/nextflow.config @@ -33,6 +33,7 @@ params { blastp = null blastx = null blastn = null + skip_taxon_filtering = false // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index cd0b7341..e838ab04 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,6 +32,11 @@ "description": "Turn on optional genome masking if needed.", "fa_icon": "fas fa-toggle-off" }, + "skip_taxon_filtering": { + "type": "boolean", + "description": "Skip filtering out hits from the same species in blast* searches.", + "fa_icon": "fas fa-toggle-off" + }, "fetchngs_samplesheet": { "type": "boolean", "description": "Turn on the conversion from a nf-core/fetchngs samplesheet.", diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 53d21d80..7a8fd112 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -87,9 +87,10 @@ workflow INPUT_CHECK { // - // Get the taxon ID + // Get the taxon ID if we do taxon filtering in blast* searches // ch_parsed_csv.taxon_id + | map { params.skip_taxon_filtering ? '' : it } | first | set { ch_taxon_id } diff --git a/subworkflows/local/run_blastn.nf b/subworkflows/local/run_blastn.nf index 7dc92fd7..d479e832 100644 --- a/subworkflows/local/run_blastn.nf +++ b/subworkflows/local/run_blastn.nf @@ -47,23 +47,37 @@ workflow RUN_BLASTN { | set { ch_chunks } // Run blastn search - // run blastn excluding taxon_id - BLASTN_TAXON ( ch_chunks, blastn, taxon_id ) - ch_versions = ch_versions.mix ( BLASTN_TAXON.out.versions.first() ) - - // check if blastn output table is empty - BLASTN_TAXON.out.txt - | branch { meta, txt -> - empty: txt.isEmpty() - not_empty: true - } - | set { ch_blastn_taxon_out } + if (params.skip_taxon_filtering) { + + // skip BLASTN_TAXON + ch_blast_blastn_input = ch_chunks + + // fake ch_blastn_taxon_out.not_empty + ch_blastn_taxon_out = [ + not_empty: Channel.empty() + ] + + } else { - // repeat the blastn search without excluding taxon_id - ch_blastn_taxon_out.empty - | join ( ch_chunks ) - | map { meta, txt, fasta -> [meta, fasta] } - | set { ch_blast_blastn_input } + // run blastn excluding taxon_id + BLASTN_TAXON ( ch_chunks, blastn, taxon_id ) + ch_versions = ch_versions.mix ( BLASTN_TAXON.out.versions.first() ) + + // check if blastn output table is empty + BLASTN_TAXON.out.txt + | branch { meta, txt -> + empty: txt.isEmpty() + not_empty: true + } + | set { ch_blastn_taxon_out } + + // repeat the blastn search without excluding taxon_id + ch_blastn_taxon_out.empty + | join ( ch_chunks ) + | map { meta, txt, fasta -> [meta, fasta] } + | set { ch_blast_blastn_input } + + } BLAST_BLASTN ( ch_blast_blastn_input, blastn, [] ) ch_versions = ch_versions.mix ( BLAST_BLASTN.out.versions.first() )