diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index fd8dc7dd..1856e56b 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -11,7 +11,7 @@ include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebusco workflow BUSCO_DIAMOND { take: fasta // channel: [ val(meta), path(fasta) ] - csv // channel: [ val(meta), path(csv) ] + busco_lin // channel: val([busco_lineages]) busco_db // channel: path(busco_db) blastp // channel: path(blastp_db) @@ -20,30 +20,14 @@ workflow BUSCO_DIAMOND { ch_versions = Channel.empty() - // - // Get NCBI species ID - // - csv - | map { meta, csv -> csv } - | splitCsv(header: ['key', 'value']) - | filter { it.key == "taxon_id" } - | map { it.value } - | set { ch_taxid } - - // // Prepare the BUSCO linages // // 0. Initialise sone variables basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ] def lineage_position = 0 - // 1. Parse the NCBI_GET_ODB_TAXON output - csv - | map { meta, csv -> csv } - | splitCsv(header: ['key', 'value']) - | filter { it.key == "busco_lineage" } - | map { it.value } - | collect + // 1. Start from the taxon's lineages + busco_lin // 2. 
Add the (missing) basal lineages | map { lineages -> (lineages + basal_lineages).unique() } | flatten () @@ -139,7 +123,6 @@ workflow BUSCO_DIAMOND { first_table = ch_first_table // channel: [ val(meta), path(full_table) ] all_tables = ch_indexed_buscos // channel: [ val(meta), path(full_tables) ] blastp_txt = DIAMOND_BLASTP.out.txt // channel: [ val(meta), path(txt) ] - taxon_id = ch_taxid // channel: taxon_id multiqc // channel: [ meta, summary ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 3b14c5dc..2c9540f8 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -70,6 +70,37 @@ workflow INPUT_CHECK { ch_versions = ch_versions.mix(GENERATE_CONFIG.out.versions.first()) + // + // Parse the CSV file + // + GENERATE_CONFIG.out.csv + | map { meta, csv -> csv } + | splitCsv(header: ['key', 'value']) + | branch { + taxon_id: it.key == "taxon_id" + return it.value + busco_lineage: it.key == "busco_lineage" + return it.value + } + | set { ch_parsed_csv } + + + // + // Get NCBI species ID + // + ch_parsed_csv.taxon_id + | first + | set { ch_taxon_id } + + + // + // Get the BUSCO lineages + // + ch_parsed_csv.busco_lineage + | collect + | set { ch_busco_lineages } + + if ( params.accession ) { read_files | map { meta, data -> meta.id.split("_")[0..-2].join("_") } @@ -89,7 +120,8 @@ workflow INPUT_CHECK { emit: reads // channel: [ val(meta), path(datafile) ] config = ch_config // channel: [ val(meta), path(yaml) ] - csv_params = GENERATE_CONFIG.out.csv // channel: [ val(meta), path(csv) ] + taxon_id = ch_taxon_id // channel: val(taxon_id) + busco_lineages = ch_busco_lineages // channel: val([busco_lin]) versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index c22f8821..ade3b8f6 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -138,7 +138,7 @@ workflow 
BLOBTOOLKIT { // BUSCO_DIAMOND ( PREPARE_GENOME.out.genome, - INPUT_CHECK.out.csv_params, + INPUT_CHECK.out.busco_lineages, ch_busco_db, ch_blastp, ) @@ -162,7 +162,7 @@ workflow BLOBTOOLKIT { RUN_BLASTX.out.blastx_out, PREPARE_GENOME.out.genome, ch_blastn, - BUSCO_DIAMOND.out.taxon_id + INPUT_CHECK.out.taxon_id ) //