Skip to content

Commit

Permalink
Hide the CSV from the rest of the pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
muffato committed May 20, 2024
1 parent efa7b1e commit 0e6fa8e
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 23 deletions.
23 changes: 3 additions & 20 deletions subworkflows/local/busco_diamond_blastp.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebusco
workflow BUSCO_DIAMOND {
take:
fasta // channel: [ val(meta), path(fasta) ]
csv // channel: [ val(meta), path(csv) ]
busco_lin // channel: val([busco_lineages])
busco_db // channel: path(busco_db)
blastp // channel: path(blastp_db)

Expand All @@ -20,30 +20,14 @@ workflow BUSCO_DIAMOND {
ch_versions = Channel.empty()


//
// Get NCBI species ID
//
csv
| map { meta, csv -> csv }
| splitCsv(header: ['key', 'value'])
| filter { it.key == "taxon_id" }
| map { it.value }
| set { ch_taxid }


//
// Prepare the BUSCO lineages
//
// 0. Initialise some variables
basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ]
def lineage_position = 0
// 1. Parse the NCBI_GET_ODB_TAXON output
csv
| map { meta, csv -> csv }
| splitCsv(header: ['key', 'value'])
| filter { it.key == "busco_lineage" }
| map { it.value }
| collect
// 1. Start from the taxon's lineages
busco_lin
// 2. Add the (missing) basal lineages
| map { lineages -> (lineages + basal_lineages).unique() }
| flatten ()
Expand Down Expand Up @@ -139,7 +123,6 @@ workflow BUSCO_DIAMOND {
first_table = ch_first_table // channel: [ val(meta), path(full_table) ]
all_tables = ch_indexed_buscos // channel: [ val(meta), path(full_tables) ]
blastp_txt = DIAMOND_BLASTP.out.txt // channel: [ val(meta), path(txt) ]
taxon_id = ch_taxid // channel: taxon_id
multiqc // channel: [ meta, summary ]
versions = ch_versions // channel: [ versions.yml ]
}
34 changes: 33 additions & 1 deletion subworkflows/local/input_check.nf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,37 @@ workflow INPUT_CHECK {
ch_versions = ch_versions.mix(GENERATE_CONFIG.out.versions.first())


//
// Parse the CSV file
//
GENERATE_CONFIG.out.csv
| map { meta, csv -> csv }
| splitCsv(header: ['key', 'value'])
| branch {
taxon_id: it.key == "taxon_id"
return it.value
busco_lineage: it.key == "busco_lineage"
return it.value
}
| set { ch_parsed_csv }


//
// Get NCBI species ID
//
ch_parsed_csv.taxon_id
| first
| set { ch_taxon_id }


//
// Get the BUSCO lineages
//
ch_parsed_csv.busco_lineage
| collect
| set { ch_busco_lineages }


if ( params.accession ) {
read_files
| map { meta, data -> meta.id.split("_")[0..-2].join("_") }
Expand All @@ -89,7 +120,8 @@ workflow INPUT_CHECK {
emit:
reads // channel: [ val(meta), path(datafile) ]
config = ch_config // channel: [ val(meta), path(yaml) ]
csv_params = GENERATE_CONFIG.out.csv // channel: [ val(meta), path(csv) ]
taxon_id = ch_taxon_id // channel: val(taxon_id)
busco_lineages = ch_busco_lineages // channel: val([busco_lin])
versions = ch_versions // channel: [ versions.yml ]
}

Expand Down
4 changes: 2 additions & 2 deletions workflows/blobtoolkit.nf
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ workflow BLOBTOOLKIT {
//
BUSCO_DIAMOND (
PREPARE_GENOME.out.genome,
INPUT_CHECK.out.csv_params,
INPUT_CHECK.out.busco_lineages,
ch_busco_db,
ch_blastp,
)
Expand All @@ -162,7 +162,7 @@ workflow BLOBTOOLKIT {
RUN_BLASTX.out.blastx_out,
PREPARE_GENOME.out.genome,
ch_blastn,
BUSCO_DIAMOND.out.taxon_id
INPUT_CHECK.out.taxon_id
)

//
Expand Down

0 comments on commit 0e6fa8e

Please sign in to comment.