Skip to content

Commit

Permalink
Report all BUSCOs and in the right order
Browse files Browse the repository at this point in the history
  • Loading branch information
muffato committed Jan 15, 2024
1 parent 7527e7a commit b99101b
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 16 deletions.
5 changes: 3 additions & 2 deletions modules/local/blobtoolkit/createblobdir.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR {

input:
tuple val(meta), path(window, stageAs: 'windowstats/*')
tuple val(meta1), path(busco)
tuple val(meta1), path(busco, stageAs: 'lineage??/*')
tuple val(meta2), path(blastp)
tuple val(meta3), path(yaml)
path(taxdump)
Expand All @@ -24,14 +24,15 @@ process BLOBTOOLKIT_CREATEBLOBDIR {
script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def busco_args = busco.collect { "--busco " + it } .join(' ')
def hits_blastp = blastp ? "--hits ${blastp}" : ""
"""
blobtools replace \\
--bedtsvdir windowstats \\
--meta ${yaml} \\
--taxdump ${taxdump} \\
--taxrule buscogenes \\
--busco ${busco} \\
${busco_args} \\
${hits_blastp} \\
--threads ${task.cpus} \\
$args \\
Expand Down
27 changes: 21 additions & 6 deletions subworkflows/local/busco_diamond_blastp.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ workflow BUSCO_DIAMOND {


// Add the basal lineages to the list (excluding duplicates)
basal_lineages = [ "archaea_odb10", "bacteria_odb10", "eukaryota_odb10" ]
basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ]
ch_ancestral_lineages
| map { lineages -> (lineages + basal_lineages).unique() }
| flatten ()
Expand Down Expand Up @@ -86,11 +86,26 @@ workflow BUSCO_DIAMOND {
ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() )


// Select BUSCO results for taxonomically closest database
// Index the lineages in the taxonomic order
def lineage_index = 0
ch_lineages
| map { lineage -> [lineage, lineage_index++] }
| set { ch_ordered_lineages }


// Order BUSCO results according to ch_lineages
BUSCO.out.full_table
| combine ( ch_lineages.toList().map { it[0] } )
| filter { meta, table, lineage -> table =~ /$lineage/ }
| map { meta, table, lineage -> [ meta, table ] }
| map { meta, table -> [table.parent.baseName.minus("run_"), meta, table] }
| join ( ch_ordered_lineages )
| map { lineage, meta, table, index -> [meta, table, index] }
| groupTuple()
| map { meta, tables, indexes -> [ meta, tables.withIndex().sort { a, b -> indexes[a[1]] <=> indexes[b[1]] } . collect { table, i -> table } ] }
| set { ch_indexed_buscos }


// Select BUSCO results for taxonomically closest database
ch_indexed_buscos
| map { meta, tables -> [meta, tables[0]] }
| set { ch_first_table }


Expand All @@ -102,7 +117,7 @@ workflow BUSCO_DIAMOND {

emit:
first_table = ch_first_table // channel: [ val(meta), path(full_table) ]
full_table = BUSCO.out.full_table // channel: [ val(meta), path(full_tables) ]
all_tables = ch_indexed_buscos // channel: [ val(meta), path(full_tables) ]
blastp_txt = DIAMOND_BLASTP.out.txt // channel: [ val(meta), path(txt) ]
taxon_id = ch_taxid // channel: taxon_id
multiqc // channel: [ meta, summary ]
Expand Down
8 changes: 2 additions & 6 deletions subworkflows/local/collate_stats.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ include { BLOBTOOLKIT_WINDOWSTATS } from '../../modules/local/blobtoolkit/window

workflow COLLATE_STATS {
take:
busco_table // channel: [ val(meta), path(full_table) ]
busco // channel: [ val(meta), path(full_table) ]
bed // channel: [ val(meta), path(bed) ]
freq // channel: [ val(meta), path(freq) ]
mononuc // channel: [ val(meta), path(mononuc) ]
Expand All @@ -20,11 +20,7 @@ workflow COLLATE_STATS {


// Count BUSCO genes in a region
busco_table
| groupTuple()
| set { ch_busco }

BLOBTOOLKIT_COUNTBUSCOS ( ch_busco, bed )
BLOBTOOLKIT_COUNTBUSCOS ( busco, bed )
ch_versions = ch_versions.mix ( BLOBTOOLKIT_COUNTBUSCOS.out.versions.first() )


Expand Down
4 changes: 2 additions & 2 deletions workflows/blobtoolkit.nf
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ workflow BLOBTOOLKIT {
// SUBWORKFLOW: Collate genome statistics by various window sizes
//
COLLATE_STATS (
BUSCO_DIAMOND.out.full_table,
BUSCO_DIAMOND.out.all_tables,
COVERAGE_STATS.out.bed,
COVERAGE_STATS.out.freq,
COVERAGE_STATS.out.mononuc,
Expand All @@ -186,7 +186,7 @@ workflow BLOBTOOLKIT {
BLOBTOOLS (
INPUT_CHECK.out.config,
COLLATE_STATS.out.window_tsv,
BUSCO_DIAMOND.out.first_table,
BUSCO_DIAMOND.out.all_tables,
BUSCO_DIAMOND.out.blastp_txt.ifEmpty([[],[]]),
RUN_BLASTX.out.blastx_out.ifEmpty([[],[]]),
RUN_BLASTN.out.blastn_out.ifEmpty([[],[]]),
Expand Down

0 comments on commit b99101b

Please sign in to comment.