Skip to content

Commit

Permalink
initial conversion of contigsTaxonomyAssignment to subworkflow
Browse files Browse the repository at this point in the history
  • Loading branch information
aw-watson committed Jan 8, 2025
1 parent 4c60188 commit 6100ec0
Show file tree
Hide file tree
Showing 12 changed files with 114 additions and 97 deletions.

This file was deleted.

15 changes: 0 additions & 15 deletions workflows/Nextflow/contigsTaxonomyAssignment/nextflow.config

This file was deleted.

8 changes: 8 additions & 0 deletions workflows/Nextflow/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ include {HOSTREMOVAL} from './modules/hostRemoval/hostRemoval.nf'
include {ASSEMBLY} from './modules/runAssembly/runAssembly.nf'
include {READSTOCONTIGS} from './modules/runReadsToContig/runReadsToContig.nf'
include {READSTAXONOMYASSIGNMENT} from './modules/readsTaxonomyAssignment/readsTaxonomyAssignment.nf'
//contig-based taxonomy subworkflow; module paths are relative to this script and must start with './'
include {CONTIGSTAXONOMYASSIGNMENT} from './modules/contigsTaxonomyAssignment/contigsTaxonomyAssignment.nf'

workflow {

Expand Down Expand Up @@ -50,8 +51,15 @@ workflow {
READSTOCONTIGS(params.r2c.plus(params.shared), paired, unpaired, contigs)
}

//should always run if contigs were provided or generated
//NOTE(review): READSTOCONTIGS also appears to be invoked just above, inside a conditional block.
//Nextflow DSL2 does not allow one included component to be invoked twice under the same name --
//confirm the earlier call is removed/unreachable, or include the workflow under an alias.
READSTOCONTIGS(params.r2c.plus(params.shared), paired, unpaired, contigs)

//optional read-based taxonomy classification, toggled by params.modules.readsTaxonomyAssignment.
//settings are the readsTaxonomy parameters merged with the shared parameters.
//NOTE(review): the final .plus(params.faqcs.minLen) passes a single parameter value rather than a map --
//presumably minLen is meant to be merged in as a key/value pair; verify against the params definition.
if(params.modules.readsTaxonomyAssignment) {
READSTAXONOMYASSIGNMENT(params.readsTaxonomy.plus(params.shared).plus(params.faqcs.minLen), paired, unpaired, avgLen)
}

//optional contig-based taxonomy classification, toggled by params.modules.contigsTaxonomyAssignment.
//the third argument is the coverage table channel that READSTOCONTIGS emits under the name 'covTable'.
if(params.modules.contigsTaxonomyAssignment) {
    CONTIGSTAXONOMYASSIGNMENT(params.contigsTaxonomy.plus(params.shared), contigs, READSTOCONTIGS.out.covTable)
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
//André Watson
//Aug 2024
//apwat @ lanl.gov

//Core classification step. Runs MICCR (https://github.com/chienchi/miccr) on a FASTA file of contigs,
//then writes the contigs that were not classified to a separate FASTA file.
//Inputs:  settings (map; reads outDir, projName, cpus), contigs (FASTA file).
//Outputs: MICCR log, log.txt, <projName>.lca_ctg.tsv (emitted as taxLcaResult),
//         <projName>.ctg.tsv (emitted as taxResult), <projName>.unclassified.fasta, <projName>.paf.
process contigTaxonomy {
    label 'cta'
    publishDir "${settings["outDir"]}/AssemblyBasedAnalysis/Taxonomy", mode: 'copy'

    input:
    val settings
    path contigs

    output:
    path "${settings["projName"]}.log"
    path "log.txt"
    path "${settings["projName"]}.lca_ctg.tsv", emit: taxLcaResult
    path "${settings["projName"]}.ctg.tsv", emit: taxResult
    path "${settings["projName"]}.unclassified.fasta"
    path "${settings["projName"]}.paf"

    script:
    //shorthand for the values interpolated into the command lines below
    def prefix = settings["projName"]
    def threads = settings["cpus"]
    """
    miccr.py -x asm10 -d /venv/database/miccrDB/NCBI-Bacteria-Virus.fna.mmi -t ${threads} -p ${prefix} -i ${contigs} 1>log.txt 2>&1
    get_unclassified_fasta.pl -in ${contigs} -classified ${prefix}.lca_ctg.tsv -output ${prefix}.unclassified.fasta -log log.txt
    """
}

//Adds multi-level taxonomic classification to a results file.
//Inputs:  settings (map; reads outDir for publishing), taxResult (.ctg.tsv file produced by MICCR).
//Outputs: <taxResult file name>.lineage, emitted as 'lineage'.
process addLineage {
    label 'cta'
    publishDir(
        path: "${settings["outDir"]}/AssemblyBasedAnalysis/Taxonomy",
        mode: 'copy'
    )

    input:
    val settings
    path taxResult

    output:
    path "*.lineage", emit: lineage

    script:
    //add_lineage.py is from MICCR repo in container.
    //The database directory is given as an absolute path: it is the same /venv/database/miccrDB location
    //used by the contigTaxonomy process. A relative '.venv/...' path would resolve inside the task work directory.
    """
    add_lineage.py /venv/database/miccrDB/ $taxResult > ${taxResult.name}.lineage
    """
}

//Creates taxonomy classification graphs and a JSON table of the LCA results.
//Inputs:  settings (map; reads outDir, projName, rowLimit),
//         lineage (lineage file from addLineage),
//         covTable (coverage table, see https://github.com/chienchi/miccr/blob/master/utils/README.md,
//                   or from workflow runReadsToContig),
//         lcaResult (.lca_ctg.tsv file produced by MICCR).
//Outputs: <projName>.ctg_class.LCA.json, summary_by_*.txt and *.pdf files.
process plotAndTable {
    label 'cta'
    publishDir(
        path: "${settings["outDir"]}/AssemblyBasedAnalysis/Taxonomy",
        mode: 'copy'
    )

    input:
    val settings
    path lineage
    path covTable
    path lcaResult

    output:
    path "${settings["projName"]}.ctg_class.LCA.json"
    path "summary_by_*.txt"
    path "*.pdf"

    script:
    //-project_dir takes the interpolated outDir value; a doubled '$' would leave a stray dollar sign in the path
    """
    classification_plot.R $lineage ${settings["projName"]} $covTable
    tab2Json_for_dataTable.pl -project_dir ${settings["outDir"]} -mode contig -limit ${settings["rowLimit"]} $lcaResult > ${settings["projName"]}.ctg_class.LCA.json
    """
}

//Subworkflow for contig-based taxonomy: classify contigs, add lineage to the classification,
//then produce plots and the JSON table from the lineage, coverage table and LCA results.
workflow CONTIGSTAXONOMYASSIGNMENT {
    take:
    settings
    contigs
    coverageTable

    main:
    //step 1: classification
    contigTaxonomy(settings, contigs)
    ctgTsv = contigTaxonomy.out.taxResult
    lcaTsv = contigTaxonomy.out.taxLcaResult

    //step 2: lineage annotation of the per-contig results
    addLineage(settings, ctgTsv)

    //step 3: plots and tables
    plotAndTable(settings, addLineage.out.lineage, coverageTable, lcaTsv)
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ process makeCoverageTable {
output:
path "contigs_stats.txt"
path "contigs_stats.pdf"
path "*_coverage.table.json"
path "*_coverage.table.json", emit: coverageTable

script:
def rowLimit = settings["rowLimit"] != null ? "${settings["rowLimit"]} " : "3000"
Expand Down Expand Up @@ -157,4 +157,9 @@ workflow READSTOCONTIGS {
extractUnmapped(settings, validationAlignment.out.sortedBam, validationAlignment.out.logFile)
}

covTable = makeCoverageTable.out.coverageTable
emit:
covTable


}
13 changes: 11 additions & 2 deletions workflows/Nextflow/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ params {

}

//settings for the contig-based taxonomy subworkflow (CONTIGSTAXONOMYASSIGNMENT)
contigsTaxonomy {
//passed as -limit to tab2Json_for_dataTable.pl in the plotAndTable process
rowLimit = 3000
}


}

Expand All @@ -130,9 +134,11 @@ apptainer {
enabled = true
pullTimeout = "1 hour"
//bind mounts are installation-dependent paths to databases
//not all bound paths are accessed by all containers
runOptions = "--compat --cleanenv --home /media/volume/sdd/nextflow \
--bind /media/volume/sdd/nextflow/database:/venv/bin/../../../database \
--bind /media/volume/sdd/nextflow/krona_dbs:/venv/opt/krona/taxonomy"
--bind /media/volume/sdd/nextflow/krona_dbs:/venv/opt/krona/taxonomy \
--bind /media/volume/sdb/nextflow/database/miccrDB:/venv/database/miccrDB"
}


Expand All @@ -157,7 +163,10 @@ process {
container = 'apwat/run_r2c:1.6'
}
withLabel: 'rta' {
container = "apwat/reads_taxonomy:1.13"
container = 'apwat/reads_taxonomy:1.13'
}
withLabel: 'cta' {
container = 'apwat/contigs_taxonomy:1.10'
}
}

Expand Down

0 comments on commit 6100ec0

Please sign in to comment.