Updating channels and assignments, removing unused channels
DLBPointon committed Nov 21, 2023
1 parent 14b35e6 commit 41eefc4
Showing 8 changed files with 108 additions and 86 deletions.
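The common change across these files is that the standalone assembly class value channel (assembly_classT) is dropped and the class is instead read from the meta map already carried in the reference tuple (see meta.class in gene_alignment.nf and synteny.nf below). A minimal sketch of the pattern in Nextflow, with purely illustrative meta values that are not taken from the commit:

    // Before: the class travelled as its own value channel alongside the reference
    // SOME_SUBWORKFLOW ( reference_tuple, ..., assembly_classT, ... )

    // After: the class rides inside the reference tuple's meta map
    ch_reference = Channel.of(
        [ [ id: 'sample_1', class: 'insect' ], file('assembly.fa') ]    // illustrative values
    )

    ch_reference
        .map { meta, ref -> meta.class }    // read the class straight from meta
        .view()                             // prints: insect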
1 change: 0 additions & 1 deletion subworkflows/local/busco_annotation.nf
@@ -23,7 +23,6 @@ workflow BUSCO_ANNOTATION {
take:
dot_genome // channel: [val(meta), [ datafile ]]
reference_tuple // channel: [val(meta), [ datafile ]]
assembly_classT // channel: val(class)
lineageinfo // channel: val(lineage_db)
lineagespath // channel: val(/path/to/buscoDB)
buscogene_as // channel: val(dot_as location)
7 changes: 3 additions & 4 deletions subworkflows/local/gene_alignment.nf
@@ -19,7 +19,6 @@ workflow GENE_ALIGNMENT {
reference_tuple // Channel [ val(meta), path(file) ]
reference_index // Channel [ val(meta), path(file) ]
max_scaff_size // Channel val(size of largest scaffold in bp)
assembly_classT // Channel val(clade_id)
alignment_datadir // Channel val(geneset_dir)
alignment_genesets // Channel val(geneset_id)
alignment_common // Channel val(common_name) // Not yet in use
@@ -46,10 +45,10 @@
//
ch_data
.combine( alignment_datadir )
.combine( assembly_classT )
.combine( reference_tuple )
.map {
ch_org, data_dir, classT ->
file("${data_dir}${classT}/csv_data/${ch_org}-data.csv")
ch_org, data_dir, meta, ref ->
file("${data_dir}${meta.class}/csv_data/${ch_org}-data.csv")
}
.splitCsv( header: true, sep:',')
.map( row ->
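As a purely illustrative example of the path now built from the combined tuple: with ch_org = 'ApisMellifera', data_dir = '/gene_data/' and meta.class = 'insect', the map above resolves to:

    file('/gene_data/insect/csv_data/ApisMellifera-data.csv')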
17 changes: 2 additions & 15 deletions subworkflows/local/generate_genome.nf
@@ -9,29 +9,17 @@ include { GET_LARGEST_SCAFF } from '../../modules/local/get_largest_scaff'

workflow GENERATE_GENOME {
take:
assembly_id // Channel val(assembly_id)
reference_file // Channel path(file)
reference_ch // tuple( [id: name] , file)

main:
ch_versions = Channel.empty()

//
// LOGIC: GENERATES A REFERENCE DATA TUPLE
//
reference_file
.combine( assembly_id )
.map { file, sample_id ->
tuple ([id: sample_id],
file)
}
.set { to_chromsize }

//
// MODULE: GENERATE INDEX OF REFERENCE
// EMITS REFERENCE INDEX FILE MODIFIED FOR SCAFF SIZES
//
CUSTOM_GETCHROMSIZES (
to_chromsize,
reference_ch,
"temp.genome"
)
ch_versions = ch_versions.mix( CUSTOM_GETCHROMSIZES.out.versions )
@@ -56,6 +44,5 @@ workflow GENERATE_GENOME {
max_scaff_size = GET_LARGEST_SCAFF.out.scaff_size.toInteger()
dot_genome = GNU_SORT.out.sorted
ref_index = CUSTOM_GETCHROMSIZES.out.fai
reference_tuple = to_chromsize
versions = ch_versions.ifEmpty(null)
}
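GENERATE_GENOME now takes a ready-made reference tuple channel instead of building it from separate assembly_id and reference_file channels. A sketch of how a caller could construct that input, mirroring the mapping removed above (the params names are illustrative, not from the commit):

    // Build the tuple( [id: name], file ) channel the subworkflow now expects
    ch_reference = Channel.fromPath( params.reference )                  // illustrative: path to the assembly FASTA
        .map { ref -> tuple( [ id: params.sample_id ], ref ) }

    GENERATE_GENOME ( ch_reference )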
1 change: 0 additions & 1 deletion subworkflows/local/insilico_digest.nf
@@ -14,7 +14,6 @@ include { UCSC_BEDTOBIGBED } from '../../modules/nf-core/ucsc/bedto

workflow INSILICO_DIGEST {
take:
myid // Channel val(sample_id)
sizefile // Channel [ val(meta), path(my.genome_file) ]
sample // Channel [ val(meta), path(reference_file) ]
ch_enzyme // Channel val( "bspq1","bsss1","DLE1" )
18 changes: 9 additions & 9 deletions subworkflows/local/kmer.nf
@@ -16,8 +16,8 @@ include { MERQURYFK_MERQURYFK } from '../../modules/nf-core/merquryfk/merquryfk/

workflow KMER {
take:
reference_tuple // Channel [ val(meta), path(file) ]
reads_path // Channel: [ val(meta), val( str ) ]
reference_tuple // Channel: [ val( meta ), path( file ) ]
reads_path // Channel: [ val( meta ), val( str ) ]

main:
ch_versions = Channel.empty()
@@ -52,31 +52,31 @@ workflow KMER {
//
CAT_CAT.out.file_out
.map{ meta, reads ->
reads.getName().endsWith('gz') ? [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa.gz'] : [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa']
reads.getName().endsWith('gz') ? [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa.gz'] : [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa']
}
.set{ ch_reads_merged }

//
// LOGIC: PREPARE FASTK INPUT
//
CAT_CAT.out.file_out
.join(ch_reads_merged)
.join( ch_reads_merged )
.map{ meta, reads_old, reads_new ->
reads_old.renameTo(reads_new);
reads_old.renameTo( reads_new );
}

//
// MODULE: COUNT KMERS
//
FASTK_FASTK( ch_reads_merged )
ch_versions = ch_versions.mix(FASTK_FASTK.out.versions.first())
ch_versions = ch_versions.mix( FASTK_FASTK.out.versions.first() )

//
// LOGIC: PREPARE MERQURYFK INPUT
//
FASTK_FASTK.out.hist
.combine(FASTK_FASTK.out.ktab)
.combine(reference_tuple)
.combine( FASTK_FASTK.out.ktab )
.combine( reference_tuple )
.map{ meta_hist, hist, meta_ktab, ktab, meta_ref, primary ->
tuple( meta_hist, hist, ktab, primary, [])
}
@@ -86,7 +86,7 @@
// MODULE: USE KMER HISTOGRAM TO PRODUCE SPECTRA
//
MERQURYFK_MERQURYFK ( ch_merq )
ch_versions = ch_versions.mix(MERQURYFK_MERQURYFK.out.versions.first())
ch_versions = ch_versions.mix( MERQURYFK_MERQURYFK.out.versions.first() )

emit:
merquryk_completeness = MERQURYFK_MERQURYFK.out.stats // meta, stats
56 changes: 27 additions & 29 deletions subworkflows/local/longread_coverage.nf
@@ -39,7 +39,7 @@ workflow LONGREAD_COVERAGE {
MINIMAP2_INDEX(
reference_tuple
)
ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions )
ch_versions = ch_versions.mix( MINIMAP2_INDEX.out.versions )

//
// LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT
@@ -58,7 +58,7 @@
//
// MODULE: GETS PACBIO READ PATHS FROM READS_PATH
//
ch_grabbed_read_paths = GrabFiles( get_reads_input )
ch_grabbed_read_paths = GrabFiles( get_reads_input )

//
// LOGIC: PACBIO READS FILES TO CHANNEL
@@ -126,8 +126,8 @@
small.bool_cigar_paf,
small.bool_cigar_bam
)
ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions)
ch_align_bams = MINIMAP2_ALIGN.out.bam
ch_versions = ch_versions.mix( MINIMAP2_ALIGN.out.versions )
ch_align_bams = MINIMAP2_ALIGN.out.bam

//
// MODULE: ALIGN READS TO REFERENCE WHEN REFERENCE >5GB PER SCAFFOLD
@@ -139,7 +139,7 @@
large.bool_cigar_paf,
large.bool_cigar_bam
)
ch_versions = ch_versions.mix(MINIMAP2_ALIGN_SPLIT.out.versions)
ch_versions = ch_versions.mix( MINIMAP2_ALIGN_SPLIT.out.versions )

//
// LOGIC: COLLECT OUTPUTTED BAM FILES FROM BOTH PROCESSES
@@ -172,15 +172,15 @@
reference_tuple,
[[],[]]
)
ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions)
ch_versions = ch_versions.mix( SAMTOOLS_MERGE.out.versions )

//
// MODULE: SORT THE MERGED BAM BEFORE CONVERSION
//
SAMTOOLS_SORT (
SAMTOOLS_MERGE.out.bam
)
ch_versions = ch_versions.mix( SAMTOOLS_MERGE.out.versions )
ch_versions = ch_versions.mix( SAMTOOLS_MERGE.out.versions )

//
// LOGIC: PREPARING MERGE INPUT WITH REFERENCE GENOME AND REFERENCE INDEX
@@ -209,15 +209,15 @@
view_input.ref_input,
[]
)
ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions)
ch_versions = ch_versions.mix( SAMTOOLS_VIEW.out.versions )

//
// MODULE: BAM TO PRIMARY BED
//
BEDTOOLS_BAMTOBED(
SAMTOOLS_VIEW.out.bam
)
ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions)
ch_versions = ch_versions.mix( BEDTOOLS_BAMTOBED.out.versions )

//
// LOGIC: PREPARING Genome2Cov INPUT
@@ -244,48 +244,48 @@
genomecov_input.dot_genome,
genomecov_input.file_suffix
)
ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions)
ch_versions = ch_versions.mix( BEDTOOLS_GENOMECOV.out.versions )

//
// MODULE: SORT THE PRIMARY BED FILE
//
GNU_SORT(
BEDTOOLS_GENOMECOV.out.genomecov
)
ch_versions = ch_versions.mix(GNU_SORT.out.versions)
ch_versions = ch_versions.mix( GNU_SORT.out.versions )

//
// MODULE: get_minmax_punches
//
GETMINMAXPUNCHES(
GNU_SORT.out.sorted
)
ch_versions = ch_versions.mix(GETMINMAXPUNCHES.out.versions)
ch_versions = ch_versions.mix( GETMINMAXPUNCHES.out.versions)

//
// MODULE: get_minmax_punches
//
BEDTOOLS_MERGE_MAX(
GETMINMAXPUNCHES.out.max
)
ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MAX.out.versions)
ch_versions = ch_versions.mix( BEDTOOLS_MERGE_MAX.out.versions )

//
// MODULE: get_minmax_punches
//
BEDTOOLS_MERGE_MIN(
GETMINMAXPUNCHES.out.min
)
ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MIN.out.versions)
ch_versions = ch_versions.mix( BEDTOOLS_MERGE_MIN.out.versions )

//
// MODULE: GENERATE DEPTHGRAPH
//
GRAPHOVERALLCOVERAGE(
GNU_SORT.out.sorted
)
ch_versions = ch_versions.mix(GRAPHOVERALLCOVERAGE.out.versions)
ch_depthgraph = GRAPHOVERALLCOVERAGE.out.part
ch_versions = ch_versions.mix( GRAPHOVERALLCOVERAGE.out.versions )
ch_depthgraph = GRAPHOVERALLCOVERAGE.out.part

//
// LOGIC: PREPARING FINDHALFCOVERAGE INPUT
@@ -308,7 +308,7 @@
halfcov_input.genome_file,
halfcov_input.depthgraph_file
)
ch_versions = ch_versions.mix(FINDHALFCOVERAGE.out.versions)
ch_versions = ch_versions.mix(FINDHALFCOVERAGE.out.versions)

//
// LOGIC: PREPARING NORMAL COVERAGE INPUT
@@ -329,15 +329,15 @@
bed2bw_normal_input.ch_coverage_bed,
bed2bw_normal_input.genome_file
)
ch_versions = ch_versions.mix(BED2BW_NORMAL.out.versions)
ch_versions = ch_versions.mix(BED2BW_NORMAL.out.versions)

//
// MODULE: CONVERT COVERAGE TO LOG2
//
LONGREADCOVERAGESCALELOG2(
GNU_SORT.out.sorted
)
ch_versions = ch_versions.mix(LONGREADCOVERAGESCALELOG2.out.versions)
ch_versions = ch_versions.mix(LONGREADCOVERAGESCALELOG2.out.versions)

//
// LOGIC: PREPARING LOG2 COVERAGE INPUT
@@ -358,13 +358,11 @@
bed2bw_log2_input.ch_coverage_bed,
bed2bw_log2_input.genome_file
)
ch_versions = ch_versions.mix(BED2BW_LOG2.out.versions)
ch_versions = ch_versions.mix(BED2BW_LOG2.out.versions)

//
// LOGIC: GENERATE A SUMMARY TUPLE FOR OUTPUT
//
ch_grabbed_read_paths.map{ it }

ch_grabbed_read_paths
.collect()
.map { meta, fasta ->
@@ -376,13 +374,13 @@
.set { ch_reporting_pacbio }

emit:
ch_minbed = BEDTOOLS_MERGE_MIN.out.bed
ch_halfbed = FINDHALFCOVERAGE.out.bed
ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed
ch_reporting = ch_reporting_pacbio.collect()
ch_covbw_nor = BED2BW_NORMAL.out.bigwig
ch_covbw_log = BED2BW_LOG2.out.bigwig
versions = ch_versions
ch_minbed = BEDTOOLS_MERGE_MIN.out.bed
ch_halfbed = FINDHALFCOVERAGE.out.bed
ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed
ch_reporting = ch_reporting_pacbio.collect()
ch_covbw_nor = BED2BW_NORMAL.out.bigwig
ch_covbw_log = BED2BW_LOG2.out.bigwig
versions = ch_versions
}

process GrabFiles {
9 changes: 7 additions & 2 deletions subworkflows/local/synteny.nf
@@ -10,7 +10,6 @@ workflow SYNTENY {
take:
reference_tuple // Channel [ val(meta), path(file) ]
synteny_path // Channel val(meta)
assembly_classT // Channel val(meta)

main:
ch_versions = Channel.empty()
@@ -19,9 +18,15 @@
// MODULE: SEARCHES PREDETERMINED PATH FOR SYNTENIC GENOME FILES BASED ON CLASS
// EMITS PATH LIST
//
reference_tuple
.map{meta, file ->
meta.class
}
.set { defined_class }

GET_SYNTENY_GENOMES(
synteny_path,
assembly_classT
defined_class
)
ch_versions = ch_versions.mix( GET_SYNTENY_GENOMES.out.versions )
