Skip to content

Commit

Permalink
Additions of steps flag
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Sep 5, 2024
1 parent 4ebc614 commit e4b4cf0
Show file tree
Hide file tree
Showing 3 changed files with 239 additions and 157 deletions.
198 changes: 117 additions & 81 deletions workflows/treeval.nf
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ workflow TREEVAL {
//
ch_versions = Channel.empty()

//
// LOGIC: BUILD THE LIST OF STEPS TO SKIP FROM THE COMMA-SEPARATED --steps PARAMETER.
//          Always a List (empty when --steps is unset) so the containsAll() / contains()
//          checks below behave the same whether or not the flag was given.
//          Entries are trimmed so "--steps 'kmer, busco'" is accepted.
//
exclude_workflow_steps = params.steps ? params.steps.toString().split(",").collect { it.trim() } : []

// Every step name a user may exclude. The empty string is kept so that a stray
// comma (e.g. "kmer,,busco") does not fail validation.
full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", ""]

// Fail early, naming the flag the user actually typed (--steps), if any requested step is unknown.
if (!full_list.containsAll(exclude_workflow_steps)) {
    exit 1, "There is an extra argument given on Command Line: \n Check contents of --steps: $exclude_workflow_steps\nMaster list is: $full_list"
}

params.entry = 'FULL'
input_ch = Channel.fromPath(params.input, checkIfExists: true)

Expand Down Expand Up @@ -111,15 +119,17 @@ workflow TREEVAL {
// SUBWORKFLOW: Takes reference, channel of enzymes, my.genome, assembly_id and as file to generate
// file with enzymatic digest sites.
//
ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )
if ( !exclude_workflow_steps.contains("insilico_digest")) {
ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )

INSILICO_DIGEST (
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.reference_ch,
ch_enzyme,
digest_asfile
)
ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions )
INSILICO_DIGEST (
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.reference_ch,
ch_enzyme,
digest_asfile
)
ch_versions = ch_versions.mix( INSILICO_DIGEST.out.versions )
}

//
// SUBWORKFLOW: FOR SPLITTING THE REF GENOME INTO SCAFFOLD CHUNKS AND RUNNING SOME SUBWORKFLOWS
Expand All @@ -135,115 +145,141 @@ workflow TREEVAL {
//
// SUBWORKFLOW: Takes input fasta to generate BB files containing alignment data
//
GENE_ALIGNMENT (
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.ref_index,
YAML_INPUT.out.align_data_dir,
YAML_INPUT.out.align_geneset,
YAML_INPUT.out.align_common,
YAML_INPUT.out.intron_size,
gene_alignment_asfiles
)
ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions)
// The skip key must be spelled exactly as in full_list ("gene_alignments"); with the
// singular form the validated key never matched this guard, so the step could not be skipped.
if ( !exclude_workflow_steps.contains("gene_alignments")) {
    GENE_ALIGNMENT (
        GENERATE_GENOME.out.dot_genome,
        YAML_INPUT.out.reference_ch,
        GENERATE_GENOME.out.ref_index,
        YAML_INPUT.out.align_data_dir,
        YAML_INPUT.out.align_geneset,
        YAML_INPUT.out.align_common,
        YAML_INPUT.out.intron_size,
        gene_alignment_asfiles
    )
    ch_versions = ch_versions.mix(GENE_ALIGNMENT.out.versions)
}

//
// SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK
//
REPEAT_DENSITY (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome
)
ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions )
if ( !exclude_workflow_steps.contains("repeat_density")) {
REPEAT_DENSITY (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome
)
ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions )
}

//
// SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS
//
GAP_FINDER (
YAML_INPUT.out.reference_ch
)
ch_versions = ch_versions.mix( GAP_FINDER.out.versions )
if ( !exclude_workflow_steps.contains("gap_finder")) {
GAP_FINDER (
YAML_INPUT.out.reference_ch
)
ch_versions = ch_versions.mix( GAP_FINDER.out.versions )
}

//
// SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as
// file to generate a file containing sites of self-complementary sequence.
//
SELFCOMP (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.mummer_chunk,
YAML_INPUT.out.motif_len,
selfcomp_asfile
)
ch_versions = ch_versions.mix( SELFCOMP.out.versions )
if ( !exclude_workflow_steps.contains("selfcomp")) {
SELFCOMP (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.mummer_chunk,
YAML_INPUT.out.motif_len,
selfcomp_asfile
)
ch_versions = ch_versions.mix( SELFCOMP.out.versions )
}

//
// SUBWORKFLOW: Takes reference, the directory of syntenic genomes and order/clade of sequence
// and generates a file of syntenic blocks.
//
SYNTENY (
YAML_INPUT.out.reference_ch,
YAML_INPUT.out.synteny_path
)
ch_versions = ch_versions.mix( SYNTENY.out.versions )
if ( !exclude_workflow_steps.contains("synteny")) {
SYNTENY (
YAML_INPUT.out.reference_ch,
YAML_INPUT.out.synteny_path
)
ch_versions = ch_versions.mix( SYNTENY.out.versions )
}

//
// SUBWORKFLOW: Takes reference, pacbio reads
//
READ_COVERAGE (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.read_ch
)
ch_versions = ch_versions.mix( READ_COVERAGE.out.versions )
if ( !exclude_workflow_steps.contains("read_coverage")) {
READ_COVERAGE (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.read_ch
)
coverage_report = READ_COVERAGE.out.ch_reporting
ch_versions = ch_versions.mix(READ_COVERAGE.out.versions)
} else {
coverage_report = []
}

//
// SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE
//
TELO_FINDER ( YAML_INPUT.out.reference_ch,
YAML_INPUT.out.teloseq
)
ch_versions = ch_versions.mix( TELO_FINDER.out.versions )
if ( !exclude_workflow_steps.contains("telo_finder")) {
TELO_FINDER ( YAML_INPUT.out.reference_ch,
YAML_INPUT.out.teloseq
)
ch_versions = ch_versions.mix( TELO_FINDER.out.versions )
}

//
// SUBWORKFLOW: GENERATE BUSCO ANNOTATION FOR ANCESTRAL UNITS
//
BUSCO_ANNOTATION (
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.reference_ch,
YAML_INPUT.out.lineageinfo,
YAML_INPUT.out.lineagespath,
buscogene_asfile,
ancestral_table
)
ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions )
if ( !exclude_workflow_steps.contains("busco")) {
BUSCO_ANNOTATION (
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.reference_ch,
YAML_INPUT.out.lineageinfo,
YAML_INPUT.out.lineagespath,
buscogene_asfile,
ancestral_table
)
ch_versions = ch_versions.mix( BUSCO_ANNOTATION.out.versions )
}

//
// SUBWORKFLOW: Takes reads and assembly, produces kmer plot
//
KMER (
YAML_INPUT.out.reference_ch,
YAML_INPUT.out.read_ch
)
ch_versions = ch_versions.mix( KMER.out.versions )
if ( !exclude_workflow_steps.contains("kmer")) {
KMER (
YAML_INPUT.out.reference_ch,
YAML_INPUT.out.read_ch
)
ch_versions = ch_versions.mix( KMER.out.versions )
}

//
// SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX
//
HIC_MAPPING (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.ref_index,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.hic_reads_ch,
YAML_INPUT.out.assembly_id,
GAP_FINDER.out.gap_file,
READ_COVERAGE.out.ch_covbw_nor,
READ_COVERAGE.out.ch_covbw_avg,
TELO_FINDER.out.bedgraph_file,
REPEAT_DENSITY.out.repeat_density,
params.entry
)
ch_versions = ch_versions.mix( HIC_MAPPING.out.versions )
// Runs HIC_MAPPING unless "hic_mapping" was listed in the excluded steps, and records
// its reporting channel in hic_report for the reporting logic further down.
// NOTE(review): the call reads GAP_FINDER.out, READ_COVERAGE.out, TELO_FINDER.out and
// REPEAT_DENSITY.out, but each of those subworkflows is itself behind an exclude guard.
// Excluding any of them while keeping hic_mapping presumably fails when the `.out`
// of a never-invoked workflow is accessed — TODO confirm and either reject that
// combination up front or supply placeholder channels.
if ( !exclude_workflow_steps.contains("hic_mapping")) {
HIC_MAPPING (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.ref_index,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.hic_reads_ch,
YAML_INPUT.out.assembly_id,
GAP_FINDER.out.gap_file,
READ_COVERAGE.out.ch_covbw_nor,
READ_COVERAGE.out.ch_covbw_avg,
TELO_FINDER.out.bedgraph_file,
REPEAT_DENSITY.out.repeat_density,
params.entry
)
ch_versions = ch_versions.mix( HIC_MAPPING.out.versions )
hic_report = HIC_MAPPING.out.ch_reporting
} else {
// Fallback used when the step is skipped.
// NOTE(review): this empty list is later passed to .combine(); verify the downstream
// .map closure still receives the number of elements it destructures.
hic_report = []
}

//
// SUBWORKFLOW: Collates version data from prior subworkflows
Expand All @@ -256,8 +292,8 @@ workflow TREEVAL {
// LOGIC: GENERATE SOME CHANNELS FOR REPORTING
//
YAML_INPUT.out.reference_ch
.combine( READ_COVERAGE.out.ch_reporting )
.combine( HIC_MAPPING.out.ch_reporting )
.combine( coverage_report )
.combine( hic_report )
.combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions )
.map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [
rf_data: tuple(
Expand Down
94 changes: 58 additions & 36 deletions workflows/treeval_rapid.nf
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ workflow TREEVAL_RAPID {
main:
ch_versions = Channel.empty()

//
// LOGIC: BUILD THE LIST OF STEPS TO SKIP FROM THE COMMA-SEPARATED --steps PARAMETER.
//          Always a List (empty when --steps is unset) so the containsAll() / contains()
//          checks below behave the same whether or not the flag was given.
//          Entries are trimmed so "--steps 'kmer, busco'" is accepted.
//
exclude_workflow_steps = params.steps ? params.steps.toString().split(",").collect { it.trim() } : []

// Every step name a user may exclude. The empty string is kept so that a stray
// comma (e.g. "kmer,,busco") does not fail validation.
full_list = ["insilico_digest", "gene_alignments", "repeat_density", "gap_finder", "selfcomp", "synteny", "read_coverage", "telo_finder", "busco", "kmer", "hic_mapping", ""]

// Fail early, naming the flag the user actually typed (--steps), if any requested step is unknown.
if (!full_list.containsAll(exclude_workflow_steps)) {
    exit 1, "There is an extra argument given on Command Line: \n Check contents of --steps: $exclude_workflow_steps\nMaster list is: $full_list"
}

params.entry = 'RAPID'
input_ch = Channel.fromPath(params.input, checkIfExists: true)
//
Expand All @@ -75,55 +83,69 @@ workflow TREEVAL_RAPID {
//
// SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK
//
REPEAT_DENSITY (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome
)
ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions )

if ( !exclude_workflow_steps.contains("repeat_density")) {
REPEAT_DENSITY (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome
)
ch_versions = ch_versions.mix( REPEAT_DENSITY.out.versions )
}

//
// SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS
//
GAP_FINDER (
YAML_INPUT.out.reference_ch
)
ch_versions = ch_versions.mix( GAP_FINDER.out.versions )
if ( !exclude_workflow_steps.contains("gap_finder")) {
GAP_FINDER (
YAML_INPUT.out.reference_ch
)
ch_versions = ch_versions.mix( GAP_FINDER.out.versions )
}

//
// SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE
//
TELO_FINDER ( YAML_INPUT.out.reference_ch,
YAML_INPUT.out.teloseq
)
ch_versions = ch_versions.mix( TELO_FINDER.out.versions )
if ( !exclude_workflow_steps.contains("telo_finder")) {
TELO_FINDER ( YAML_INPUT.out.reference_ch,
YAML_INPUT.out.teloseq
)
ch_versions = ch_versions.mix( TELO_FINDER.out.versions )
}

//
// SUBWORKFLOW: Takes reference, pacbio reads
//
READ_COVERAGE (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.read_ch
)
ch_versions = ch_versions.mix( READ_COVERAGE.out.versions )

// Runs READ_COVERAGE unless "read_coverage" was excluded. coverage_report must be
// assigned on BOTH branches because the reporting logic below combines it unconditionally.
if ( !exclude_workflow_steps.contains("read_coverage")) {
    READ_COVERAGE (
        YAML_INPUT.out.reference_ch,
        GENERATE_GENOME.out.dot_genome,
        YAML_INPUT.out.read_ch
    )
    coverage_report = READ_COVERAGE.out.ch_reporting
    ch_versions = ch_versions.mix( READ_COVERAGE.out.versions )
} else {
    // Fallback used when the step is skipped.
    coverage_report = []
}
//
// SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX
//
HIC_MAPPING (
YAML_INPUT.out.reference_ch,
GENERATE_GENOME.out.ref_index,
GENERATE_GENOME.out.dot_genome,
YAML_INPUT.out.hic_reads_ch,
YAML_INPUT.out.assembly_id,
GAP_FINDER.out.gap_file,
READ_COVERAGE.out.ch_covbw_nor,
READ_COVERAGE.out.ch_covbw_avg,
TELO_FINDER.out.bedgraph_file,
REPEAT_DENSITY.out.repeat_density,
params.entry
)
ch_versions = ch_versions.mix( HIC_MAPPING.out.versions )
// Runs HIC_MAPPING unless "hic_mapping" was excluded. hic_report must be assigned on
// BOTH branches because the reporting logic below combines it unconditionally.
// NOTE(review): the call reads the outputs of GAP_FINDER, READ_COVERAGE, TELO_FINDER and
// REPEAT_DENSITY, which are each optional too; excluding one of them while keeping
// hic_mapping presumably fails on the missing `.out` — TODO confirm.
if ( !exclude_workflow_steps.contains("hic_mapping")) {
    HIC_MAPPING (
        YAML_INPUT.out.reference_ch,
        GENERATE_GENOME.out.ref_index,
        GENERATE_GENOME.out.dot_genome,
        YAML_INPUT.out.hic_reads_ch,
        YAML_INPUT.out.assembly_id,
        GAP_FINDER.out.gap_file,
        READ_COVERAGE.out.ch_covbw_nor,
        READ_COVERAGE.out.ch_covbw_avg,
        TELO_FINDER.out.bedgraph_file,
        REPEAT_DENSITY.out.repeat_density,
        params.entry
    )
    ch_versions = ch_versions.mix( HIC_MAPPING.out.versions )
    hic_report = HIC_MAPPING.out.ch_reporting
} else {
    // Fallback used when the step is skipped.
    hic_report = []
}

//
// SUBWORKFLOW: Collates version data from prior subworkflows
Expand All @@ -136,8 +158,8 @@ workflow TREEVAL_RAPID {
// LOGIC: GENERATE SOME CHANNELS FOR REPORTING
//
YAML_INPUT.out.reference_ch
.combine( READ_COVERAGE.out.ch_reporting )
.combine( HIC_MAPPING.out.ch_reporting )
.combine( coverage_report )
.combine( hic_report )
.combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions )
.map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [
rf_data: tuple(
Expand Down
Loading

0 comments on commit e4b4cf0

Please sign in to comment.