Updating yaml assignments and references, some formatting updates
DLBPointon committed Nov 21, 2023
1 parent 41eefc4 commit b63db57
Showing 4 changed files with 38 additions and 43 deletions.
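
In short: in the test-data YAML, the assembly keys level, asmVersion, classT, and gevalType become assem_level, assem_version, defined_class, and project_id; under assem_reads, pacbio, hic, and supplementary become longread_data, hic_data, and supplementary_data, with a new longread_type key; and the alignment geneset key becomes geneset_id. The three Nextflow files then pass YAML_INPUT.out.reference wherever GENERATE_GENOME.out.reference_tuple was consumed, alongside spacing-only formatting tweaks.
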
assets/local_testing/nxOscDF5033.yaml: 9 additions, 8 deletions
@@ -1,19 +1,20 @@
 assembly:
-  level: scaffold
+  assem_level: scaffold
+  assem_version: 1
   sample_id: Oscheius_DF5033
   latin_name: to_provide_taxonomic_rank
-  classT: nematode
-  asmVersion: 1
-  gevalType: DTOL
+  defined_class: nematode
+  project_id: DTOL
 reference_file: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta
 assem_reads:
-  pacbio: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
-  hic: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/
-  supplementary: path
+  longread_type: hifi
+  longread_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
+  hic_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/
+  supplementary_data: path
 alignment:
   data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/
   common_name: "" # For future implementation (adding bee, wasp, ant etc)
-  geneset: "OscheiusTipulae.ASM1342590v1,CaenorhabditisElegans.WBcel235,Gae_host.Gae"
+  geneset_id: "OscheiusTipulae.ASM1342590v1,CaenorhabditisElegans.WBcel235,Gae_host.Gae"
   #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv"
 self_comp:
   motif_len: 0
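
For reference, the path comment above still uses the pre-rename placeholders; with the renamed keys it corresponds to {data_dir}{defined_class}/{common_name}/csv_data/{geneset_id}-data.csv. Using the values in this file (and an empty common_name), the first geneset entry would resolve to roughly:

    /lustre/scratch123/tol/resources/treeval/gene_alignment_data/nematode/csv_data/OscheiusTipulae.ASM1342590v1-data.csv

How an empty common_name segment is joined is an assumption here; the actual path is assembled by the pipeline, not by this YAML.
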
subworkflows/local/ancestral_gene.nf: 6 additions, 6 deletions
@@ -18,7 +18,7 @@ workflow ANCESTRAL_GENE {
     main:
     ch_versions = Channel.empty()

-    ch_grab = GrabFiles(busco_dir)
+    ch_grab = GrabFiles( busco_dir )

     //
     // MODULE: EXTRACTS ANCESTRALLY LINKED BUSCO GENES FROM FULL TABLE
@@ -27,7 +27,7 @@ workflow ANCESTRAL_GENE {
         ch_grab,
         ancestral_table
     )
-    ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions)
+    ch_versions = ch_versions.mix( EXTRACT_ANCESTRAL.out.versions )

     //
     // LOGIC: STRIP OUT METADATA
@@ -45,7 +45,7 @@ workflow ANCESTRAL_GENE {
         EXTRACT_ANCESTRAL.out.comp_location,
         assignanc_input
     )
-    ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions)
+    ch_versions = ch_versions.mix( EXTRACT_ANCESTRAL.out.versions )

     //
     // MODULES: SORT THE BED FILE
@@ -54,7 +54,7 @@ workflow ANCESTRAL_GENE {
         ASSIGN_ANCESTRAL.out.assigned_bed,
         []
     )
-    ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions)
+    ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions )

     //
     // MODULES: CONVERT BED TO INDEXED BIGBED
@@ -64,11 +64,11 @@ workflow ANCESTRAL_GENE {
         dot_genome.map{ it[1] }, // Pull file from tuple(meta, file)
         buscogene_as
     )
-    ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions)
+    ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions )

     emit:
     ch_ancestral_bigbed = UCSC_BEDTOBIGBED.out.bigbed
-    versions = ch_versions.ifEmpty(null)
+    versions = ch_versions.ifEmpty( null )
 }
 process GrabFiles {
     label 'process_tiny'
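
Most of the lines touched above are part of the usual nf-core-style version bookkeeping: each module's versions output is mixed into a single accumulator channel and emitted at the end of the subworkflow. A minimal, self-contained sketch of that pattern follows; the module and its output are invented for illustration and are not part of TreeVal.

    process DEMO_MODULE {
        input:
        val x

        output:
        path "versions.yml", emit: versions

        script:
        """
        echo 'DEMO_MODULE: 1.0' > versions.yml
        """
    }

    workflow DEMO {
        main:
        ch_versions = Channel.empty()                                // empty accumulator

        DEMO_MODULE( Channel.of( 1 ) )                               // hypothetical module call
        ch_versions = ch_versions.mix( DEMO_MODULE.out.versions )    // collect its versions.yml

        emit:
        versions = ch_versions.ifEmpty( null )                       // null if nothing was mixed in
    }
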
subworkflows/local/hic_mapping.nf: 3 additions, 4 deletions
@@ -252,7 +252,7 @@ workflow HIC_MAPPING {
         GET_PAIRED_CONTACT_BED.out.bed
             .combine( dot_genome )
             .multiMap { meta, paired_contacts, meta_my_genome, my_genome ->
-                paired : tuple([ id: meta.id, single_end: true], paired_contacts )
+                paired : tuple( [ id: meta.id, single_end: true], paired_contacts )
                 genome : my_genome
                 id : meta.id
             }
@@ -305,18 +305,17 @@ workflow HIC_MAPPING {
             .map{ meta, cools, cool_bin ->
                 [meta, cools]
             }
-            .set{ch_cool}
+            .set{ ch_cool }

     //
     // MODULE: ZOOM COOL TO MCOOL
     //
-    COOLER_ZOOMIFY(ch_cool)
+    COOLER_ZOOMIFY( ch_cool )
     ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions)

     //
     // LOGIC: FOR REPORTING
     //
-
     ch_cram_files = GrabFiles( get_reads_input )

     ch_cram_files
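
The first hunk above only adjusts spacing inside the multiMap block, but the block itself is worth a note: it fans one combined channel out into named sub-channels (paired, genome, id). A rough standalone sketch of that operator, with the channel contents invented purely for illustration:

    workflow {
        Channel
            .of( [ [ id: 'sample1' ], 'paired_contacts.bed', [ id: 'genome' ], 'my.genome' ] )
            .multiMap { meta, paired_contacts, meta_my_genome, my_genome ->
                paired : tuple( [ id: meta.id, single_end: true ], paired_contacts )
                genome : my_genome
                id     : meta.id
            }
            .set { ch_split }

        ch_split.paired.view()   // [[id:sample1, single_end:true], paired_contacts.bed]
        ch_split.genome.view()   // my.genome
        ch_split.id.view()       // sample1
    }
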
workflows/treeval.nf: 20 additions, 25 deletions
@@ -101,7 +101,6 @@ workflow TREEVAL {
     // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file
     //
     GENERATE_GENOME (
-        YAML_INPUT.out.assembly_id,
         YAML_INPUT.out.reference
     )
     ch_versions = ch_versions.mix( GENERATE_GENOME.out.versions )
@@ -113,9 +112,8 @@
     ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" )

     INSILICO_DIGEST (
-        YAML_INPUT.out.assembly_id,
         GENERATE_GENOME.out.dot_genome,
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         ch_enzyme,
         digest_asfile
     )
@@ -137,10 +135,9 @@
     //
     GENE_ALIGNMENT (
         GENERATE_GENOME.out.dot_genome,
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         GENERATE_GENOME.out.ref_index,
         GENERATE_GENOME.out.max_scaff_size,
-        YAML_INPUT.out.assembly_classT,
         YAML_INPUT.out.align_data_dir,
         YAML_INPUT.out.align_geneset,
         YAML_INPUT.out.align_common,
@@ -153,7 +150,7 @@
     // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK
     //
     REPEAT_DENSITY (
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         GENERATE_GENOME.out.dot_genome
     )
     ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions)
@@ -162,7 +159,7 @@
     // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS
     //
     GAP_FINDER (
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         GENERATE_GENOME.out.max_scaff_size
     )
     ch_versions = ch_versions.mix(GAP_FINDER.out.versions)
@@ -172,7 +169,7 @@
     // // file to generate a file containing sites of self-complementary sequence.
     // //
     SELFCOMP (
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         GENERATE_GENOME.out.dot_genome,
         YAML_INPUT.out.mummer_chunk,
         YAML_INPUT.out.motif_len,
@@ -185,7 +182,7 @@
     // and generated a file of syntenic blocks.
     //
     SYNTENY (
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         YAML_INPUT.out.synteny_path,
         YAML_INPUT.out.assembly_classT
     )
@@ -195,7 +192,7 @@
     // SUBWORKFLOW: Takes reference, pacbio reads
     //
     LONGREAD_COVERAGE (
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         GENERATE_GENOME.out.dot_genome,
         YAML_INPUT.out.pacbio_reads
     )
@@ -204,9 +201,10 @@
     //
     // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE
     //
-    TELO_FINDER ( GENERATE_GENOME.out.max_scaff_size,
-        GENERATE_GENOME.out.reference_tuple,
-        YAML_INPUT.out.teloseq
+    TELO_FINDER (
+        GENERATE_GENOME.out.max_scaff_size,
+        YAML_INPUT.out.reference,
+        YAML_INPUT.out.teloseq
     )
     ch_versions = ch_versions.mix(TELO_FINDER.out.versions)

@@ -215,8 +213,7 @@
     //
     BUSCO_ANNOTATION (
         GENERATE_GENOME.out.dot_genome,
-        GENERATE_GENOME.out.reference_tuple,
-        YAML_INPUT.out.assembly_classT,
+        YAML_INPUT.out.reference,
         YAML_INPUT.out.lineageinfo,
         YAML_INPUT.out.lineagespath,
         buscogene_asfile,
@@ -228,16 +225,16 @@
     // SUBWORKFLOW: Takes reads and assembly, produces kmer plot
     //
     KMER (
-        GENERATE_GENOME.out.reference_tuple,
-        YAML_INPUT.out.pacbio_reads
+        YAML_INPUT.out.reference,
+        YAML_INPUT.out.longreads_reads
     )
     ch_versions = ch_versions.mix(KMER.out.versions)

     //
     // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX
     //
     HIC_MAPPING (
-        GENERATE_GENOME.out.reference_tuple,
+        YAML_INPUT.out.reference,
         GENERATE_GENOME.out.ref_index,
         GENERATE_GENOME.out.dot_genome,
         YAML_INPUT.out.hic_reads,
@@ -261,22 +258,20 @@
     //
     // LOGIC: GENERATE SOME CHANNELS FOR REPORTING
     //
-    GENERATE_GENOME.out.reference_tuple
-        .combine( YAML_INPUT.out.assembly_classT )
-        .combine( YAML_INPUT.out.assembly_ttype )
-        .combine( YAML_INPUT.out.assembly_id )
+    YAML_INPUT.out.reference
         .combine( LONGREAD_COVERAGE.out.ch_reporting )
         .combine( HIC_MAPPING.out.ch_reporting )
         .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions )
-        .map { meta, reference, lineage, ticket, sample_id, longread_meta, longread_files, hic_meta, hic_files, custom_file -> [
+        .map { meta, reference, longread_meta, longread_files, hic_meta, hic_files, custom_file -> [
             rf_data: tuple(
                 [ id: meta.id,
                   sz: file(reference).size(),
-                  ln: lineage,
-                  tk: ticket ],
+                  ln: meta.class,
+                  tk: meta.project_type ],
                 reference
             ),
-            sample_id: sample_id,
+            sample_id: meta.id,
             pb_data: tuple(longread_meta, longread_files),
             cm_data: tuple(hic_meta, hic_files),
             custom: custom_file,
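
The last hunk drops the separate assembly_classT, assembly_ttype, and assembly_id combines and reads those fields from the meta map instead (meta.class, meta.project_type, meta.id). That only works if YAML_INPUT.out.reference now emits a tuple whose meta map carries those keys; that shape is inferred from the new map body, and the values below are invented. A minimal sketch of the reshaping (the sz field is omitted because it needs a real file on disk):

    workflow {
        ch_reference = Channel.of(
            [ [ id: 'Oscheius_DF5033', class: 'nematode', project_type: 'DTOL' ], 'reference.fasta' ]
        )

        ch_reference
            .map { meta, reference ->
                [
                    rf_data   : tuple( [ id: meta.id, ln: meta.class, tk: meta.project_type ], reference ),
                    sample_id : meta.id
                ]
            }
            .view()
    }
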
