diff --git a/assets/local_testing/nxOscDF5033.yaml b/assets/local_testing/nxOscDF5033.yaml index 86ac7182..6a0c147f 100755 --- a/assets/local_testing/nxOscDF5033.yaml +++ b/assets/local_testing/nxOscDF5033.yaml @@ -1,19 +1,20 @@ assembly: - level: scaffold + assem_level: scaffold + assem_version: 1 sample_id: Oscheius_DF5033 latin_name: to_provide_taxonomic_rank - classT: nematode - asmVersion: 1 - gevalType: DTOL + defined_class: nematode + project_id: DTOL reference_file: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta assem_reads: - pacbio: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/ - hic: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/ - supplementary: path + longread_type: hifi + longread_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/ + hic_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/ + supplementary_data: path alignment: data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/ common_name: "" # For future implementation (adding bee, wasp, ant etc) - geneset: "OscheiusTipulae.ASM1342590v1,CaenorhabditisElegans.WBcel235,Gae_host.Gae" + geneset_id: "OscheiusTipulae.ASM1342590v1,CaenorhabditisElegans.WBcel235,Gae_host.Gae" - #Path should end up looking like "{data_dir}{classT}/{common_name}/csv_data/{geneset}-data.csv" + #Path should end up looking like "{data_dir}{defined_class}/{common_name}/csv_data/{geneset_id}-data.csv" self_comp: motif_len: 0 diff --git a/subworkflows/local/ancestral_gene.nf b/subworkflows/local/ancestral_gene.nf index ed212d4d..dc47cce7 100755 --- a/subworkflows/local/ancestral_gene.nf +++ b/subworkflows/local/ancestral_gene.nf @@ -18,7 +18,7 @@ 
workflow ANCESTRAL_GENE { main: ch_versions = Channel.empty() - ch_grab = GrabFiles(busco_dir) + ch_grab = GrabFiles( busco_dir ) // // MODULE: EXTRACTS ANCESTRALLY LINKED BUSCO GENES FROM FULL TABLE @@ -27,7 +27,7 @@ workflow ANCESTRAL_GENE { ch_grab, ancestral_table ) - ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions) + ch_versions = ch_versions.mix( EXTRACT_ANCESTRAL.out.versions ) // // LOGIC: STRIP OUT METADATA @@ -45,7 +45,7 @@ workflow ANCESTRAL_GENE { EXTRACT_ANCESTRAL.out.comp_location, assignanc_input ) - ch_versions = ch_versions.mix(EXTRACT_ANCESTRAL.out.versions) + ch_versions = ch_versions.mix( ASSIGN_ANCESTRAL.out.versions ) // // MODULES: SORT THE BED FILE @@ -54,7 +54,7 @@ workflow ANCESTRAL_GENE { ASSIGN_ANCESTRAL.out.assigned_bed, [] ) - ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix( BEDTOOLS_SORT.out.versions ) // // MODULES: CONVERT BED TO INDEXED BIGBED @@ -64,11 +64,11 @@ workflow ANCESTRAL_GENE { dot_genome.map{ it[1] }, // Pull file from tuple(meta, file) buscogene_as ) - ch_versions = ch_versions.mix(UCSC_BEDTOBIGBED.out.versions) + ch_versions = ch_versions.mix( UCSC_BEDTOBIGBED.out.versions ) emit: ch_ancestral_bigbed = UCSC_BEDTOBIGBED.out.bigbed - versions = ch_versions.ifEmpty(null) + versions = ch_versions.ifEmpty( null ) } process GrabFiles { label 'process_tiny' diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index be089e73..1ee3451a 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -252,7 +252,7 @@ workflow HIC_MAPPING { GET_PAIRED_CONTACT_BED.out.bed .combine( dot_genome ) .multiMap { meta, paired_contacts, meta_my_genome, my_genome -> - paired : tuple([ id: meta.id, single_end: true], paired_contacts ) + paired : tuple( [ id: meta.id, single_end: true], paired_contacts ) genome : my_genome id : meta.id } @@ -305,18 +305,17 @@ workflow HIC_MAPPING { .map{ meta, cools, cool_bin -> [meta, cools] } - 
.set{ch_cool} + .set{ ch_cool } // // MODULE: ZOOM COOL TO MCOOL // - COOLER_ZOOMIFY(ch_cool) + COOLER_ZOOMIFY( ch_cool ) ch_versions = ch_versions.mix(COOLER_ZOOMIFY.out.versions) // // LOGIC: FOR REPORTING // - ch_cram_files = GrabFiles( get_reads_input ) ch_cram_files diff --git a/workflows/treeval.nf b/workflows/treeval.nf index b7427cd9..98b11dbb 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -101,7 +101,6 @@ workflow TREEVAL { // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file // GENERATE_GENOME ( - YAML_INPUT.out.assembly_id, YAML_INPUT.out.reference ) ch_versions = ch_versions.mix( GENERATE_GENOME.out.versions ) @@ -113,9 +112,8 @@ workflow TREEVAL { ch_enzyme = Channel.of( "bspq1","bsss1","DLE1" ) INSILICO_DIGEST ( - YAML_INPUT.out.assembly_id, GENERATE_GENOME.out.dot_genome, - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, ch_enzyme, digest_asfile ) @@ -137,10 +135,9 @@ workflow TREEVAL { // GENE_ALIGNMENT ( GENERATE_GENOME.out.dot_genome, - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, GENERATE_GENOME.out.ref_index, GENERATE_GENOME.out.max_scaff_size, - YAML_INPUT.out.assembly_classT, YAML_INPUT.out.align_data_dir, YAML_INPUT.out.align_geneset, YAML_INPUT.out.align_common, @@ -153,7 +150,7 @@ workflow TREEVAL { // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK // REPEAT_DENSITY ( - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, GENERATE_GENOME.out.dot_genome ) ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions) @@ -162,7 +159,7 @@ workflow TREEVAL { // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // GAP_FINDER ( - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, GENERATE_GENOME.out.max_scaff_size ) ch_versions = ch_versions.mix(GAP_FINDER.out.versions) @@ -172,7 +169,7 @@ workflow TREEVAL { // // file to generate a file containing sites of self-complementary sequnce. 
// // SELFCOMP ( - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, GENERATE_GENOME.out.dot_genome, YAML_INPUT.out.mummer_chunk, YAML_INPUT.out.motif_len, @@ -185,7 +182,7 @@ workflow TREEVAL { // and generated a file of syntenic blocks. // SYNTENY ( - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, YAML_INPUT.out.synteny_path, YAML_INPUT.out.assembly_classT ) @@ -195,7 +192,7 @@ workflow TREEVAL { // SUBWORKFLOW: Takes reference, pacbio reads // LONGREAD_COVERAGE ( - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, GENERATE_GENOME.out.dot_genome, YAML_INPUT.out.pacbio_reads ) @@ -204,9 +201,10 @@ workflow TREEVAL { // // SUBWORKFLOW: GENERATE TELOMERE WINDOW FILES WITH PACBIO READS AND REFERENCE // - TELO_FINDER ( GENERATE_GENOME.out.max_scaff_size, - GENERATE_GENOME.out.reference_tuple, - YAML_INPUT.out.teloseq + TELO_FINDER ( + GENERATE_GENOME.out.max_scaff_size, + YAML_INPUT.out.reference, + YAML_INPUT.out.teloseq ) ch_versions = ch_versions.mix(TELO_FINDER.out.versions) @@ -215,8 +213,7 @@ workflow TREEVAL { // BUSCO_ANNOTATION ( GENERATE_GENOME.out.dot_genome, - GENERATE_GENOME.out.reference_tuple, - YAML_INPUT.out.assembly_classT, + YAML_INPUT.out.reference, YAML_INPUT.out.lineageinfo, YAML_INPUT.out.lineagespath, buscogene_asfile, @@ -228,8 +225,8 @@ workflow TREEVAL { // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // KMER ( - GENERATE_GENOME.out.reference_tuple, - YAML_INPUT.out.pacbio_reads + YAML_INPUT.out.reference, + YAML_INPUT.out.longreads_reads ) ch_versions = ch_versions.mix(KMER.out.versions) @@ -237,7 +234,7 @@ workflow TREEVAL { // SUBWORKFLOW: GENERATE HIC MAPPING TO GENERATE PRETEXT FILES AND JUICEBOX // HIC_MAPPING ( - GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.reference, GENERATE_GENOME.out.ref_index, GENERATE_GENOME.out.dot_genome, YAML_INPUT.out.hic_reads, @@ -261,22 +258,20 @@ workflow TREEVAL { // // LOGIC: GENERATE SOME CHANNELS FOR REPORTING // - 
GENERATE_GENOME.out.reference_tuple - .combine( YAML_INPUT.out.assembly_classT ) - .combine( YAML_INPUT.out.assembly_ttype ) + YAML_INPUT.out.reference .combine( YAML_INPUT.out.assembly_id ) .combine( LONGREAD_COVERAGE.out.ch_reporting ) .combine( HIC_MAPPING.out.ch_reporting ) .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions ) - .map { meta, reference, lineage, ticket, sample_id, longread_meta, longread_files, hic_meta, hic_files, custom_file -> [ + .map { meta, reference, sample_id, longread_meta, longread_files, hic_meta, hic_files, custom_file -> [ rf_data: tuple( [ id: meta.id, sz: file(reference).size(), - ln: lineage, - tk: ticket ], + ln: meta.class, + tk: meta.project_type ], reference ), - sample_id: sample_id, + sample_id: meta.id, pb_data: tuple(longread_meta, longread_files), cm_data: tuple(hic_meta, hic_files), custom: custom_file,