From d2144be9b8aa261089eae6de066055959ceea7d4 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 7 Feb 2024 16:20:17 +0000 Subject: [PATCH] Getting closer to parity with TreeVal --- conf/modules.config | 76 ++++++++++++------- .../cram_filter_align_bwamem2_fixmate_sort.nf | 23 +++--- ...filter_minimap2_filter5end_fixmate_sort.nf | 8 +- modules/local/pretext_graph.nf | 12 +-- nextflow.config | 2 +- subworkflows/local/accessory_files.nf | 10 ++- subworkflows/local/generate_maps.nf | 55 ++++++-------- subworkflows/local/hic_bwamem2.nf | 11 +-- subworkflows/local/hic_minimap2.nf | 7 +- subworkflows/local/longread_coverage.nf | 14 ++-- subworkflows/local/pretext_ingestion.nf | 5 +- workflows/curationpretext_allf.nf | 23 ++++-- 12 files changed, 138 insertions(+), 108 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 0190d63..7d5af0b 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -29,7 +29,7 @@ process { } // Coverge and repeat, gap, telo - withName: 'UCSC_BEDGRAPHTOBIGWIG|BEDTOOLS_MERGE_MAX|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MIN|GAP_LENGTH|EXTRACT_TELO' { + withName: 'BED2BW_NORMAL|BED2BW_LOG|BEDTOOLS_MERGE_MAX|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MIN|GAP_LENGTH|EXTRACT_TELO' { publishDir = [ path: { "${params.outdir}/accessory_files" }, mode: params.publish_dir_mode, @@ -69,66 +69,74 @@ process { ext.args = "-n 1" } - withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' { - ext.args = "--MD -t 8" - ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } + // + // ACCESSORY_FILES -> LONGREAD_COVERAGE + // + withName: ".*:.*:ACCESSORY_FILES:LONGREAD_COVERAGE:SAMTOOLS_VIEW_FILTER_PRIMARY" { + ext.args = "-b -hF 256" + ext.prefix = { "${meta.id}_view" } } - withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN_SPLIT' { - ext.args = { "-t 20 --split-prefix ${meta.split_prefix}" } + withName: '.*:.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' { + ext.args = "--MD -t 8" ext.prefix = { 
"${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } } - withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { + withName: '.*:.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { ext.prefix = { "${meta.id}_merge" } } - withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW' { - ext.args = "-b -hF 256" - ext.prefix = { "${meta.id}_view" } - } - - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' { + withName: '.*:.*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' { ext.args = "-bga -split" ext.prefix = { "${meta.id}_genome2cov" } } - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' { + withName: '.*:.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' { ext.args = "-d 50" ext.prefix = { "maxdepth" } } - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' { + withName: '.*:.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' { ext.args = "-d 50" ext.prefix = { "zerodepth" } } - withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' { + withName: '.*:.*:.*:LONGREAD_COVERAGE:GNU_SORT' { ext.args = "-k1,1 -k2,2n" ext.prefix = { "${meta.id}_sorted" } } - withName: '.*:.*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG' { + withName: '.*:.*:.*:LONGREAD_COVERAGE:BED2BW_NORMAL' { ext.prefix = 'coverage' } - withName: 'FIND_TELOMERE_REGIONS' { - ext.find_telomere = 'find_telomere' + withName: "FIND_TELOMERE_REGIONS" { + ext.find_telomere = "find_telomere" } - withName: 'FIND_TELOMERE_WINDOWS' { - ext.telomere_jar = 'telomere.jar' - ext.telomere_jvm_params = '-Xms1g -Xmx1g' + withName: "FIND_TELOMERE_WINDOWS" { + ext.telomere_jar = "telomere.jar" + ext.telomere_jvm_params = "-Xms1g -Xmx1g" } withName: PRETEXTMAP_STANDRD { - ext.args = "--sortby length --mapq 0" - ext.prefix = { "${meta.id}_normal" } + ext.args = { "--sortby length --mapq 0 --memory ${task.memory.giga}G" } + ext.prefix = { "${meta.id}_normal_pi" } } withName: PRETEXTMAP_HIGHRES { - ext.args = "--sortby length --highRes --mapq 0" - ext.prefix = { "${meta.id}_hr" } + ext.args = { "--sortby length --highRes --mapq 0 --memory ${task.memory.giga}G" 
} + ext.prefix = { "${meta.id}_hr_pi" } + } + + withName: ".*:PRETEXT_INGEST_SNDRD:PRETEXT_GRAPH" { + ext.args = { "--textureBuffer 1G" } + ext.prefix = { "${meta.id}_normal" } + } + + withName: ".*:PRETEXT_INGEST_HIRES:PRETEXT_GRAPH" { + ext.args = { "--textureBuffer 1G" } + ext.prefix = { "${meta.id}_hr" } } withName: 'SNAPSHOT_SRES' { @@ -141,6 +149,22 @@ process { ext.prefix = { "${meta.id}_hr" } } + withName: ".*:.*:GENERATE_MAPS:HIC_BWAMEM2:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" { + ext.args = "" + ext.args1 = "-F0xB00 -nt" + ext.args2 = { "-5SPCp -H'${rglines}'" } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } + } + + withName: ".*:.*:GENERATE_MAPS:HIC_MINIMAP2:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" { + ext.args = "" + ext.args1 = "" + ext.args2 = { "-ax sr" } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, diff --git a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf index 3f15dfc..ca706e2 100755 --- a/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf +++ b/modules/local/cram_filter_align_bwamem2_fixmate_sort.nf @@ -1,13 +1,13 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { tag "$meta.id" - label 'process_high' + label "process_high" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' : - 'biocontainers/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : + 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" input: - tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix) + tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix), path(reference) output: tuple val(meta), path("*.bam"), emit: mappedbam @@ -18,13 +18,18 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { script: def args = task.ext.args ?: '' + def args1 = task.ext.args1 ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + // Please be aware that one of the tools here requires mem = 28 * reference size!!! 
""" cram_filter -n ${from}-${to} ${cramfile} - | \\ - samtools fastq -F0xB00 -nt - | \\ + samtools fastq ${args1} | \\ bwa-mem2 mem -p ${bwaprefix} -t${task.cpus} -5SPCp -H'${rglines}' - | \\ - samtools fixmate -mpu - - | \\ - samtools sort --write-index -l1 -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam - + samtools fixmate ${args3} - - | \\ + samtools sort ${args4} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam - cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -32,7 +37,7 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g') END_VERSIONS """ - // temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//') + // temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//') CAUSES ERROR stub: def prefix = task.ext.prefix ?: "${meta.id}" @@ -44,7 +49,7 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) - bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g') + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') END_VERSIONS """ } diff --git a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf index b704372..f33847f 100644 --- a/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf +++ b/modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf @@ -2,10 +2,12 @@ process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { tag "$meta.id" label "process_high" - container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' : + 'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }" input: - tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref) + tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref), path(reference) output: tuple val(meta), path("*.bam"), emit: mappedbam @@ -52,4 +54,4 @@ process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT { minimap2: \$(echo \$(minimap2 version 2>&1) | sed 's/.* //') END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/local/pretext_graph.nf b/modules/local/pretext_graph.nf index 680a28c..357edfc 100644 --- a/modules/local/pretext_graph.nf +++ b/modules/local/pretext_graph.nf @@ -8,12 +8,12 @@ process PRETEXT_GRAPH { 'biocontainers/mulled-v2-077b852b8b5440d395ad23f9f24f50c943390a84:da499c75fec554e81f4847c4fa8b6b167afbe3bf-0' }" input: - tuple val(meta), path(pretext_file) - tuple val(gap), path(gap_file) - tuple val(cov), path(coverage) - tuple val(log), path(log_coverage) - tuple val(telo), path(telomere_file) - tuple val(rep), path(repeat_density) + tuple val(meta), path(pretext_file, stageAs: 'pretext.pretext') + tuple val(gap), path(gap_file, stageAs: 'gap.bed') + tuple val(cov), path(coverage, stageAs: 'coverage.bigWig') + tuple val(log), path(log_coverage, stageAs: 'log_cov.bigWig') + tuple val(telo), path(telomere_file, stageAs: 'telo.bedgraph') + tuple val(rep), path(repeat_density, stageAs: 'repeats.bigWig') output: tuple val(meta), path("*.pretext") , emit: pretext diff --git a/nextflow.config b/nextflow.config index 7e0dc97..40db1ee 100755 --- a/nextflow.config +++ b/nextflow.config @@ -15,7 +15,7 @@ params { teloseq = "TTAGGG" pacbio = null cram = null - aligner = "minimap2" + aligner = "bwamem2" 
pacbio_type = "hifi" // Boilerplate options diff --git a/subworkflows/local/accessory_files.nf b/subworkflows/local/accessory_files.nf index 6c6fe4e..454b80a 100755 --- a/subworkflows/local/accessory_files.nf +++ b/subworkflows/local/accessory_files.nf @@ -40,7 +40,7 @@ workflow ACCESSORY_FILES { // GET_LARGEST_SCAFF ( GENERATE_GENOME_FILE.out.dotgenome ) ch_versions = ch_versions.mix( GET_LARGEST_SCAFF.out.versions ) - + // // SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS // @@ -58,7 +58,7 @@ workflow ACCESSORY_FILES { reference_tuple, params.teloseq ) - ch_versions = ch_versions.mix(TELO_FINDER.out.versions) + ch_versions = ch_versions.mix(TELO_FINDER.out.versions) // // SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK @@ -70,10 +70,11 @@ workflow ACCESSORY_FILES { ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions) // - // SUBWORKFLOW: Takes reference, pacbio reads + // SUBWORKFLOW: Takes reference, pacbio reads // - LONGREAD_COVERAGE ( + LONGREAD_COVERAGE ( reference_tuple, + SAMTOOLS_FAIDX.out.fai, GENERATE_GENOME_FILE.out.dotgenome, pacbio_reads ) @@ -86,6 +87,7 @@ workflow ACCESSORY_FILES { telo_file = TELO_FINDER.out.bedgraph_file repeat_file = REPEAT_DENSITY.out.repeat_density coverage_bw = LONGREAD_COVERAGE.out.ch_bigwig + coverage_log_bw = LONGREAD_COVERAGE.out.ch_bigwig_log mins_bed = LONGREAD_COVERAGE.out.ch_minbed half_bed = LONGREAD_COVERAGE.out.ch_halfbed maxs_bed = LONGREAD_COVERAGE.out.ch_maxbed diff --git a/subworkflows/local/generate_maps.nf b/subworkflows/local/generate_maps.nf index bb46765..0b62810 100755 --- a/subworkflows/local/generate_maps.nf +++ b/subworkflows/local/generate_maps.nf @@ -18,7 +18,7 @@ include { HIC_BWAMEM2 } from '../../subworkflows/l workflow GENERATE_MAPS { take: reference_tuple // Channel [ val(meta), path(file) ] - hic_reads_path // Channel [ path(directory) ] + hic_reads_path // Channel [ path(directory) ] main: ch_versions = Channel.empty() @@ -30,8 +30,7 @@ workflow 
GENERATE_MAPS { reference_tuple, [[],[]] ) - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) - + ch_versions = ch_versions.mix( SAMTOOLS_FAIDX.out.versions ) // // MODULE: Indexing on reference output the folder of indexing files @@ -39,33 +38,25 @@ workflow GENERATE_MAPS { BWAMEM2_INDEX ( reference_tuple ) - ch_versions = ch_versions.mix(BWAMEM2_INDEX.out.versions) - - Channel.of( - [ - [id: 'hic_path'], - hic_reads_path - ] - ) - .set { ch_hic_path } + ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) // // MODULE: generate a cram csv file containing the required parametres for CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT // GENERATE_CRAM_CSV ( - ch_hic_path + hic_reads_path ) - ch_versions = ch_versions.mix(GENERATE_CRAM_CSV.out.versions) + ch_versions = ch_versions.mix( GENERATE_CRAM_CSV.out.versions ) // // LOGIC: make branches for different hic aligner. // hic_reads_path .combine( reference_tuple ) - .map{ meta, hic_read_path, ref_meta, ref-> + .map{ meta, hic_read_path, ref_meta, ref -> tuple( - [ id: ref_meta, - aligner: meta.aligner + [ id: ref_meta.id, + aligner: ref_meta.aligner ], ref ) @@ -76,13 +67,15 @@ workflow GENERATE_MAPS { } .set{ ch_aligner } + ch_aligner.minimap2.view() + // // SUBWORKFLOW: mapping hic reads using minimap2 // HIC_MINIMAP2 ( ch_aligner.minimap2, GENERATE_CRAM_CSV.out.csv, - reference_index + SAMTOOLS_FAIDX.out.fai ) ch_versions = ch_versions.mix( HIC_MINIMAP2.out.versions ) mergedbam = HIC_MINIMAP2.out.mergedbam @@ -93,10 +86,11 @@ workflow GENERATE_MAPS { HIC_BWAMEM2 ( ch_aligner.bwamem2, GENERATE_CRAM_CSV.out.csv, - reference_index + SAMTOOLS_FAIDX.out.fai, + BWAMEM2_INDEX.out.index ) ch_versions = ch_versions.mix( HIC_BWAMEM2.out.versions ) - mergedbam = mergedbam.mix(HIC_BWAMEM2.out.mergedbam) + mergedbam = HIC_BWAMEM2.out.mergedbam // // LOGIC: PREPARING PRETEXT MAP INPUT @@ -104,9 +98,11 @@ workflow GENERATE_MAPS { mergedbam .combine( reference_tuple ) .multiMap { bam_meta, bam, ref_meta, ref_fa -> - 
input_bam: tuple( [ id: bam_meta.id, - sz: file( bam ).size() ], - bam + input_bam: tuple( + [ id: bam_meta.id, + sz: file( bam ).size() + ], + bam ) reference: ref_fa } @@ -119,18 +115,13 @@ workflow GENERATE_MAPS { pretext_input.input_bam, pretext_input.reference ) - ch_versions = ch_versions.mix(PRETEXTMAP_STANDRD.out.versions) + ch_versions = ch_versions.mix( PRETEXTMAP_STANDRD.out.versions ) // // LOGIC: HIRES IS TOO INTENSIVE FOR RUNNING IN GITHUB CI SO THIS STOPS IT RUNNING // - if ( params.config_profile_name ) { - config_profile_name = params.config_profile_name - } else { - config_profile_name = 'Local' - } - if ( !config_profile_name.contains('GitHub') ) { + if ( params.config_profile_name != 'GitHub' ) { // // MODULE: GENERATE PRETEXT MAP FROM MAPPED BAM FOR HIGH RES // @@ -147,7 +138,7 @@ workflow GENERATE_MAPS { SNAPSHOT_SRES ( PRETEXTMAP_STANDRD.out.pretext ) - ch_versions = ch_versions.mix(SNAPSHOT_SRES.out.versions) + ch_versions = ch_versions.mix( SNAPSHOT_SRES.out.versions ) // NOTE: SNAPSHOT HRES IS TEMPORARILY REMOVED DUE TO ISSUES WITH MEMORY // @@ -158,8 +149,6 @@ workflow GENERATE_MAPS { //) //ch_versions = ch_versions.mix(SNAPSHOT_HRES.out.versions) - - emit: standrd_pretext = PRETEXTMAP_STANDRD.out.pretext standrd_snpshot = SNAPSHOT_SRES.out.image diff --git a/subworkflows/local/hic_bwamem2.nf b/subworkflows/local/hic_bwamem2.nf index bfe8810..ed338a8 100644 --- a/subworkflows/local/hic_bwamem2.nf +++ b/subworkflows/local/hic_bwamem2.nf @@ -17,20 +17,16 @@ workflow HIC_BWAMEM2 { reference_tuple // Channel: tuple [ val(meta), path( file ) ] csv_ch reference_index + bwa_index main: ch_versions = Channel.empty() mappedbam_ch = Channel.empty() - BWAMEM2_INDEX ( - reference_tuple - ) - ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) - csv_ch .splitCsv() .combine ( reference_tuple ) - .combine ( BWAMEM2_INDEX.out.index ) + .combine ( bwa_index ) .map{ cram_id, cram_info, ref_id, ref_dir, bwa_id, bwa_path -> tuple([ id: cram_id.id @@ 
-42,7 +38,8 @@ workflow HIC_BWAMEM2 { cram_info[4], cram_info[5], cram_info[6], - bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1] + bwa_path.toString() + '/' + ref_dir.toString().split('/')[-1], + ref_dir ) } .set { ch_filtering_input } diff --git a/subworkflows/local/hic_minimap2.nf b/subworkflows/local/hic_minimap2.nf index 957b4dd..800d4f5 100644 --- a/subworkflows/local/hic_minimap2.nf +++ b/subworkflows/local/hic_minimap2.nf @@ -50,7 +50,8 @@ workflow HIC_MINIMAP2 { cram_info[4], cram_info[5], cram_info[6], - mmi_path.toString() + mmi_path.toString(), + ref_dir ) } .set { ch_filtering_input } @@ -60,12 +61,10 @@ workflow HIC_MINIMAP2 { // CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT ( ch_filtering_input - ) ch_versions = ch_versions.mix( CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.versions ) mappedbam_ch = CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT.out.mappedbam - // // LOGIC: PREPARING BAMS FOR MERGE // @@ -96,6 +95,6 @@ workflow HIC_MINIMAP2 { emit: - mergedbam = SAMTOOLS_MERGE.out.bam + mergedbam = SAMTOOLS_MERGE.out.bam versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/longread_coverage.nf b/subworkflows/local/longread_coverage.nf index e1e7f64..5837a05 100755 --- a/subworkflows/local/longread_coverage.nf +++ b/subworkflows/local/longread_coverage.nf @@ -9,7 +9,6 @@ include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MAX } from '../../modules/nf include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MIN } from '../../modules/nf-core/bedtools/merge/main' include { GNU_SORT } from '../../modules/nf-core/gnu/sort/main' include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main' -include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_SPLIT } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main' include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' @@ -19,14 +18,16 @@ 
include { UCSC_BEDGRAPHTOBIGWIG as BED2BW_LOG } from '../../modules/nf include { GRAPHOVERALLCOVERAGE } from '../../modules/local/graphoverallcoverage' include { GETMINMAXPUNCHES } from '../../modules/local/getminmaxpunches' include { FINDHALFCOVERAGE } from '../../modules/local/findhalfcoverage' +include { LONGREADCOVERAGESCALELOG } from '../../modules/local/longreadcoveragescalelog' workflow LONGREAD_COVERAGE { take: - reference_tuple // Channel: [ val(meta), path(reference_file) ] - dot_genome // Channel: [ val(meta), [ path(datafile) ] ] - reads_path // Channel: [ val(meta), val( str ) ] + reference_tuple // Channel: [ val(meta), path( reference_file ) ] + reference_index // Channel: [ val(meta), path( reference_indx ) ] + dot_genome // Channel: [ val(meta), [ path( datafile ) ] ] + reads_path // Channel: [ val(meta), val( str ) ] main: ch_versions = Channel.empty() @@ -276,7 +277,7 @@ workflow LONGREAD_COVERAGE { bed2bw_normal_input.ch_coverage_bed, bed2bw_normal_input.genome_file ) - ch_versions = ch_versions.mix( UCSC_BEDGRAPHTOBIGWIG.out.versions ) + ch_versions = ch_versions.mix( BED2BW_NORMAL.out.versions ) // // MODULE: CONVERT COVERAGE TO LOG @@ -311,7 +312,8 @@ workflow LONGREAD_COVERAGE { ch_minbed = BEDTOOLS_MERGE_MIN.out.bed ch_halfbed = FINDHALFCOVERAGE.out.bed ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed - ch_bigwig = UCSC_BEDGRAPHTOBIGWIG.out.bigwig + ch_bigwig = BED2BW_NORMAL.out.bigwig + ch_bigwig_log = BED2BW_LOG.out.bigwig versions = ch_versions } diff --git a/subworkflows/local/pretext_ingestion.nf b/subworkflows/local/pretext_ingestion.nf index a074c05..2e0b139 100644 --- a/subworkflows/local/pretext_ingestion.nf +++ b/subworkflows/local/pretext_ingestion.nf @@ -17,6 +17,7 @@ workflow PRETEXT_INGESTION { // LOGIC: GAP OR TELOMERE FILES CAN SOMETIMES BE EMPTY // CHECK IF EMPTY AND ASSIGN APPROPRIATE BRANCHING // + gap_file .map { meta, gap_file -> tuple( [ id: meta.id, @@ -28,8 +29,8 @@ workflow PRETEXT_INGESTION { .set { ch_gap } telomere_file - 
.map { meta, telo_file -> - tuple( [ id: meta.id, + .map { telo_file -> + tuple( [ id: 'telo_file', sz: telo_file.size().toInteger(), ft: 'telomere' ], telo_file diff --git a/workflows/curationpretext_allf.nf b/workflows/curationpretext_allf.nf index 775b68f..2e2d32a 100755 --- a/workflows/curationpretext_allf.nf +++ b/workflows/curationpretext_allf.nf @@ -19,9 +19,9 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { GENERATE_MAPS } from '../subworkflows/local/generate_maps' -include { ACCESSORY_FILES } from '../subworkflows/local/accessory_files' - +include { GENERATE_MAPS } from '../subworkflows/local/generate_maps' +include { ACCESSORY_FILES } from '../subworkflows/local/accessory_files' +include { PRETEXT_INGESTION as PRETEXT_INGEST_SNDRD } from '../subworkflows/local/pretext_ingestion' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -48,8 +48,8 @@ workflow CURATIONPRETEXT_ALLF { Channel.of( [ - [ id: params.sample, - aligner: params.aligner + [ id: params.sample, + aligner: params.aligner ], params.input ] @@ -77,12 +77,20 @@ workflow CURATIONPRETEXT_ALLF { // // SUBWORKFLOW: GENERATE SUPPLEMENTARY FILES FOR PRETEXT INGESTION // - ACCESSORY_FILES ( reference_tuple, pacbio_reads ) + ACCESSORY_FILES ( + reference_tuple, + pacbio_reads + ) + ch_versions = ch_versions.mix( ACCESSORY_FILES.out.versions ) // // SUBWORKFLOW: GENERATE ONLY PRETEXT MAPS, NO EXTRA FILES // - GENERATE_MAPS ( reference_tuple, params.cram ) + GENERATE_MAPS ( + reference_tuple, + cram_reads + ) + ch_versions = ch_versions.mix( GENERATE_MAPS.out.versions ) // // MODULE: INGEST ACCESSORY FILES INTO PRETEXT BY DEFAULT @@ -92,6 +100,7 @@ workflow CURATIONPRETEXT_ALLF { GENERATE_MAPS.out.standrd_pretext, ACCESSORY_FILES.out.gap_file, ACCESSORY_FILES.out.coverage_bw, + ACCESSORY_FILES.out.coverage_log_bw, 
ACCESSORY_FILES.out.telo_file, ACCESSORY_FILES.out.repeat_file )