READ_COVERAGE: rename LONGREAD_COVERAGE and accept illumina reads

What this patch does:
  * Input YAML keys longread_type/longread_data become read_type/read_data.
  * subworkflows/local/read_coverage.nf is added and wired into TREEVAL_RAPID
    in place of LONGREAD_COVERAGE; MINIMAP2_ALIGN_SPLIT is no longer configured.
  * nf-core samtools/index is installed and minimap2/align is patched so that
    references larger than 2.5e9 bytes are written unsorted through samtools view.

Review fixes folded into the added lines:
  * yaml_input.nf: the read type is tested inside the map closure; the previous
    if/else tested channel objects, which are always truthy.
  * modules.config: the MINIMAP2_ALIGN selector is scoped to READ_COVERAGE.
  * minimap2/align: a reference of exactly 2.5e9 bytes no longer falls through
    to the PAF branch when BAM output is requested (main.nf and the patch file).
  * read_coverage.nf: copy-pasted section comments and a misspelt key corrected.

NOTE(review): alignment whitespace inside lines is single-spaced in this copy;
if context lines do not match, try `git apply --ignore-whitespace`.

diff --git a/assets/github_testing/TreeValTinyTest.yaml b/assets/github_testing/TreeValTinyTest.yaml
index 24f85aed..834544e0 100755
--- a/assets/github_testing/TreeValTinyTest.yaml
+++ b/assets/github_testing/TreeValTinyTest.yaml
@@ -7,8 +7,8 @@ assembly:
   project_id: DTOL
 reference_file: /home/runner/work/treeval/treeval/TreeValTinyData/assembly/draft/grTriPseu1.fa
 assem_reads:
-  longread_type: hifi
-  longread_data: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/
+  read_type: hifi
+  read_data: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/pacbio/
   hic_data: /home/runner/work/treeval/treeval/TreeValTinyData/genomic_data/hic-arima/
   supplementary_data: path
 kmer_profile:
diff --git a/assets/local_testing/nxOscDF5033.yaml b/assets/local_testing/nxOscDF5033.yaml
index 438ee874..fbba60ab 100755
--- a/assets/local_testing/nxOscDF5033.yaml
+++ b/assets/local_testing/nxOscDF5033.yaml
@@ -7,8 +7,8 @@ assembly:
   project_id: DTOL
 reference_file: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta
 assem_reads:
-  longread_type: hifi
-  longread_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
+  read_type: hifi
+  read_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/pacbio/fasta/
   hic_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/full/
   supplementary_data: path
 kmer_profile:
diff --git a/conf/base.config b/conf/base.config
index 4d47205f..85cb0d52 100755
--- a/conf/base.config
+++ b/conf/base.config
@@ -110,9 +110,9 @@ process {
     }
 
     // Standard parameters, covers most insecta
-    withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' {
-        cpus = { check_max( 16 * 1, 'cpus' ) }
-        memory = { check_max( 100.GB * task.attempt, 'memory' ) }
+    withName: '.*:.*:READ_COVERAGE:MINIMAP2_ALIGN' {
+        cpus = { check_max( 20 * 1, 'cpus' ) }
+        memory = { check_max( 50.GB * task.attempt, 'memory' ) }
         time = { check_max( 20.h * task.attempt, 'time' ) }
     }
 
diff --git a/conf/modules.config b/conf/modules.config
index 33a2041d..648f936f 100755
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -68,7 +68,6 @@ process {
         ext.prefix = { "${meta.id}.sorted" }
     }
 
-
     //
     // SUBWORKFLOW: SELFCOMP
     //
@@ -179,56 +178,51 @@ process {
 
 
     //
-    // SUBWORKFLOW: LONGREAD
+    // SUBWORKFLOW: READ_COVERAGE
     //
-    withName: ".*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN" {
-        ext.args = "--MD -t 8"
-        ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize(".")[0]}" }
-    }
-
-    withName: ".*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN_SPLIT" {
-        ext.args = { "-t 20 --split-prefix ${meta.split_prefix}" }
-        ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize(".")[0]}" }
+    withName: ".*:.*:READ_COVERAGE:MINIMAP2_ALIGN" {
+        ext.args = {'-ax '+ (meta.readtype.equals("hifi") ? "map-hifi" : meta.readtype.equals("clr") ? "map-pb" : meta.readtype.equals("ont") ? "map-ont" : meta.readtype.equals("illumina") ? "sr" : "") + ' --cs=short' + (reference.size() > 2.5e9 ? (" -I" + Math.ceil(reference.size()/1000000000)+"G") : "") }
+        ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE" {
+    withName: ".*:.*:READ_COVERAGE:SAMTOOLS_MERGE" {
         ext.prefix = { "${meta.id}_merge" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:SAMTOOLS_SORT" {
+    withName: ".*:.*:READ_COVERAGE:SAMTOOLS_SORT" {
         ext.prefix = { "${meta.id}_sorted" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW" {
+    withName: ".*:.*:READ_COVERAGE:SAMTOOLS_VIEW_FILTER_PRIMARY" {
         ext.args = "-b -hF 256"
         ext.prefix = { "${meta.id}_view" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV" {
+    withName: ".*:.*:READ_COVERAGE:BEDTOOLS_GENOMECOV" {
         ext.args = "-bga -split"
         ext.prefix = { "${meta.id}_genome2cov" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX" {
+    withName: ".*:.*:READ_COVERAGE:BEDTOOLS_MERGE_MAX" {
         ext.args = "-d 50"
         ext.prefix = { "maxdepth" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN" {
+    withName: ".*:.*:READ_COVERAGE:BEDTOOLS_MERGE_MIN" {
         ext.args = "-d 50"
         ext.prefix = { "zerodepth" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:GNU_SORT" {
+    withName: ".*:.*:READ_COVERAGE:GNU_SORT" {
         ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." }
         ext.prefix = { "${meta.id}_sorted" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:BED2BW_NORMAL" {
+    withName: ".*:.*:READ_COVERAGE:BED2BW_NORMAL" {
         ext.prefix = { "${meta.id}_coverage_normal" }
     }
 
-    withName: ".*:.*:LONGREAD_COVERAGE:BED2BW_LOG" {
+    withName: ".*:.*:READ_COVERAGE:BED2BW_LOG" {
         ext.prefix = { "${meta.id}_coverage_log" }
     }
 
diff --git a/modules.json b/modules.json
index 2226ccf7..069a3f09 100755
--- a/modules.json
+++ b/modules.json
@@ -94,7 +94,8 @@
                     "minimap2/align": {
                         "branch": "master",
                         "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220",
-                        "installed_by": ["modules"]
+                        "installed_by": ["modules"],
+                        "patch": "modules/nf-core/minimap2/align/minimap2-align.diff"
                     },
                     "minimap2/index": {
                         "branch": "master",
@@ -136,6 +137,11 @@
                         "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe",
                         "installed_by": ["modules"]
                     },
+                    "samtools/index": {
+                        "branch": "master",
+                        "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c",
+                        "installed_by": ["modules"]
+                    },
                     "samtools/markdup": {
                         "branch": "master",
                         "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8",
diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf
index 4da47c18..209c80df 100755
--- a/modules/nf-core/minimap2/align/main.nf
+++ b/modules/nf-core/minimap2/align/main.nf
@@ -26,7 +26,7 @@ process MINIMAP2_ALIGN {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
-    def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
+    def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() <= 2.5e9 && bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
     def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
     def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
     """
@@ -38,7 +38,20 @@ process MINIMAP2_ALIGN {
         $cigar_paf \\
         $set_cigar_bam \\
         $bam_output
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        minimap2: \$(minimap2 --version 2>&1)
+    END_VERSIONS
+    """
 
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() <= 2.5e9 && bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
+    def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
+    def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
+    def extension = bam_format ? "bam" : "paf"
+    """
+    touch ${prefix}.${extension}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff
new file mode 100644
index 00000000..849e026d
--- /dev/null
+++ b/modules/nf-core/minimap2/align/minimap2-align.diff
@@ -0,0 +1,35 @@
+Changes in module 'nf-core/minimap2/align'
+--- modules/nf-core/minimap2/align/main.nf
++++ modules/nf-core/minimap2/align/main.nf
+@@ -26,7 +26,7 @@
+     script:
+     def args = task.ext.args ?: ''
+     def prefix = task.ext.prefix ?: "${meta.id}"
+-    def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
++    def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() <= 2.5e9 && bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
+     def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
+     def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
+     """
+@@ -38,7 +38,20 @@
+         $cigar_paf \\
+         $set_cigar_bam \\
+         $bam_output
++    cat <<-END_VERSIONS > versions.yml
++    "${task.process}":
++        minimap2: \$(minimap2 --version 2>&1)
++    END_VERSIONS
++    """
+ 
++    stub:
++    def prefix = task.ext.prefix ?: "${meta.id}"
++    def bam_output = reference.size() > 2.5e9 && bam_format ? "-a | samtools view -b -T ${reference} - > ${prefix}.bam" : reference.size() <= 2.5e9 && bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf"
++    def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
++    def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
++    def extension = bam_format ? "bam" : "paf"
++    """
++    touch ${prefix}.${extension}
+ 
+     cat <<-END_VERSIONS > versions.yml
+     "${task.process}":
+
+************************************************************
diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml
new file mode 100644
index 00000000..296ed99e
--- /dev/null
+++ b/modules/nf-core/samtools/index/environment.yml
@@ -0,0 +1,7 @@
+name: samtools_index
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::samtools=1.18
diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf
new file mode 100644
index 00000000..8ad18fdc
--- /dev/null
+++ b/modules/nf-core/samtools/index/main.nf
@@ -0,0 +1,48 @@
+process SAMTOOLS_INDEX {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
+        'biocontainers/samtools:1.18--h50ea8bc_1' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.bai") , optional:true, emit: bai
+    tuple val(meta), path("*.csi") , optional:true, emit: csi
+    tuple val(meta), path("*.crai"), optional:true, emit: crai
+    path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        index \\
+        -@ ${task.cpus-1} \\
+        $args \\
+        $input
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${input}.bai
+    touch ${input}.crai
+    touch ${input}.csi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml
new file mode 100644
index 00000000..01a4ee03
--- /dev/null
+++ b/modules/nf-core/samtools/index/meta.yml
@@ -0,0 +1,57 @@
+name: samtools_index
+description: Index SAM/BAM/CRAM file
+keywords:
+  - index
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai,sai}"
+  - crai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai,sai}"
+  - csi:
+      type: file
+      description: CSI index file
+      pattern: "*.{csi}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@maxulysse"
+maintainers:
+  - "@drpatelh"
+  - "@ewels"
+  - "@maxulysse"
diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config
new file mode 100644
index 00000000..0ed260ef
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+    withName: SAMTOOLS_INDEX {
+        ext.args = '-c'
+    }
+
+}
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test
new file mode 100644
index 00000000..c76a9169
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/main.nf.test
@@ -0,0 +1,87 @@
+nextflow_process {
+
+    name "Test Process SAMTOOLS_INDEX"
+    script "../main.nf"
+    process "SAMTOOLS_INDEX"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/index"
+
+    test("sarscov2 [BAI]") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out.bai).match("bai") },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+
+    test("homo_sapiens [CRAI]") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(process.out.crai).match("crai") },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+
+    test("homo_sapiens [CSI]") {
+
+        config "./csi.nextflow.config"
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert path(process.out.csi.get(0).get(1)).exists() },
+                { assert path(process.out.versions.get(0)).getText().contains("samtools") }
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap
new file mode 100644
index 00000000..b3baee7f
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap
@@ -0,0 +1,28 @@
+{
+    "crai": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-15T15:17:37.30801"
+    },
+    "bai": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
+                ]
+            ]
+        ],
+        "timestamp": "2023-11-15T15:17:30.869234"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml
new file mode 100644
index 00000000..e0f58a7a
--- /dev/null
+++ b/modules/nf-core/samtools/index/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/index:
+  - modules/nf-core/samtools/index/**
diff --git a/subworkflows/local/read_coverage.nf b/subworkflows/local/read_coverage.nf
new file mode 100755
index 00000000..0b08dce9
--- /dev/null
+++ b/subworkflows/local/read_coverage.nf
@@ -0,0 +1,344 @@
+#!/usr/bin/env nextflow
+
+//
+// MODULE IMPORT BLOCK
+//
+include { BEDTOOLS_BAMTOBED } from '../../modules/nf-core/bedtools/bamtobed/main'
+include { BEDTOOLS_GENOMECOV } from '../../modules/nf-core/bedtools/genomecov/main'
+include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MAX } from '../../modules/nf-core/bedtools/merge/main'
+include { BEDTOOLS_MERGE as BEDTOOLS_MERGE_MIN } from '../../modules/nf-core/bedtools/merge/main'
+include { GNU_SORT } from '../../modules/nf-core/gnu/sort/main'
+include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
+include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
+include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FILTER_PRIMARY } from '../../modules/nf-core/samtools/view/main'
+include { UCSC_BEDGRAPHTOBIGWIG as BED2BW_NORMAL } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main'
+include { UCSC_BEDGRAPHTOBIGWIG as BED2BW_LOG } from '../../modules/nf-core/ucsc/bedgraphtobigwig/main'
+include { GRAPHOVERALLCOVERAGE } from '../../modules/local/graphoverallcoverage'
+include { GETMINMAXPUNCHES } from '../../modules/local/getminmaxpunches'
+include { FINDHALFCOVERAGE } from '../../modules/local/findhalfcoverage'
+include { LONGREADCOVERAGESCALELOG } from '../../modules/local/longreadcoveragescalelog'
+
+workflow READ_COVERAGE {
+
+    take:
+    reference_ch // Channel: tuple [ val(meta), file( reference_file ) ]
+    dot_genome // Channel: tuple [ val(meta), [ file( datafile ) ] ]
+    read_ch // Channel: tuple [ val(meta), val( str ) ] read channel (.fasta.gz)
+
+    main:
+    ch_versions = Channel.empty()
+
+    //
+    // LOGIC: TAKE THE READ FOLDER AS INPUT AND GENERATE THE CHANNEL OF READ FILES
+    //
+    ch_grabbed_reads_path = GrabFiles( read_ch )
+
+    ch_grabbed_reads_path
+        .map { meta, files ->
+            tuple( files )
+        }
+        .flatten()
+        .set { ch_reads_path }
+
+    //
+    // LOGIC: PREPARE FOR MINIMAP2, USING READ_TYPE AS FILTER TO DEFINE THE MAPPING METHOD, CHECK YAML_INPUT.NF
+    //
+    reference_ch
+        .combine( ch_reads_path )
+        .combine( read_ch)
+        .map { meta, ref, reads_path, read_meta, readfolder ->
+            tuple(
+                [ id : meta.id,
+                  single_end : read_meta.single_end,
+                  readtype : read_meta.read_type.toString()
+                ],
+                reads_path,
+                ref,
+                true,
+                false,
+                false,
+                read_meta.read_type.toString()
+            )
+        }
+        .set { pre_minimap_input }
+
+    pre_minimap_input
+        .multiMap { meta, reads_path, ref, bam_output, cigar_paf, cigar_bam, reads_type ->
+            read_tuple : tuple( meta, reads_path)
+            ref : ref
+            bool_bam_output : bam_output
+            bool_cigar_paf : cigar_paf
+            bool_cigar_bam : cigar_bam
+        }
+        .set { minimap_input }
+
+    //
+    // PROCESS: MINIMAP ALIGNMENT
+    //
+    MINIMAP2_ALIGN (
+        minimap_input.read_tuple,
+        minimap_input.ref,
+        minimap_input.bool_bam_output,
+        minimap_input.bool_cigar_paf,
+        minimap_input.bool_cigar_bam
+    )
+    ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions)
+    ch_bams = MINIMAP2_ALIGN.out.bam
+
+    ch_bams
+        .map { meta, file ->
+            tuple( file )
+        }
+        .collect()
+        .map { file ->
+            tuple (
+                [ id : file[0].toString().split('/')[-1].split('_')[0] ], // Change sample ID
+                file
+            )
+        }
+        .set { collected_files_for_merge }
+
+    //
+    // MODULE: MERGE ALL OUTPUT BAM
+    //
+    SAMTOOLS_MERGE(
+        collected_files_for_merge,
+        reference_ch,
+        [[],[]]
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions)
+
+    //
+    // MODULE: SORT MAPPED BAM
+    //
+    SAMTOOLS_SORT (
+        SAMTOOLS_MERGE.out.bam
+    )
+    ch_versions = ch_versions.mix( SAMTOOLS_SORT.out.versions )
+
+    //
+    // MODULE: INDEXING SORTED MAPPED BAM
+    //
+    SAMTOOLS_INDEX (
+        SAMTOOLS_SORT.out.bam
+    )
+    ch_versions = ch_versions.mix( SAMTOOLS_INDEX.out.versions )
+
+    //
+    // LOGIC: PREPARING MERGE INPUT WITH REFERENCE GENOME AND REFERENCE INDEX
+    //
+    SAMTOOLS_SORT.out.bam
+        .combine( reference_ch )
+        .multiMap { meta, bam, ref_meta, ref ->
+            bam_input : tuple(
+                [ id : meta.id,
+                  sz : bam.size(),
+                  single_end : true ],
+                bam,
+                [] // As we aren't using an index file here
+            )
+            ref_input : tuple(
+                ref_meta,
+                ref
+            )
+        }
+        .set { view_input }
+
+    //
+    // MODULE: EXTRACT READS FOR PRIMARY ASSEMBLY
+    //
+    SAMTOOLS_VIEW_FILTER_PRIMARY(
+        view_input.bam_input,
+        view_input.ref_input,
+        []
+    )
+    ch_versions = ch_versions.mix(SAMTOOLS_VIEW_FILTER_PRIMARY.out.versions)
+
+    //
+    // MODULE: BAM TO PRIMARY BED
+    //
+    BEDTOOLS_BAMTOBED(
+        SAMTOOLS_VIEW_FILTER_PRIMARY.out.bam
+    )
+    ch_versions = ch_versions.mix(BEDTOOLS_BAMTOBED.out.versions)
+
+    //
+    // LOGIC: PREPARING Genome2Cov INPUT
+    //
+    BEDTOOLS_BAMTOBED.out.bed
+        .combine( dot_genome )
+        .multiMap { meta, file, my_genome_meta, my_genome ->
+            input_tuple : tuple (
+                [ id : meta.id,
+                  single_end : true ],
+                file,
+                1
+            )
+            dot_genome : my_genome
+            file_suffix : 'bed'
+        }
+        .set { genomecov_input }
+
+    //
+    // MODULE: Genome2Cov
+    //
+    BEDTOOLS_GENOMECOV(
+        genomecov_input.input_tuple,
+        genomecov_input.dot_genome,
+        genomecov_input.file_suffix
+    )
+    ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions)
+
+    //
+    // MODULE: SORT THE PRIMARY BED FILE
+    //
+    GNU_SORT(
+        BEDTOOLS_GENOMECOV.out.genomecov
+    )
+    ch_versions = ch_versions.mix(GNU_SORT.out.versions)
+
+    //
+    // MODULE: get_minmax_punches
+    //
+    GETMINMAXPUNCHES(
+        GNU_SORT.out.sorted
+    )
+    ch_versions = ch_versions.mix(GETMINMAXPUNCHES.out.versions)
+
+    //
+    // MODULE: BEDTOOLS MERGE OF THE MAX PUNCHES
+    //
+    BEDTOOLS_MERGE_MAX(
+        GETMINMAXPUNCHES.out.max
+    )
+    ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MAX.out.versions)
+
+    //
+    // MODULE: BEDTOOLS MERGE OF THE MIN PUNCHES
+    //
+    BEDTOOLS_MERGE_MIN(
+        GETMINMAXPUNCHES.out.min
+    )
+    ch_versions = ch_versions.mix(BEDTOOLS_MERGE_MIN.out.versions)
+
+    //
+    // MODULE: GENERATE DEPTHGRAPH
+    //
+    GRAPHOVERALLCOVERAGE(
+        GNU_SORT.out.sorted
+    )
+    ch_versions = ch_versions.mix(GRAPHOVERALLCOVERAGE.out.versions)
+    ch_depthgraph = GRAPHOVERALLCOVERAGE.out.part
+
+    //
+    // LOGIC: PREPARING FINDHALFCOVERAGE INPUT
+    //
+    GNU_SORT.out.sorted
+        .combine( GRAPHOVERALLCOVERAGE.out.part )
+        .combine( dot_genome )
+        .multiMap { meta, file, meta_depthgraph, depthgraph, meta_my_genome, my_genome ->
+            halfcov_bed : tuple( [ id : meta.id, single_end : true ], file )
+            genome_file : my_genome
+            depthgraph_file : depthgraph
+        }
+        .set { halfcov_input }
+
+    //
+    // MODULE: FIND REGIONS OF HALF COVERAGE
+    //
+    FINDHALFCOVERAGE(
+        halfcov_input.halfcov_bed,
+        halfcov_input.genome_file,
+        halfcov_input.depthgraph_file
+    )
+    ch_versions = ch_versions.mix(FINDHALFCOVERAGE.out.versions)
+
+    //
+    // LOGIC: PREPARING NORMAL COVERAGE INPUT
+    //
+    GNU_SORT.out.sorted
+        .combine( dot_genome )
+        .combine(reference_ch)
+        .multiMap { meta, file, meta_my_genome, my_genome, ref_meta, ref ->
+            ch_coverage_bed : tuple ([ id: ref_meta.id, single_end: true], file)
+            genome_file : my_genome
+        }
+        .set { bed2bw_normal_input }
+
+    //
+    // MODULE: CONVERT BEDGRAPH TO BIGWIG FOR NORMAL COVERAGE
+    //
+    BED2BW_NORMAL(
+        bed2bw_normal_input.ch_coverage_bed,
+        bed2bw_normal_input.genome_file
+    )
+    ch_versions = ch_versions.mix(BED2BW_NORMAL.out.versions)
+
+    //
+    // MODULE: CONVERT COVERAGE TO LOG
+    //
+    LONGREADCOVERAGESCALELOG(
+        GNU_SORT.out.sorted
+    )
+    ch_versions = ch_versions.mix(LONGREADCOVERAGESCALELOG.out.versions)
+
+    //
+    // LOGIC: PREPARING LOG COVERAGE INPUT
+    //
+    LONGREADCOVERAGESCALELOG.out.bed
+        .combine( dot_genome )
+        .combine(reference_ch)
+        .multiMap { meta, file, meta_my_genome, my_genome, ref_meta, ref ->
+            ch_coverage_bed : tuple ([ id: ref_meta.id, single_end: true], file)
+            genome_file : my_genome
+        }
+        .set { bed2bw_log_input }
+
+    //
+    // MODULE: CONVERT BEDGRAPH TO BIGWIG FOR LOG COVERAGE
+    //
+    BED2BW_LOG(
+        bed2bw_log_input.ch_coverage_bed,
+        bed2bw_log_input.genome_file
+    )
+    ch_versions = ch_versions.mix(BED2BW_LOG.out.versions)
+
+    //
+    // LOGIC: GENERATE A SUMMARY TUPLE FOR OUTPUT
+    //
+    ch_grabbed_reads_path
+        .collect()
+        .map { meta, fasta ->
+            tuple( [ id: 'read',
+                     sz: fasta instanceof ArrayList ? fasta.collect { it.size()} : fasta.size() ],
+                   fasta
+            )
+        }
+        .set { ch_reporting_pacbio }
+
+    emit:
+    ch_minbed = BEDTOOLS_MERGE_MIN.out.bed
+    ch_halfbed = FINDHALFCOVERAGE.out.bed
+    ch_maxbed = BEDTOOLS_MERGE_MAX.out.bed
+    ch_reporting = ch_reporting_pacbio.collect()
+    ch_covbw_nor = BED2BW_NORMAL.out.bigwig
+    ch_covbw_log = BED2BW_LOG.out.bigwig
+    versions = ch_versions
+}
+
+process GrabFiles {
+    label 'process_tiny'
+
+    tag "${meta.id}"
+    executor 'local'
+
+    input:
+    tuple val(meta), path("in")
+
+    output:
+    tuple val(meta), path("in/*.{fa,fasta}.{gz}")
+
+    "true"
+}
diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf
index 1fa39a4f..645e3fcd 100755
--- a/subworkflows/local/yaml_input.nf
+++ b/subworkflows/local/yaml_input.nf
@@ -50,8 +50,8 @@ workflow YAML_INPUT {
     group
         .assembly_reads
         .multiMap { data ->
-            longread_type: data.longread_type
-            longread_data: data.longread_data
+            read_type: data.read_type
+            read_data: data.read_data
             hic: data.hic_data
             supplement: data.supplementary_data
         }
@@ -134,18 +134,22 @@ workflow YAML_INPUT {
         }
         .set { ref_ch }
 
-    tolid_version
-        .combine( assem_reads.longread_type )
-        .combine( assem_reads.longread_data )
-        .map{ sample, type, data ->
-            tuple( [ id : sample,
-                     single_end : true,
-                     longread_type : type
-                   ],
-                   data
-            )
-        }
-        .set { longread_ch }
+    //
+    // LOGIC: A CHANNEL IS ALWAYS TRUTHY SO IT CANNOT DRIVE AN if/else, THE READ TYPE
+    //        IS TESTED PER ITEM INSTEAD: ILLUMINA IS PAIRED END, hifi/clr/ont ARE SINGLE END
+    //
+    tolid_version
+        .combine( assem_reads.read_type )
+        .combine( assem_reads.read_data )
+        .map{ sample, type, data ->
+            tuple( [ id : sample,
+                     single_end : type != "illumina",
+                     read_type : type
+                   ],
+                   data
+            )
+        }
+        .set { read_ch }
 
     tolid_version
         .combine( assem_reads.hic )
@@ -181,9 +185,10 @@ workflow YAML_INPUT {
     assembly_id = tolid_version
     reference_ch = ref_ch
 
+    read_ch = read_ch
+
     kmer_prof_file = kmer_prof
 
-    longreads_ch = longread_ch
     hic_reads_ch = hic_ch
     supp_reads_ch = supplement_ch
 
diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf
index 560cec19..57d13786 100755
--- a/workflows/treeval_rapid.nf
+++ b/workflows/treeval_rapid.nf
@@ -26,7 +26,7 @@ include { YAML_INPUT } from '../subworkflows/
 include { GENERATE_GENOME } from '../subworkflows/local/generate_genome'
 include { REPEAT_DENSITY } from '../subworkflows/local/repeat_density'
 include { GAP_FINDER } from '../subworkflows/local/gap_finder'
-include { LONGREAD_COVERAGE } from '../subworkflows/local/longread_coverage'
+include { READ_COVERAGE } from '../subworkflows/local/read_coverage'
 include { TELO_FINDER } from '../subworkflows/local/telo_finder'
 include { HIC_MAPPING } from '../subworkflows/local/hic_mapping'
 include { KMER } from '../subworkflows/local/kmer'
@@ -101,19 +101,19 @@ workflow TREEVAL_RAPID {
     //
     // SUBWORKFLOW: Takes reference, pacbio reads
     //
-    LONGREAD_COVERAGE (
+    READ_COVERAGE (
         YAML_INPUT.out.reference_ch,
         GENERATE_GENOME.out.dot_genome,
-        YAML_INPUT.out.longreads_ch
+        YAML_INPUT.out.read_ch
     )
-    ch_versions = ch_versions.mix( LONGREAD_COVERAGE.out.versions )
+    ch_versions = ch_versions.mix( READ_COVERAGE.out.versions )
 
     //
     // SUBWORKFLOW: Takes reads and assembly, produces kmer plot
     //
     KMER (
         YAML_INPUT.out.reference_ch,
-        YAML_INPUT.out.longreads_ch
+        YAML_INPUT.out.read_ch
     )
     ch_versions = ch_versions.mix( KMER.out.versions )
 
@@ -123,7 +123,7 @@ workflow TREEVAL_RAPID {
     KMER_READ_COVERAGE (
         GENERATE_GENOME.out.dot_genome,
         YAML_INPUT.out.reference_ch,
-        YAML_INPUT.out.longreads_ch,
+        YAML_INPUT.out.read_ch,
         YAML_INPUT.out.kmer_prof_file
     )
     ch_versions = ch_versions.mix( KMER_READ_COVERAGE.out.versions )
@@ -138,8 +138,8 @@ workflow TREEVAL_RAPID {
         YAML_INPUT.out.hic_reads_ch,
         YAML_INPUT.out.assembly_id,
         GAP_FINDER.out.gap_file,
-        LONGREAD_COVERAGE.out.ch_covbw_nor,
-        LONGREAD_COVERAGE.out.ch_covbw_log,
+        READ_COVERAGE.out.ch_covbw_nor,
+        READ_COVERAGE.out.ch_covbw_log,
         TELO_FINDER.out.bedgraph_file,
         REPEAT_DENSITY.out.repeat_density,
         params.entry
@@ -157,10 +157,10 @@ workflow TREEVAL_RAPID {
     // LOGIC: GENERATE SOME CHANNELS FOR REPORTING
     //
     YAML_INPUT.out.reference_ch
-        .combine( LONGREAD_COVERAGE.out.ch_reporting )
+        .combine( READ_COVERAGE.out.ch_reporting )
         .combine( HIC_MAPPING.out.ch_reporting )
         .combine( CUSTOM_DUMPSOFTWAREVERSIONS.out.versions )
-        .map { meta, reference, longread_meta, longread_files, hic_meta, hic_files, custom_file -> [
+        .map { meta, reference, read_meta, read_files, hic_meta, hic_files, custom_file -> [
             rf_data: tuple(
                 [ id: meta.id,
                   sz: file(reference).size(),
@@ -169,7 +169,7 @@ workflow TREEVAL_RAPID {
                 reference
             ),
             sample_id: meta.id,
-            pb_data: tuple( longread_meta, longread_files ),
+            pb_data: tuple( read_meta, read_files ),
             cm_data: tuple( hic_meta, hic_files ),
             custom: custom_file,
             ]