From 139cfd1947b5ede9b5675f546e91e080053469f9 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Tue, 22 Aug 2023 11:42:48 +0100 Subject: [PATCH 01/14] Add kmer subworkflow --- conf/modules.config | 16 ++ main.nf | 14 ++ modules.json | 152 ++++++++++++++----- modules/nf-core/fastk/fastk/main.nf | 41 +++++ modules/nf-core/fastk/fastk/meta.yml | 52 +++++++ modules/nf-core/merquryfk/merquryfk/main.nf | 58 +++++++ modules/nf-core/merquryfk/merquryfk/meta.yml | 115 ++++++++++++++ subworkflows/local/kmer.nf | 134 ++++++++++++++++ workflows/treeval.nf | 23 ++- workflows/treeval_plots.nf | 110 ++++++++++++++ workflows/treeval_rapid.nf | 10 ++ 11 files changed, 684 insertions(+), 41 deletions(-) create mode 100644 modules/nf-core/fastk/fastk/main.nf create mode 100644 modules/nf-core/fastk/fastk/meta.yml create mode 100644 modules/nf-core/merquryfk/merquryfk/main.nf create mode 100644 modules/nf-core/merquryfk/merquryfk/meta.yml create mode 100755 subworkflows/local/kmer.nf create mode 100755 workflows/treeval_plots.nf diff --git a/conf/modules.config b/conf/modules.config index 985f2164..c71c0323 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -279,4 +279,20 @@ process { ext.args = { '-k2,2 -nr' } } + withName: FASTK_FASTK { + ext.args = "-k31 -t" + publishDir = [ + path: { "${params.outdir}/kmer" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MERQURYFK_MERQURYFK { + publishDir = [ + path: { "${params.outdir}/kmer_plot" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/main.nf b/main.nf index 0d25a633..a021b28e 100755 --- a/main.nf +++ b/main.nf @@ -24,6 +24,7 @@ WorkflowMain.initialise( workflow, params, log ) include { TREEVAL } from './workflows/treeval' include { TREEVAL_RAPID } from './workflows/treeval_rapid' +include { TREEVAL_PLOTS } from './workflows/treeval_plots' // // WORKFLOW: RUN MAIN PIPELINE GENERATING ALL OUTPUT @@ -39,6 +40,13 @@ workflow SANGERTOL_TREEVAL_RAPID { TREEVAL_RAPID () } +// +// WORKFLOW: +// +workflow SANGERTOL_TREEVAL_PLOTS { + TREEVAL_PLOTS () +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS @@ -56,6 +64,12 @@ workflow RAPID { SANGERTOL_TREEVAL_RAPID () } +workflow PLOTS { + SANGERTOL_TREEVAL_PLOTS () +} + + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules.json b/modules.json index b3f7eb35..42285c90 100755 --- a/modules.json +++ b/modules.json @@ -8,173 +8,255 @@ "bedtools/bamtobed": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/genomecov": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/intersect": { "branch": "master", "git_sha": "c1532c77717ad7c64752b26b0fd9b4556bdef272", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/makewindows": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/map": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/merge": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "bedtools/sort": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "busco": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/cload": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/zoomify": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff" }, + "fastk/fastk": { + "branch": "master", + "git_sha": "29e87a37ae1887fc8289f2f56775604a71715cb9", + "installed_by": [ + "modules" + ] + }, "gnu/sort": { "branch": "master", "git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "merquryfk/merquryfk": { + "branch": "master", + "git_sha": "6f150e1503c0826c21fedf1fa566cdbecbe98ec7", + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "minimap2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "miniprot/align": { "branch": "master", "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "miniprot/index": { "branch": "master", "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mummer": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "paftools/sam2paf": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextmap": { "branch": "master", "git_sha": "decfb802f2e573efb7b44ff06b11ecf16853054d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextsnapshot": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/markdup": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, 
"seqtk/cutn": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ucsc/bedtobigbed": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "windowmasker/mk_counts": { "branch": "master", "git_sha": "30c3ed32e8bd5ddaf349ba2f4f99d38182fdc08c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "windowmasker/ustat": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -183,4 +265,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastk/fastk/main.nf b/modules/nf-core/fastk/fastk/main.nf new file mode 100644 index 00000000..fec5a4d2 --- /dev/null +++ b/modules/nf-core/fastk/fastk/main.nf @@ -0,0 +1,41 @@ +process FASTK_FASTK { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
+ container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.hist") , emit: hist + tuple val(meta), path("*.ktab*", hidden: true) , emit: ktab, optional: true + tuple val(meta), path("*.{prof,pidx}*", hidden: true), emit: prof, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "FASTK_FASTK module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + FastK \\ + $args \\ + -T$task.cpus \\ + -M${task.memory.toGiga()} \\ + -N${prefix}_fk \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastk/fastk/meta.yml b/modules/nf-core/fastk/fastk/meta.yml new file mode 100644 index 00000000..55fd1be7 --- /dev/null +++ b/modules/nf-core/fastk/fastk/meta.yml @@ -0,0 +1,52 @@ +name: "fastk_fastk" +description: A fast K-mer counter for high-fidelity shotgun datasets +keywords: + - k-mer + - count + - histogram +tools: + - "fastk": + description: "A fast K-mer counter for high-fidelity shotgun datasets" + homepage: "https://github.com/thegenemyers/FASTK" + + tool_dev_url: "https://github.com/thegenemyers/FASTK" + + licence: "https://github.com/thegenemyers/FASTK/blob/master/LICENSE" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - hist: + type: file + description: Histogram of k-mers + pattern: "*.hist" + - ktab: + type: file + description: A sorted table of all canonical k‑mers along with their counts. + pattern: "*.ktab" + - prof: + type: file + description: A k‑mer count profile of each sequence in the input data set. + pattern: "*.prof" + +authors: + - "@mahesh-panchal" diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf new file mode 100644 index 00000000..ac163dac --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/main.nf @@ -0,0 +1,58 @@ +process MERQURYFK_MERQURYFK { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
+ container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' + + input: + tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly), path(haplotigs) + + output: + tuple val(meta), path("${prefix}.completeness.stats") , emit: stats + tuple val(meta), path("${prefix}.*_only.bed") , emit: bed + tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv + tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf, optional: true + tuple val(meta), path("${prefix}.qv") , emit: qv + tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true + tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true + tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true + tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true + tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true + tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MERQURYFK_MERQURYFK module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + MerquryFK \\ + $args \\ + -T$task.cpus \\ + ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\ + $assembly \\ + $haplotigs \\ + $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + merquryfk: $MERQURY_VERSION + r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/merquryfk/merquryfk/meta.yml b/modules/nf-core/merquryfk/merquryfk/meta.yml new file mode 100644 index 00000000..e1f3af01 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/meta.yml @@ -0,0 +1,115 @@ +name: "merquryfk_merquryfk" +description: FastK based version of Merqury +keywords: + - Merqury + - reference-free + - assembly evaluation +tools: + - "merquryfk": + description: "FastK based version of Merqury" + homepage: "https://github.com/thegenemyers/MERQURY.FK" + + tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK" + + licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fastk_hist: + type: file + description: A histogram files from the program FastK + pattern: "*.hist" + - fastk_ktab: + type: file + description: Histogram ktab files from the program FastK (option -t) + pattern: "*.ktab*" + - assembly: + type: file + description: Genome (primary) assembly files (fasta format) + pattern: ".fasta" + - haplotigs: + type: file + description: Assembly haplotigs (fasta format) + pattern: ".fasta" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - stats: + type: file + description: Assembly statistics file + pattern: "*.completeness.stats" + - bed: + type: file + description: Assembly only kmer positions not supported by reads in bed format + pattern: "*_only.bed" + - spectra_cn_fl_png: + type: file + description: "Unstacked copy number spectra filled plot in PNG format" + pattern: "*.spectra-cn.fl.png" + - spectra_cn_ln_png: + type: file + description: "Unstacked copy number spectra line plot in PNG format" + pattern: "*.spectra-cn.ln.png" + - spectra_cn_st_png: + type: file + description: "Stacked copy number spectra line plot in PNG format" + pattern: "*.spectra-cn.st.png" + - spectra_asm_fl_png: + type: file + description: "Unstacked assembly spectra filled plot in PNG format" + pattern: "*.spectra-asm.fl.png" + - spectra_asm_ln_png: + type: file + description: "Unstacked assembly spectra line plot in PNG format" + pattern: "*.spectra-asm.ln.png" + - spectra_asm_st_png: + type: file + description: "Stacked assembly spectra line plot in PNG format" + pattern: "*.spectra-asm.st.png" + - spectra_cn_fl_pdf: + type: file + description: "Unstacked copy number spectra filled plot in PDF format" + pattern: "*.spectra-cn.fl.pdf" + - spectra_cn_ln_pdf: + type: file + description: "Unstacked copy number spectra line plot in 
PDF format" + pattern: "*.spectra-cn.ln.pdf" + - spectra_cn_st_pdf: + type: file + description: "Stacked copy number spectra line plot in PDF format" + pattern: "*.spectra-cn.st.pdf" + - spectra_asm_fl_pdf: + type: file + description: "Unstacked assembly spectra filled plot in PDF format" + pattern: "*.spectra-asm.fl.pdf" + - spectra_asm_ln_pdf: + type: file + description: "Unstacked assembly spectra line plot in PDF format" + pattern: "*.spectra-asm.ln.pdf" + - spectra_asm_st_pdf: + type: file + description: "Stacked assembly spectra line plot in PDF format" + pattern: "*.spectra-asm.st.pdf" + - assembly_qv: + type: file + description: "error and qv table for each scaffold of the assembly" + pattern: "*.qv" + - qv: + type: file + description: "error and qv of each assembly as a whole" + pattern: "*.qv" + +authors: + - "@mahesh-panchal" diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf new file mode 100755 index 00000000..c2337b11 --- /dev/null +++ b/subworkflows/local/kmer.nf @@ -0,0 +1,134 @@ +#!/usr/bin/env nextflow + +// +// Adapted from https://github.com/sanger-tol/genomeassembly +// the Sanger genomeassembly pipeline by @ksenia-krasheninnikova +// +// Convert BAM to CRAM, create index and calculate statistics +// + +// +// MODULE IMPORT BLOCK +// +include { CAT_CAT } from "../../modules/nf-core/cat/cat/main" +include { FASTK_FASTK } from "../../modules/nf-core/fastk/fastk/main" +include { MERQURYFK_MERQURYFK } from '../../modules/nf-core/merquryfk/merquryfk/main' + +workflow KMER { + take: + reference_tuple // Channel [ val(meta), path(file) ] + reads_path // Channel: [ val(meta), val( str ) ] + + main: + ch_versions = Channel.empty() + + // + // LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT + // + reference_tuple + .combine( reads_path ) + .map { meta, ref, reads_path -> + tuple( + [ id : meta.id, + single_end : true ], + reads_path + ) + } + .set { get_reads_input } + + get_reads_input.view() + + // + // MODULE: GETS PACBIO READ PATHS 
FROM READS_PATH + // + ch_grabbed_read_paths = GrabFiles( get_reads_input ) + + ch_grabbed_read_paths.view() + + // + // LOGIC: PACBIO READS FILES TO CHANNEL + // + // ch_grabbed_read_paths + // .map { meta, files -> + // tuple( files ) + // } + // .flatten() + // .set { ch_reads } + + // ch_reads.view() + // // + // // LOGIC: + // // + // reads_path + // .flatMap { meta, reads -> + // reads instanceof List ? reads.collect{ [ meta, it ] } : [ [ meta, reads ] ] + // } + // .set{ reads_ch } + + // + // MODULE: + // + CAT_CAT( ch_grabbed_read_paths ) + ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + + // + // LOGIC: + // + CAT_CAT.out.file_out + .map{ meta, reads -> + reads.getName().endsWith('gz') ? [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa.gz'] : [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa'] + } + .set{ ch_reads_merged } + + // + // LOGIC: + // + CAT_CAT.out.file_out + .join(ch_reads_merged) + .map{ meta, reads_old, reads_new -> + reads_old.renameTo(reads_new); + } + + // + // MODULE: + // + FASTK_FASTK( ch_reads_merged ) + ch_versions = ch_versions.mix(FASTK_FASTK.out.versions.first()) + + // + // LOGIC: + // + FASTK_FASTK.out.hist + .join(FASTK_FASTK.out.ktab) + .join(reference_tuple) + .map{ meta, hist, ktab, meta_ref, primary -> + // hap.size() ? 
[ meta, hist, ktab, primary, hap ] : + [ meta, hist, ktab, primary, [] ] + } + .set{ ch_merq } + + ch_merq.view() + // + // MODULE: + // + MERQURYFK_MERQURYFK ( ch_merq ) + ch_versions = ch_versions.mix(MERQURYFK_MERQURYFK.out.versions.first()) + + emit: + merquryk_completeness = MERQURYFK_MERQURYFK.out.stats // meta, stats + merquryk_qv = MERQURYFK_MERQURYFK.out.qv // meta, qv + versions = ch_versions.ifEmpty(null) +} + +process GrabFiles { + tag "${meta.id}" + executor 'local' + + input: + tuple val(meta), path("in") + + output: + tuple val(meta), path("in/*.fasta.gz") + + "true" +} \ No newline at end of file diff --git a/workflows/treeval.nf b/workflows/treeval.nf index 37271131..182807b0 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -36,6 +36,7 @@ include { LONGREAD_COVERAGE } from '../subworkflows/local/longread_coverage' include { TELO_FINDER } from '../subworkflows/local/telo_finder' include { BUSCO_ANNOTATION } from '../subworkflows/local/busco_annotation' include { HIC_MAPPING } from '../subworkflows/local/hic_mapping' +include { KMER } from '../subworkflows/local/kmer' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -165,10 +166,10 @@ workflow TREEVAL { ) ch_versions = ch_versions.mix(GAP_FINDER.out.versions) - // - // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as - // file to generate a file containing sites of self-complementary sequnce. - // + // // + // // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as + // // file to generate a file containing sites of self-complementary sequnce. 
+ // // SELFCOMP ( GENERATE_GENOME.out.reference_tuple, GENERATE_GENOME.out.dot_genome, @@ -234,6 +235,16 @@ workflow TREEVAL { ) ch_versions = ch_versions.mix(BUSCO_ANNOTATION.out.versions) + // + // SUBWORKFLOW: Takes reads and assembly, produces kmer plot + // + // KMER { + // GENERATE_GENOME.out.reference_tuple, + // GENERATE_GENOME.out.dot_genome, + // YAML_INPUT.out.pacbio_reads + // } + // ch_versions = ch_versions.mix(KMER.out.versions) + // // SUBWORKFLOW: Collates version data from prior subworflows // @@ -260,8 +271,8 @@ workflow TREEVAL { params.sample_id = YAML_INPUT.out.assembly_id.collect() params.rf_data = rf_data.collect() // reference data tuple( [ id, size, lineage, ticket ], file) - params.pb_data = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta - params.cm_data = HIC_MAPPING.out.ch_reporting.collect() // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram + // params.pb_data = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta + // params.cm_data = HIC_MAPPING.out.ch_reporting.collect() // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram emit: software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml diff --git a/workflows/treeval_plots.nf b/workflows/treeval_plots.nf new file mode 100755 index 00000000..fc6962ac --- /dev/null +++ b/workflows/treeval_plots.nf @@ -0,0 +1,110 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowTreeval.initialise(params, log) + +// Check input path 
parameters to see if they exist +def checkPathParamList = [ params.input ] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// IMPORT: SUBWORKFLOWS CALLED BY THE MAIN +// +include { YAML_INPUT } from '../subworkflows/local/yaml_input' +include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' +include { KMER } from '../subworkflows/local/kmer' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// IMPORT: Installed directly from nf-core/modules +// +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow TREEVAL_PLOTS { + + main: + ch_versions = Channel.empty() + + params.entry = 'PLOTS' + input_ch = Channel.fromPath(params.input, checkIfExists: true) + // + // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field + // + YAML_INPUT ( input_ch ) + + // + // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file + // + GENERATE_GENOME ( + YAML_INPUT.out.assembly_id, + YAML_INPUT.out.reference + ) + ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) + + // + // SUBWORKFLOW: Takes reads and assembly, produces kmer plot + // + KMER ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.pacbio_reads + ) + ch_versions = ch_versions.mix(KMER.out.versions) + + // + // SUBWORKFLOW: Collates version 
data from prior subworflows + // + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + params.sample_id = YAML_INPUT.out.assembly_id.collect() + + emit: + software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml + versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log) + } + NfcoreTemplate.summary(workflow, params, log) + + TreeValProject.summary(workflow, params) + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf index 83749136..7519cc86 100755 --- a/workflows/treeval_rapid.nf +++ b/workflows/treeval_rapid.nf @@ -29,6 +29,7 @@ include { GAP_FINDER } from '../subworkflows/local/gap_finder' include { LONGREAD_COVERAGE } from '../subworkflows/local/longread_coverage' include { TELO_FINDER } from '../subworkflows/local/telo_finder' include { HIC_MAPPING } from '../subworkflows/local/hic_mapping' +include { KMER } from '../subworkflows/local/kmer' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -117,6 +118,15 @@ workflow TREEVAL_RAPID { YAML_INPUT.out.pacbio_reads ) ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions) + + // + // SUBWORKFLOW: Takes reads and assembly, produces kmer plot + // + // KMER ( + // GENERATE_GENOME.out.reference_tuple, + // YAML_INPUT.out.pacbio_reads + // ) + // ch_versions = ch_versions.mix(KMER.out.versions) // // SUBWORKFLOW: Collates version data from prior 
subworflows From bca46d2f518b6e27830a9935025080a2fbd99eef Mon Sep 17 00:00:00 2001 From: William Eagles Date: Tue, 22 Aug 2023 11:42:48 +0100 Subject: [PATCH 02/14] Add kmer subworkflow --- conf/modules.config | 16 ++ main.nf | 14 ++ modules.json | 152 ++++++++++++++----- modules/nf-core/fastk/fastk/main.nf | 41 +++++ modules/nf-core/fastk/fastk/meta.yml | 52 +++++++ modules/nf-core/merquryfk/merquryfk/main.nf | 58 +++++++ modules/nf-core/merquryfk/merquryfk/meta.yml | 115 ++++++++++++++ subworkflows/local/kmer.nf | 134 ++++++++++++++++ workflows/treeval.nf | 23 ++- workflows/treeval_plots.nf | 110 ++++++++++++++ workflows/treeval_rapid.nf | 10 ++ 11 files changed, 684 insertions(+), 41 deletions(-) create mode 100644 modules/nf-core/fastk/fastk/main.nf create mode 100644 modules/nf-core/fastk/fastk/meta.yml create mode 100644 modules/nf-core/merquryfk/merquryfk/main.nf create mode 100644 modules/nf-core/merquryfk/merquryfk/meta.yml create mode 100755 subworkflows/local/kmer.nf create mode 100755 workflows/treeval_plots.nf diff --git a/conf/modules.config b/conf/modules.config index 985f2164..c71c0323 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -279,4 +279,20 @@ process { ext.args = { '-k2,2 -nr' } } + withName: FASTK_FASTK { + ext.args = "-k31 -t" + publishDir = [ + path: { "${params.outdir}/kmer" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MERQURYFK_MERQURYFK { + publishDir = [ + path: { "${params.outdir}/kmer_plot" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } } diff --git a/main.nf b/main.nf index 0d25a633..a021b28e 100755 --- a/main.nf +++ b/main.nf @@ -24,6 +24,7 @@ WorkflowMain.initialise( workflow, params, log ) include { TREEVAL } from './workflows/treeval' include { TREEVAL_RAPID } from './workflows/treeval_rapid' +include { TREEVAL_PLOTS } from './workflows/treeval_plots' // // WORKFLOW: RUN MAIN PIPELINE GENERATING ALL OUTPUT @@ -39,6 +40,13 @@ workflow SANGERTOL_TREEVAL_RAPID { TREEVAL_RAPID () } +// +// WORKFLOW: +// +workflow SANGERTOL_TREEVAL_PLOTS { + TREEVAL_PLOTS () +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS @@ -56,6 +64,12 @@ workflow RAPID { SANGERTOL_TREEVAL_RAPID () } +workflow PLOTS { + SANGERTOL_TREEVAL_PLOTS () +} + + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules.json b/modules.json index b3f7eb35..42285c90 100755 --- a/modules.json +++ b/modules.json @@ -8,173 +8,255 @@ "bedtools/bamtobed": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/genomecov": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/intersect": { "branch": "master", "git_sha": "c1532c77717ad7c64752b26b0fd9b4556bdef272", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/makewindows": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/map": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/merge": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "bedtools/sort": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "busco": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bwamem2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/cat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/cload": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cooler/zoomify": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff" }, + "fastk/fastk": { + "branch": "master", + "git_sha": "29e87a37ae1887fc8289f2f56775604a71715cb9", + "installed_by": [ + "modules" + ] + }, "gnu/sort": { "branch": "master", "git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "merquryfk/merquryfk": { + "branch": "master", + "git_sha": "6f150e1503c0826c21fedf1fa566cdbecbe98ec7", + "installed_by": [ + "modules" + ] }, "minimap2/align": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + 
"installed_by": [ + "modules" + ] }, "minimap2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "miniprot/align": { "branch": "master", "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "miniprot/index": { "branch": "master", "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mummer": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "paftools/sam2paf": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextmap": { "branch": "master", "git_sha": "decfb802f2e573efb7b44ff06b11ecf16853054d", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pretextsnapshot": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/markdup": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/merge": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/sort": { "branch": "master", "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/view": { "branch": "master", "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, 
"seqtk/cutn": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ucsc/bedtobigbed": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "windowmasker/mk_counts": { "branch": "master", "git_sha": "30c3ed32e8bd5ddaf349ba2f4f99d38182fdc08c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "windowmasker/ustat": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -183,4 +265,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/fastk/fastk/main.nf b/modules/nf-core/fastk/fastk/main.nf new file mode 100644 index 00000000..fec5a4d2 --- /dev/null +++ b/modules/nf-core/fastk/fastk/main.nf @@ -0,0 +1,41 @@ +process FASTK_FASTK { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
+ container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.hist") , emit: hist + tuple val(meta), path("*.ktab*", hidden: true) , emit: ktab, optional: true + tuple val(meta), path("*.{prof,pidx}*", hidden: true), emit: prof, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "FASTK_FASTK module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + FastK \\ + $args \\ + -T$task.cpus \\ + -M${task.memory.toGiga()} \\ + -N${prefix}_fk \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastk/fastk/meta.yml b/modules/nf-core/fastk/fastk/meta.yml new file mode 100644 index 00000000..55fd1be7 --- /dev/null +++ b/modules/nf-core/fastk/fastk/meta.yml @@ -0,0 +1,52 @@ +name: "fastk_fastk" +description: A fast K-mer counter for high-fidelity shotgun datasets +keywords: + - k-mer + - count + - histogram +tools: + - "fastk": + description: "A fast K-mer counter for high-fidelity shotgun datasets" + homepage: "https://github.com/thegenemyers/FASTK" + + tool_dev_url: "https://github.com/thegenemyers/FASTK" + + licence: "https://github.com/thegenemyers/FASTK/blob/master/LICENSE" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - hist: + type: file + description: Histogram of k-mers + pattern: "*.hist" + - ktab: + type: file + description: A sorted table of all canonical k‑mers along with their counts. + pattern: "*.ktab" + - prof: + type: file + description: A k‑mer count profile of each sequence in the input data set. + pattern: "*.prof" + +authors: + - "@mahesh-panchal" diff --git a/modules/nf-core/merquryfk/merquryfk/main.nf b/modules/nf-core/merquryfk/merquryfk/main.nf new file mode 100644 index 00000000..ac163dac --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/main.nf @@ -0,0 +1,58 @@ +process MERQURYFK_MERQURYFK { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
+ container 'ghcr.io/nbisweden/fastk_genescopefk_merquryfk:1.2' + + input: + tuple val(meta), path(fastk_hist), path(fastk_ktab), path(assembly), path(haplotigs) + + output: + tuple val(meta), path("${prefix}.completeness.stats") , emit: stats + tuple val(meta), path("${prefix}.*_only.bed") , emit: bed + tuple val(meta), path("${prefix}.*.qv") , emit: assembly_qv + tuple val(meta), path("${prefix}.*.spectra-cn.fl.png"), emit: spectra_cn_fl_png, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.fl.pdf"), emit: spectra_cn_fl_pdf, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.ln.png"), emit: spectra_cn_ln_png, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.ln.pdf"), emit: spectra_cn_ln_pdf, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.st.png"), emit: spectra_cn_st_png, optional: true + tuple val(meta), path("${prefix}.*.spectra-cn.st.pdf"), emit: spectra_cn_st_pdf, optional: true + tuple val(meta), path("${prefix}.qv") , emit: qv + tuple val(meta), path("${prefix}.spectra-asm.fl.png") , emit: spectra_asm_fl_png, optional: true + tuple val(meta), path("${prefix}.spectra-asm.fl.pdf") , emit: spectra_asm_fl_pdf, optional: true + tuple val(meta), path("${prefix}.spectra-asm.ln.png") , emit: spectra_asm_ln_png, optional: true + tuple val(meta), path("${prefix}.spectra-asm.ln.pdf") , emit: spectra_asm_ln_pdf, optional: true + tuple val(meta), path("${prefix}.spectra-asm.st.png") , emit: spectra_asm_st_png, optional: true + tuple val(meta), path("${prefix}.spectra-asm.st.pdf") , emit: spectra_asm_st_pdf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "MERQURYFK_MERQURYFK module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def FASTK_VERSION = 'f18a4e6d2207539f7b84461daebc54530a9559b0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + def MERQURY_VERSION = '8ae344092df5dcaf83cfb7f90f662597a9b1fc61' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + MerquryFK \\ + $args \\ + -T$task.cpus \\ + ${fastk_ktab.find{ it.toString().endsWith(".ktab") }} \\ + $assembly \\ + $haplotigs \\ + $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastk: $FASTK_VERSION + merquryfk: $MERQURY_VERSION + r: \$( R --version | sed '1!d; s/.*version //; s/ .*//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/merquryfk/merquryfk/meta.yml b/modules/nf-core/merquryfk/merquryfk/meta.yml new file mode 100644 index 00000000..e1f3af01 --- /dev/null +++ b/modules/nf-core/merquryfk/merquryfk/meta.yml @@ -0,0 +1,115 @@ +name: "merquryfk_merquryfk" +description: FastK based version of Merqury +keywords: + - Merqury + - reference-free + - assembly evaluation +tools: + - "merquryfk": + description: "FastK based version of Merqury" + homepage: "https://github.com/thegenemyers/MERQURY.FK" + + tool_dev_url: "https://github.com/thegenemyers/MERQURY.FK" + + licence: "https://github.com/thegenemyers/MERQURY.FK/blob/main/LICENSE" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fastk_hist: + type: file + description: A histogram files from the program FastK + pattern: "*.hist" + - fastk_ktab: + type: file + description: Histogram ktab files from the program FastK (option -t) + pattern: "*.ktab*" + - assembly: + type: file + description: Genome (primary) assembly files (fasta format) + pattern: ".fasta" + - haplotigs: + type: file + description: Assembly haplotigs (fasta format) + pattern: ".fasta" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - stats: + type: file + description: Assembly statistics file + pattern: "*.completeness.stats" + - bed: + type: file + description: Assembly only kmer positions not supported by reads in bed format + pattern: "*_only.bed" + - spectra_cn_fl_png: + type: file + description: "Unstacked copy number spectra filled plot in PNG format" + pattern: "*.spectra-cn.fl.png" + - spectra_cn_ln_png: + type: file + description: "Unstacked copy number spectra line plot in PNG format" + pattern: "*.spectra-cn.ln.png" + - spectra_cn_st_png: + type: file + description: "Stacked copy number spectra line plot in PNG format" + pattern: "*.spectra-cn.st.png" + - spectra_asm_fl_png: + type: file + description: "Unstacked assembly spectra filled plot in PNG format" + pattern: "*.spectra-asm.fl.png" + - spectra_asm_ln_png: + type: file + description: "Unstacked assembly spectra line plot in PNG format" + pattern: "*.spectra-asm.ln.png" + - spectra_asm_st_png: + type: file + description: "Stacked assembly spectra line plot in PNG format" + pattern: "*.spectra-asm.st.png" + - spectra_cn_fl_pdf: + type: file + description: "Unstacked copy number spectra filled plot in PDF format" + pattern: "*.spectra-cn.fl.pdf" + - spectra_cn_ln_pdf: + type: file + description: "Unstacked copy number spectra line plot in 
PDF format" + pattern: "*.spectra-cn.ln.pdf" + - spectra_cn_st_pdf: + type: file + description: "Stacked copy number spectra line plot in PDF format" + pattern: "*.spectra-cn.st.pdf" + - spectra_asm_fl_pdf: + type: file + description: "Unstacked assembly spectra filled plot in PDF format" + pattern: "*.spectra-asm.fl.pdf" + - spectra_asm_ln_pdf: + type: file + description: "Unstacked assembly spectra line plot in PDF format" + pattern: "*.spectra-asm.ln.pdf" + - spectra_asm_st_pdf: + type: file + description: "Stacked assembly spectra line plot in PDF format" + pattern: "*.spectra-asm.st.pdf" + - assembly_qv: + type: file + description: "error and qv table for each scaffold of the assembly" + pattern: "*.qv" + - qv: + type: file + description: "error and qv of each assembly as a whole" + pattern: "*.qv" + +authors: + - "@mahesh-panchal" diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf new file mode 100755 index 00000000..c2337b11 --- /dev/null +++ b/subworkflows/local/kmer.nf @@ -0,0 +1,134 @@ +#!/usr/bin/env nextflow + +// +// Adapted from https://github.com/sanger-tol/genomeassembly +// the Sanger genomeassembly pipeline by @ksenia-krasheninnikova +// +// Convert BAM to CRAM, create index and calculate statistics +// + +// +// MODULE IMPORT BLOCK +// +include { CAT_CAT } from "../../modules/nf-core/cat/cat/main" +include { FASTK_FASTK } from "../../modules/nf-core/fastk/fastk/main" +include { MERQURYFK_MERQURYFK } from '../../modules/nf-core/merquryfk/merquryfk/main' + +workflow KMER { + take: + reference_tuple // Channel [ val(meta), path(file) ] + reads_path // Channel: [ val(meta), val( str ) ] + + main: + ch_versions = Channel.empty() + + // + // LOGIC: PREPARE GET_READS_FROM_DIRECTORY INPUT + // + reference_tuple + .combine( reads_path ) + .map { meta, ref, reads_path -> + tuple( + [ id : meta.id, + single_end : true ], + reads_path + ) + } + .set { get_reads_input } + + get_reads_input.view() + + // + // MODULE: GETS PACBIO READ PATHS 
FROM READS_PATH + // + ch_grabbed_read_paths = GrabFiles( get_reads_input ) + + ch_grabbed_read_paths.view() + + // + // LOGIC: PACBIO READS FILES TO CHANNEL + // + // ch_grabbed_read_paths + // .map { meta, files -> + // tuple( files ) + // } + // .flatten() + // .set { ch_reads } + + // ch_reads.view() + // // + // // LOGIC: + // // + // reads_path + // .flatMap { meta, reads -> + // reads instanceof List ? reads.collect{ [ meta, it ] } : [ [ meta, reads ] ] + // } + // .set{ reads_ch } + + // + // MODULE: + // + CAT_CAT( ch_grabbed_read_paths ) + ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) + + // + // LOGIC: + // + CAT_CAT.out.file_out + .map{ meta, reads -> + reads.getName().endsWith('gz') ? [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa.gz'] : [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa'] + } + .set{ ch_reads_merged } + + // + // LOGIC: + // + CAT_CAT.out.file_out + .join(ch_reads_merged) + .map{ meta, reads_old, reads_new -> + reads_old.renameTo(reads_new); + } + + // + // MODULE: + // + FASTK_FASTK( ch_reads_merged ) + ch_versions = ch_versions.mix(FASTK_FASTK.out.versions.first()) + + // + // LOGIC: + // + FASTK_FASTK.out.hist + .join(FASTK_FASTK.out.ktab) + .join(reference_tuple) + .map{ meta, hist, ktab, meta_ref, primary -> + // hap.size() ? 
[ meta, hist, ktab, primary, hap ] : + [ meta, hist, ktab, primary, [] ] + } + .set{ ch_merq } + + ch_merq.view() + // + // MODULE: + // + MERQURYFK_MERQURYFK ( ch_merq ) + ch_versions = ch_versions.mix(MERQURYFK_MERQURYFK.out.versions.first()) + + emit: + merquryk_completeness = MERQURYFK_MERQURYFK.out.stats // meta, stats + merquryk_qv = MERQURYFK_MERQURYFK.out.qv // meta, qv + versions = ch_versions.ifEmpty(null) +} + +process GrabFiles { + tag "${meta.id}" + executor 'local' + + input: + tuple val(meta), path("in") + + output: + tuple val(meta), path("in/*.fasta.gz") + + "true" +} \ No newline at end of file diff --git a/workflows/treeval.nf b/workflows/treeval.nf index 1b7d2ac4..7c597c4b 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -36,6 +36,7 @@ include { LONGREAD_COVERAGE } from '../subworkflows/local/longread_coverage' include { TELO_FINDER } from '../subworkflows/local/telo_finder' include { BUSCO_ANNOTATION } from '../subworkflows/local/busco_annotation' include { HIC_MAPPING } from '../subworkflows/local/hic_mapping' +include { KMER } from '../subworkflows/local/kmer' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -165,10 +166,10 @@ workflow TREEVAL { ) ch_versions = ch_versions.mix(GAP_FINDER.out.versions) - // - // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as - // file to generate a file containing sites of self-complementary sequnce. - // + // // + // // SUBWORKFLOW: Takes reference file, .genome file, mummer variables, motif length variable and as + // // file to generate a file containing sites of self-complementary sequnce. 
+ // // SELFCOMP ( GENERATE_GENOME.out.reference_tuple, GENERATE_GENOME.out.dot_genome, @@ -235,6 +236,16 @@ workflow TREEVAL { ) ch_versions = ch_versions.mix(BUSCO_ANNOTATION.out.versions) + // + // SUBWORKFLOW: Takes reads and assembly, produces kmer plot + // + // KMER { + // GENERATE_GENOME.out.reference_tuple, + // GENERATE_GENOME.out.dot_genome, + // YAML_INPUT.out.pacbio_reads + // } + // ch_versions = ch_versions.mix(KMER.out.versions) + // // SUBWORKFLOW: Collates version data from prior subworflows // @@ -261,8 +272,8 @@ workflow TREEVAL { params.sample_id = YAML_INPUT.out.assembly_id.collect() params.rf_data = rf_data.collect() // reference data tuple( [ id, size, lineage, ticket ], file) - params.pb_data = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta - params.cm_data = HIC_MAPPING.out.ch_reporting.collect() // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram + // params.pb_data = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta + // params.cm_data = HIC_MAPPING.out.ch_reporting.collect() // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram emit: software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml diff --git a/workflows/treeval_plots.nf b/workflows/treeval_plots.nf new file mode 100755 index 00000000..fc6962ac --- /dev/null +++ b/workflows/treeval_plots.nf @@ -0,0 +1,110 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowTreeval.initialise(params, log) + +// Check input path 
parameters to see if they exist +def checkPathParamList = [ params.input ] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// IMPORT: SUBWORKFLOWS CALLED BY THE MAIN +// +include { YAML_INPUT } from '../subworkflows/local/yaml_input' +include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' +include { KMER } from '../subworkflows/local/kmer' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// IMPORT: Installed directly from nf-core/modules +// +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow TREEVAL_PLOTS { + + main: + ch_versions = Channel.empty() + + params.entry = 'PLOTS' + input_ch = Channel.fromPath(params.input, checkIfExists: true) + // + // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field + // + YAML_INPUT ( input_ch ) + + // + // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file + // + GENERATE_GENOME ( + YAML_INPUT.out.assembly_id, + YAML_INPUT.out.reference + ) + ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) + + // + // SUBWORKFLOW: Takes reads and assembly, produces kmer plot + // + KMER ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.pacbio_reads + ) + ch_versions = ch_versions.mix(KMER.out.versions) + + // + // SUBWORKFLOW: Collates version 
data from prior subworflows + // + CUSTOM_DUMPSOFTWAREVERSIONS ( + ch_versions.unique().collectFile(name: 'collated_versions.yml') + ) + + params.sample_id = YAML_INPUT.out.assembly_id.collect() + + emit: + software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml + versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + COMPLETION EMAIL AND SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow.onComplete { + if (params.email || params.email_on_fail) { + NfcoreTemplate.email(workflow, params, summary_params, projectDir, log) + } + NfcoreTemplate.summary(workflow, params, log) + + TreeValProject.summary(workflow, params) + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf index a7e06280..34e5bfaa 100755 --- a/workflows/treeval_rapid.nf +++ b/workflows/treeval_rapid.nf @@ -29,6 +29,7 @@ include { GAP_FINDER } from '../subworkflows/local/gap_finder' include { LONGREAD_COVERAGE } from '../subworkflows/local/longread_coverage' include { TELO_FINDER } from '../subworkflows/local/telo_finder' include { HIC_MAPPING } from '../subworkflows/local/hic_mapping' +include { KMER } from '../subworkflows/local/kmer' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -118,6 +119,15 @@ workflow TREEVAL_RAPID { YAML_INPUT.out.pacbio_reads ) ch_versions = ch_versions.mix(LONGREAD_COVERAGE.out.versions) + + // + // SUBWORKFLOW: Takes reads and assembly, produces kmer plot + // + // KMER ( + // GENERATE_GENOME.out.reference_tuple, + // YAML_INPUT.out.pacbio_reads + // ) + // ch_versions = ch_versions.mix(KMER.out.versions) // // SUBWORKFLOW: Collates version data from prior 
subworflows From 01c1a8cb2d8f936255d49f51461abb2fe856be33 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 24 Aug 2023 22:40:04 +0100 Subject: [PATCH 03/14] update resources for plots --- conf/base.config | 20 ++++++++++---------- conf/modules.config | 5 ----- subworkflows/local/kmer.nf | 9 ++++----- workflows/treeval.nf | 11 +++++------ workflows/treeval_rapid.nf | 10 +++++----- 5 files changed, 24 insertions(+), 31 deletions(-) diff --git a/conf/base.config b/conf/base.config index 009fa28c..84669f40 100755 --- a/conf/base.config +++ b/conf/base.config @@ -98,19 +98,19 @@ process { } // Standard parameters, covers most insecta - //withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { - // cpus = { check_max( 16 * 1, 'cpus' ) } - // memory = { check_max( 100.GB * task.attempt, 'memory' ) } - // time = { check_max( 20.h * task.attempt, 'time' ) } - //} - -// For Large complex genomes > 4Gb withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { - cpus = { check_max( 20 * 1, 'cpus' ) } - memory = { check_max( 400.GB * task.attempt, 'memory' ) } - time = { check_max( 300.h * task.attempt, 'time' ) } + cpus = { check_max( 16 * 1, 'cpus' ) } + memory = { check_max( 100.GB * task.attempt, 'memory' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } +// For Large complex genomes > 4Gb + //withName: '.*:.*:LONGREAD_COVERAGE:(MINIMAP2_ALIGN|MINIMAP2_ALIGN_SPLIT)' { + // cpus = { check_max( 20 * 1, 'cpus' ) } + // memory = { check_max( 400.GB * task.attempt, 'memory' ) } + // time = { check_max( 300.h * task.attempt, 'time' ) } + //} + withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_SORT' { cpus = { check_max( 8 * 1, 'cpus' ) } } diff --git a/conf/modules.config b/conf/modules.config index c71c0323..e76c37a3 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -281,11 +281,6 @@ process { withName: FASTK_FASTK { ext.args = "-k31 -t" - publishDir = [ - path: { "${params.outdir}/kmer" }, - mode: 
params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] } withName: MERQURYFK_MERQURYFK { diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf index c2337b11..2ade80a6 100755 --- a/subworkflows/local/kmer.nf +++ b/subworkflows/local/kmer.nf @@ -99,11 +99,10 @@ workflow KMER { // LOGIC: // FASTK_FASTK.out.hist - .join(FASTK_FASTK.out.ktab) - .join(reference_tuple) - .map{ meta, hist, ktab, meta_ref, primary -> - // hap.size() ? [ meta, hist, ktab, primary, hap ] : - [ meta, hist, ktab, primary, [] ] + .combine(FASTK_FASTK.out.ktab) + .combine(reference_tuple) + .map{ meta_hist, hist, meta_ktab, ktab, meta_ref, primary -> + tuple( meta_hist, hist, ktab, primary, []) } .set{ ch_merq } diff --git a/workflows/treeval.nf b/workflows/treeval.nf index 7c597c4b..af1e9d6a 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -239,12 +239,11 @@ workflow TREEVAL { // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // - // KMER { - // GENERATE_GENOME.out.reference_tuple, - // GENERATE_GENOME.out.dot_genome, - // YAML_INPUT.out.pacbio_reads - // } - // ch_versions = ch_versions.mix(KMER.out.versions) + KMER ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.pacbio_reads + ) + ch_versions = ch_versions.mix(KMER.out.versions) // // SUBWORKFLOW: Collates version data from prior subworflows diff --git a/workflows/treeval_rapid.nf b/workflows/treeval_rapid.nf index 34e5bfaa..761e91e3 100755 --- a/workflows/treeval_rapid.nf +++ b/workflows/treeval_rapid.nf @@ -123,11 +123,11 @@ workflow TREEVAL_RAPID { // // SUBWORKFLOW: Takes reads and assembly, produces kmer plot // - // KMER ( - // GENERATE_GENOME.out.reference_tuple, - // YAML_INPUT.out.pacbio_reads - // ) - // ch_versions = ch_versions.mix(KMER.out.versions) + KMER ( + GENERATE_GENOME.out.reference_tuple, + YAML_INPUT.out.pacbio_reads + ) + ch_versions = ch_versions.mix(KMER.out.versions) // // SUBWORKFLOW: Collates version data from 
prior subworflows From e79e8b51c7bbc8f7f94a42f266761e89fbb11156 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 30 Aug 2023 15:15:31 +0100 Subject: [PATCH 04/14] Fix omitted stats --- workflows/treeval.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/treeval.nf b/workflows/treeval.nf index af1e9d6a..147d9db2 100755 --- a/workflows/treeval.nf +++ b/workflows/treeval.nf @@ -271,8 +271,8 @@ workflow TREEVAL { params.sample_id = YAML_INPUT.out.assembly_id.collect() params.rf_data = rf_data.collect() // reference data tuple( [ id, size, lineage, ticket ], file) - // params.pb_data = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta - // params.cm_data = HIC_MAPPING.out.ch_reporting.collect() // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram + params.pb_data = LONGREAD_COVERAGE.out.ch_reporting.collect() // merged pacbio.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw fasta + params.cm_data = HIC_MAPPING.out.ch_reporting.collect() // merged cram.bam data tuple( [ id, size ], file ) | Should really be a collected list of the raw cram emit: software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml From 1e47b8b7fb02c1a955c5f734843db576457a3b9c Mon Sep 17 00:00:00 2001 From: William Eagles Date: Mon, 11 Sep 2023 22:31:52 +0100 Subject: [PATCH 05/14] Config update --- conf/base.config | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/conf/base.config b/conf/base.config index 84669f40..79828fa7 100755 --- a/conf/base.config +++ b/conf/base.config @@ -121,6 +121,12 @@ process { memory = { check_max( 25.GB * Math.ceil( task.attempt * 2 ), 'memory' ) } } + // For larger + //withName:MUMMER { + // cpus = { check_max( 12 * task.attempt, 'cpus' ) } + // memory = { check_max( 50.GB * Math.ceil( task.attempt * 2 ), 
'memory' ) } + //} + withName:UCSC_BEDGRAPHTOBIGWIG { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 20.GB * task.attempt, 'memory' ) } @@ -166,16 +172,22 @@ process { memory = { check_max( 100.GB * task.attempt, 'memory' ) } } + withName: BUSCO { + cpus = { check_max( 16 * task.attempt, 'cpus' ) } + memory = { check_max( 50.GB * task.attempt, 'memory' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } + } + + // Large Genomes > 4Gb //withName: BUSCO { - //cpus = { check_max( 16 * task.attempt, 'cpus' ) } - //memory = { check_max( 50.GB * task.attempt, 'memory' ) } - //time = { check_max( 20.h * task.attempt, 'time' ) } + // cpus = { check_max( 30 * task.attempt, 'cpus' ) } + // memory = { check_max( 100.GB * task.attempt, 'memory' ) } + // time = { check_max( 300.h * task.attempt, 'time' ) } //} // Large Genomes > 4Gb - withName: BUSCO { - cpus = { check_max( 30 * task.attempt, 'cpus' ) } + withName: FASTK_FASTK { + cpus = { check_max( 25 * task.attempt, 'cpus' ) } memory = { check_max( 100.GB * task.attempt, 'memory' ) } - time = { check_max( 300.h * task.attempt, 'time' ) } } } From fe7b2d019f1c3027e2e2880cbd573506fb710c58 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 28 Sep 2023 10:19:35 +0100 Subject: [PATCH 06/14] Merge and correct output --- conf/modules.config | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 59f5af8b..ed849a35 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -19,6 +19,14 @@ process { ] } + withName: MERQURYFK_MERQURYFK { + publishDir = [ + path: { "${params.outdir}/hic_files" }, + mode: params.publish_dir_mode, + pattern: '*.ref.spectra-cn.ln.png' + ] + } + // Files to be uploaded to the TreeVal JBrowse2 instance // .genome, .gz.{tbi|csi}, .bigBed, .bigWig, .paf withName: 'GENERATE_GENOME_FILE|TABIX_BGZIPTABIX|UCSC_BEDTOBIGBED|UCSC_BEDGRAPHTOBIGWIG|.*:.*:SYNTENY:MINIMAP2_ALIGN|.*:.*:GENERATE_GENOME:GNU_SORT' { From 
e24683734be2389e3ec1cff8590fdaef71547d71 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Mon, 2 Oct 2023 15:30:07 +0100 Subject: [PATCH 07/14] Remove extra files from output --- conf/modules.config | 12 ++---------- docs/output.md | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index ed849a35..9763c95e 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -19,14 +19,6 @@ process { ] } - withName: MERQURYFK_MERQURYFK { - publishDir = [ - path: { "${params.outdir}/hic_files" }, - mode: params.publish_dir_mode, - pattern: '*.ref.spectra-cn.ln.png' - ] - } - // Files to be uploaded to the TreeVal JBrowse2 instance // .genome, .gz.{tbi|csi}, .bigBed, .bigWig, .paf withName: 'GENERATE_GENOME_FILE|TABIX_BGZIPTABIX|UCSC_BEDTOBIGBED|UCSC_BEDGRAPHTOBIGWIG|.*:.*:SYNTENY:MINIMAP2_ALIGN|.*:.*:GENERATE_GENOME:GNU_SORT' { @@ -292,9 +284,9 @@ process { withName: MERQURYFK_MERQURYFK { publishDir = [ - path: { "${params.outdir}/kmer_plot" }, + path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + pattern: '*.ref.spectra-cn.ln.png' ] } } diff --git a/docs/output.md b/docs/output.md index e760d706..e9ca85c9 100755 --- a/docs/output.md +++ b/docs/output.md @@ -218,6 +218,22 @@ This worflows searches along predetermined path for syntenic genome files based ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) +## kmer + +This workflow performs a k-mer count using [FASTK_FASTK](https://nf-co.re/modules/fastk_fastk) then passes the results to [MERQURYFK_MERQURYFK](https://nf-co.re/modules/merquryfk_merquryfk) to plot a copy-number k-mer spectra. + +
+Output files + +- `hic_files/` + - `*.ref.spectra-cn.ln.png`: .png file of copy number k-mer spectra. + +
+ +![Synteny workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_synteny.jpeg) + +![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) + ## pipeline-information [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. From dbc3bca0eb3dbf0d4bfa2c96cab001110531a6aa Mon Sep 17 00:00:00 2001 From: Will Eagles <84074349+weaglesBio@users.noreply.github.com> Date: Mon, 2 Oct 2023 15:35:23 +0100 Subject: [PATCH 08/14] Update output.md --- docs/output.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index e9ca85c9..cb677d2b 100755 --- a/docs/output.md +++ b/docs/output.md @@ -21,6 +21,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [selfcomp](#selfcomp) - Identifies regions of self-complementary sequence. - [synteny](#synteny) - Generates syntenic alignments between other high quality genomes. - [busco-analysis](#busco-analysis) - Uses BUSCO to identify ancestral elements. Also use to identify ancestral Lepidopteran genes (merian units). +- [kmer](#kmer) - Counts k-mer and generates a copy number spectra plot. 
- [pipeline-information](#pipeline-information) - Report metrics generated during the workflow execution @@ -230,8 +231,6 @@ This worflows performs a k-mer count using [FASTK_FASTK](https://nf-co.re/module -![Synteny workflow](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_synteny.jpeg) - ![Workflow Legend](https://raw.githubusercontent.com/sanger-tol/treeval/dev/docs/images/treeval_1_0_legend.jpeg) ## pipeline-information From bcc9cacd1b8f2a9f2c808e059f8bc9e8c4639aba Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 12 Oct 2023 10:46:59 +0100 Subject: [PATCH 09/14] Prettier --- main.nf | 2 +- modules.json | 146 +++++++++++++-------------------------------------- 2 files changed, 38 insertions(+), 110 deletions(-) diff --git a/main.nf b/main.nf index 149658ce..e7e17337 100755 --- a/main.nf +++ b/main.nf @@ -41,7 +41,7 @@ workflow SANGERTOL_TREEVAL_RAPID { } // -// WORKFLOW: +// WORKFLOW: // workflow SANGERTOL_TREEVAL_PLOTS { TREEVAL_PLOTS () diff --git a/modules.json b/modules.json index 42285c90..2226ccf7 100755 --- a/modules.json +++ b/modules.json @@ -8,255 +8,183 @@ "bedtools/bamtobed": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/genomecov": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/intersect": { "branch": "master", "git_sha": "c1532c77717ad7c64752b26b0fd9b4556bdef272", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/makewindows": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/map": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/merge": { "branch": 
"master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/sort": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "busco": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bwamem2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/cat": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/cload": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cooler/zoomify": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/getchromsizes": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff" }, "fastk/fastk": { "branch": "master", "git_sha": "29e87a37ae1887fc8289f2f56775604a71715cb9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gnu/sort": { "branch": "master", "git_sha": "88f6e982fb8bd40488d837b3b08a65008e602840", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "merquryfk/merquryfk": { "branch": "master", "git_sha": "6f150e1503c0826c21fedf1fa566cdbecbe98ec7", - "installed_by": [ - "modules" - ] + "installed_by": 
["modules"] }, "minimap2/align": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "minimap2/index": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "miniprot/align": { "branch": "master", "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "miniprot/index": { "branch": "master", "git_sha": "8d737766e8f3c1417212b4b56acb959f3c356d26", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mummer": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "paftools/sam2paf": { "branch": "master", "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextmap": { "branch": "master", "git_sha": "decfb802f2e573efb7b44ff06b11ecf16853054d", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pretextsnapshot": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/markdup": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", "git_sha": "0460d316170f75f323111b4a2c0a2989f0c32013", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/sort": { "branch": "master", "git_sha": "a0f7be95788366c1923171e358da7d049eb440f9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/view": { 
"branch": "master", "git_sha": "3ffae3598260a99e8db3207dead9f73f87f90d1f", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "seqtk/cutn": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "5e7b1ef9a5a2d9258635bcbf70fcf37dacd1b247", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "ucsc/bedgraphtobigwig": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "ucsc/bedtobigbed": { "branch": "master", "git_sha": "9e51255c4f8ec69fb6ccf68593392835f14fecb8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "windowmasker/mk_counts": { "branch": "master", "git_sha": "30c3ed32e8bd5ddaf349ba2f4f99d38182fdc08c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", "git_sha": "726ee59cd9360a965d96ea9ea8770f16b8ddd6cc", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -265,4 +193,4 @@ } } } -} \ No newline at end of file +} From c61492590c30e80437cb3e46d781ab54d252eaf5 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 12 Oct 2023 11:47:09 +0100 Subject: [PATCH 10/14] Remove unneeded plots entry --- main.nf | 8 --- workflows/treeval_plots.nf | 110 ------------------------------------- 2 files changed, 118 deletions(-) delete mode 100755 workflows/treeval_plots.nf diff --git a/main.nf b/main.nf index e7e17337..6f2c8992 100755 --- a/main.nf +++ b/main.nf @@ -24,7 +24,6 @@ WorkflowMain.initialise( workflow, params, log ) include { TREEVAL } from './workflows/treeval' include { TREEVAL_RAPID } from './workflows/treeval_rapid' -include { TREEVAL_PLOTS } from './workflows/treeval_plots' // // WORKFLOW: RUN MAIN PIPELINE GENERATING ALL OUTPUT @@ -40,13 +39,6 @@ workflow SANGERTOL_TREEVAL_RAPID { 
TREEVAL_RAPID () } -// -// WORKFLOW: -// -workflow SANGERTOL_TREEVAL_PLOTS { - TREEVAL_PLOTS () -} - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS diff --git a/workflows/treeval_plots.nf b/workflows/treeval_plots.nf deleted file mode 100755 index fc6962ac..00000000 --- a/workflows/treeval_plots.nf +++ /dev/null @@ -1,110 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) - -// Validate input parameters -WorkflowTreeval.initialise(params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// IMPORT: SUBWORKFLOWS CALLED BY THE MAIN -// -include { YAML_INPUT } from '../subworkflows/local/yaml_input' -include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' -include { KMER } from '../subworkflows/local/kmer' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// IMPORT: Installed directly from nf-core/modules -// -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
-*/ - -workflow TREEVAL_PLOTS { - - main: - ch_versions = Channel.empty() - - params.entry = 'PLOTS' - input_ch = Channel.fromPath(params.input, checkIfExists: true) - // - // SUBWORKFLOW: reads the yaml and pushing out into a channel per yaml field - // - YAML_INPUT ( input_ch ) - - // - // SUBWORKFLOW: Takes input fasta file and sample ID to generate a my.genome file - // - GENERATE_GENOME ( - YAML_INPUT.out.assembly_id, - YAML_INPUT.out.reference - ) - ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) - - // - // SUBWORKFLOW: Takes reads and assembly, produces kmer plot - // - KMER ( - GENERATE_GENOME.out.reference_tuple, - YAML_INPUT.out.pacbio_reads - ) - ch_versions = ch_versions.mix(KMER.out.versions) - - // - // SUBWORKFLOW: Collates version data from prior subworflows - // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - params.sample_id = YAML_INPUT.out.assembly_id.collect() - - emit: - software_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.yml - versions_ch = CUSTOM_DUMPSOFTWAREVERSIONS.out.versions -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log) - } - NfcoreTemplate.summary(workflow, params, log) - - TreeValProject.summary(workflow, params) - -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ From 63ea88df5514554c5f5cd9935dea44db170298e0 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 12 Oct 2023 13:02:31 +0100 Subject: [PATCH 11/14] Clear up plots entry --- main.nf | 6 ------ 1 file changed, 6 deletions(-) diff --git 
a/main.nf b/main.nf index 6f2c8992..9a7b3fba 100755 --- a/main.nf +++ b/main.nf @@ -56,12 +56,6 @@ workflow RAPID { SANGERTOL_TREEVAL_RAPID () } -workflow PLOTS { - SANGERTOL_TREEVAL_PLOTS () -} - - - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END From c198df737d60a4561b68d395399d3e6591cd0b96 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 12 Oct 2023 15:09:27 +0100 Subject: [PATCH 12/14] Clean up comments --- subworkflows/local/kmer.nf | 39 +++++++------------------------------- 1 file changed, 7 insertions(+), 32 deletions(-) diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf index 2ade80a6..5471c493 100755 --- a/subworkflows/local/kmer.nf +++ b/subworkflows/local/kmer.nf @@ -4,7 +4,7 @@ // Adapted from https://github.com/sanger-tol/genomeassembly // the Sanger genomeassembly pipeline by @ksenia-krasheninnikova // -// Convert BAM to CRAM, create index and calculate statistics +// Use FastK to count K-mers, plot spectra using MerquryFK // // @@ -36,43 +36,19 @@ workflow KMER { } .set { get_reads_input } - get_reads_input.view() - // // MODULE: GETS PACBIO READ PATHS FROM READS_PATH // ch_grabbed_read_paths = GrabFiles( get_reads_input ) - ch_grabbed_read_paths.view() - - // - // LOGIC: PACBIO READS FILES TO CHANNEL - // - // ch_grabbed_read_paths - // .map { meta, files -> - // tuple( files ) - // } - // .flatten() - // .set { ch_reads } - - // ch_reads.view() - // // - // // LOGIC: - // // - // reads_path - // .flatMap { meta, reads -> - // reads instanceof List ? 
reads.collect{ [ meta, it ] } : [ [ meta, reads ] ] - // } - // .set{ reads_ch } - // - // MODULE: + // MODULE: JOIN PACBIO READ // CAT_CAT( ch_grabbed_read_paths ) ch_versions = ch_versions.mix(CAT_CAT.out.versions.first()) // - // LOGIC: + // LOGIC: PRODUCE MERGED READS // CAT_CAT.out.file_out .map{ meta, reads -> @@ -81,7 +57,7 @@ workflow KMER { .set{ ch_reads_merged } // - // LOGIC: + // LOGIC: PREPARE FASTK INPUT // CAT_CAT.out.file_out .join(ch_reads_merged) @@ -90,13 +66,13 @@ workflow KMER { } // - // MODULE: + // MODULE: COUNT KMERS // FASTK_FASTK( ch_reads_merged ) ch_versions = ch_versions.mix(FASTK_FASTK.out.versions.first()) // - // LOGIC: + // LOGIC: PREPARE MERQURYFK INPUT // FASTK_FASTK.out.hist .combine(FASTK_FASTK.out.ktab) @@ -106,9 +82,8 @@ workflow KMER { } .set{ ch_merq } - ch_merq.view() // - // MODULE: + // MODULE: USE KMER HISTOGRAM TO PRODUCE SPECTRA // MERQURYFK_MERQURYFK ( ch_merq ) ch_versions = ch_versions.mix(MERQURYFK_MERQURYFK.out.versions.first()) From 6ee71c77cc4267ae52d4c2b3cab023d8c5652dfa Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Thu, 12 Oct 2023 16:50:12 +0100 Subject: [PATCH 13/14] Update modules.config Removing pre.bed as an output --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 9763c95e..d12a9ebb 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,7 +41,7 @@ process { // Files to be used for pretext, likely to be deleted once the hic workflow is complete. 
// .bed, .hr.pretext, .lr.pretext, needs centromere} - withName: 'SEQTK_CUTN|GAP_LENGTH|PRETEXTMAP_HIGHRES|PRETEXTMAP_STANDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES|GET_PAIRED_CONTACT_BED' { + withName: 'SEQTK_CUTN|GAP_LENGTH|PRETEXTMAP_HIGHRES|PRETEXTMAP_STANDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES' { publishDir = [ path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, From c23127c0a43607822b851b26186cdeae696df64d Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Wed, 18 Oct 2023 11:52:58 +0100 Subject: [PATCH 14/14] Update ci.yml Temp removal of FULL Test to allow for merge. --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b7e07eca..07b39229 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -45,7 +45,7 @@ jobs: run: | nextflow run ${GITHUB_WORKSPACE} -entry RAPID -profile test_github,docker --outdir ./results-rapid - - name: Run FULL pipeline with test data - # Remember that you can parallelise this by using strategy.matrix - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./results-full + #- name: Run FULL pipeline with test data + # # Remember that you can parallelise this by using strategy.matrix + # run: | + # nextflow run ${GITHUB_WORKSPACE} -profile test_github,docker --outdir ./results-full