diff --git a/nevermore/modules/align/bowtie2.nf b/nevermore/modules/align/bowtie2.nf new file mode 100644 index 0000000..9b51129 --- /dev/null +++ b/nevermore/modules/align/bowtie2.nf @@ -0,0 +1,76 @@ +process bowtie2_build { + container "quay.io/biocontainers/bowtie2:2.5.3--py39h6fed5c7_1" + tag "${sample.id}" + + input: + tuple val(sample), path(genomeseq) + + output: + tuple val(sample), path("${sample.id}/bowtie2/${sample.id}*"), emit: index + + script: + """ + mkdir -p ${sample.id}/bowtie2/ + + gzip -dc ${genomeseq} > genome.fa + + bowtie2-build --threads ${task.cpus} -f genome.fa ${sample.id}/bowtie2/${sample.id} + + rm -vf genome.fa + """ + +} + + +process bowtie2_align { + // container "quay.io/biocontainers/bowtie2:2.5.3--py39h6fed5c7_1" + container "registry.git.embl.de/schudoma/bowtie2-docker:latest" + tag "${sample.id}" + + input: + tuple val(sample), path(fastqs), path(index) + + output: + tuple val(sample), path("${sample.id}/bowtie2_align/${sample.id}.bam"), emit: bam + tuple val(sample), path("${sample.id}/bowtie2_align/${sample.id}.bam.bai"), emit: bai + tuple val(sample), path("${sample.id}.BOWTIE2.DONE"), emit: sentinel + + script: + + def input_files = "" + def r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") && !it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + def r2_files = fastqs.findAll( { it.name.endsWith("_R2.fastq.gz") } ) + def orphans = fastqs.findAll( { it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + + if (r1_files.size() != 0 && r2_files.size() != 0) { + input_files += "-1 ${r1_files.join(',')} -2 ${r2_files.join(',')}" // bowtie2 expects comma-separated file lists + single_reads = false + } else if (r1_files.size() != 0) { + input_files += "-U ${r1_files.join(',')}" + } else if (r2_files.size() != 0) { + input_files += "-U ${r2_files.join(',')}" + } else if (orphans.size() != 0) { + input_files += "-U ${orphans.join(',')}" + } + + // --fr/--rf/--ff + def threads = Math.max(1, task.cpus.intdiv(2)) // half the cpus for the aligner, but never 0 when task.cpus == 1 + def bowtie2_options = "-p ${threads} -q --phred33" + + def 
index_id = index[0].name.replaceAll(/\.[0-9]+\.bt2l?$/, "") // strip the literal ".<n>.bt2" / ".<n>.bt2l" suffix to get the index prefix + // -S ${sample.id}/hisat2_align/${sample.id}.sam + // index_id=\$(ls ${index[0]} | sed 's/\\.[0-9]\\+\\.ht2\$//') + """ + mkdir -p ${sample.id}/bowtie2_align/ tmp/ + + export TMPDIR=tmp/ + + bowtie2 -x ${index_id} ${bowtie2_options} ${input_files} > ${sample.id}.sam + samtools sort -@ ${threads} ${sample.id}.sam > ${sample.id}/bowtie2_align/${sample.id}.bam + samtools index ${sample.id}/bowtie2_align/${sample.id}.bam + rm -fv ${sample.id}.sam + + touch ${sample.id}.BOWTIE2.DONE + """ + +} \ No newline at end of file diff --git a/nevermore/modules/align/bwa.nf b/nevermore/modules/align/bwa.nf index a39af5d..1e23e22 100644 --- a/nevermore/modules/align/bwa.nf +++ b/nevermore/modules/align/bwa.nf @@ -1,5 +1,5 @@ process bwa_mem_align { - container "docker://registry.git.embl.de/schudoma/align-docker:latest" + container "registry.git.embl.de/schudoma/align-docker:latest" label 'align' input: diff --git a/nevermore/modules/align/helpers.nf b/nevermore/modules/align/helpers.nf index cf04964..b6c0efd 100644 --- a/nevermore/modules/align/helpers.nf +++ b/nevermore/modules/align/helpers.nf @@ -1,5 +1,5 @@ process merge_and_sort { - container "docker://quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + container "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" label 'samtools' input: @@ -7,8 +7,8 @@ process merge_and_sort { val(do_name_sort) output: - tuple val(sample), path("bam/${sample}.bam"), emit: bam - tuple val(sample), path("stats/bam/${sample}.flagstats.txt"), emit: flagstats + tuple val(sample), path("bam/${sample.id}.bam"), emit: bam + tuple val(sample), path("stats/bam/${sample.id}.flagstats.txt"), emit: flagstats script: def sort_order = (do_name_sort) ? 
"-n" : "" @@ -16,21 +16,55 @@ process merge_and_sort { // need a better detection for this if (bamfiles instanceof Collection && bamfiles.size() >= 2) { - merge_cmd = "samtools merge -@ $task.cpus ${sort_order} bam/${sample}.bam ${bamfiles}" + merge_cmd = "samtools merge -@ $task.cpus ${sort_order} bam/${sample.id}.bam ${bamfiles}" } else { - merge_cmd = "ln -s ../${bamfiles[0]} bam/${sample}.bam" + merge_cmd = "ln -s ../${bamfiles[0]} bam/${sample.id}.bam" } """ mkdir -p bam/ stats/bam/ ${merge_cmd} - samtools flagstats bam/${sample}.bam > stats/bam/${sample}.flagstats.txt + samtools flagstats bam/${sample.id}.bam > stats/bam/${sample.id}.flagstats.txt + """ +} + + +process merge_sam { + container "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + label 'samtools' + + input: + tuple val(sample), path(samfiles) + // val(do_name_sort) + + output: + tuple val(sample), path("sam/${sample.id}.sam"), emit: sam + tuple val(sample), path("stats/sam/${sample.id}.flagstats.txt"), emit: flagstats + + script: + // def sort_order = (do_name_sort) ? 
"-n" : "" + def merge_cmd = "" + + // need a better detection for this + if (samfiles instanceof Collection && samfiles.size() >= 2) { + // merge_cmd = "samtools merge -@ $task.cpus ${sort_order} bam/${sample.id}.bam ${bamfiles}" + merge_cmd += "samtools view --no-PG -Sh ${samfiles[0]} > sam/${sample.id}.sam\n" + merge_cmd += samfiles.subList(1, samfiles.size()).collect { "samtools view -S ${it} >> sam/${sample.id}.sam" }.join("\n") // header comes from the first file only; append records of ALL remaining files, not just the second + + } else { + merge_cmd = "ln -s ../${samfiles[0]} sam/${sample.id}.sam" + } + + """ + mkdir -p sam/ stats/sam/ + ${merge_cmd} + samtools flagstats sam/${sample.id}.sam > stats/sam/${sample.id}.flagstats.txt """ } process db_filter { - container "docker://quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + container "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" label 'samtools' input: @@ -51,7 +85,7 @@ process db_filter { process readcount { - container "docker://quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + container "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" label 'samtools' input: diff --git a/nevermore/modules/align/hisat2.nf b/nevermore/modules/align/hisat2.nf new file mode 100644 index 0000000..ba0b49d --- /dev/null +++ b/nevermore/modules/align/hisat2.nf @@ -0,0 +1,86 @@ +process hisat2_build { + container "quay.io/biocontainers/hisat2:2.2.1--hdbdd923_6" + // we need a hisat2/samtools mixed container + // container "registry.git.embl.de/schudoma/hisat2-docker:latest" + + input: + tuple val(sample), path(genomeseq) + + output: + tuple val(sample), path("${sample.id}/hisat2/${sample.id}*"), emit: index + + script: + """ + mkdir -p ${sample.id}/hisat2/ + + gzip -dc ${genomeseq} > genome.fa + + hisat2-build -f genome.fa ${sample.id}/hisat2/${sample.id} + + rm -vf genome.fa + """ + +} + +process hisat2_align { + // container "quay.io/biocontainers/hisat2:2.2.1--hdbdd923_6" + // we need a hisat2/samtools mixed container + 
container "registry.git.embl.de/schudoma/hisat2-docker:latest" + + input: + tuple val(sample), path(fastqs), path(index) + + output: + // 
tuple val(sample), path("${sample.id}/hisat2_align/${sample.id}.bam"), path("${sample.id}/hisat2_align/${sample.id}.bam.bai"), emit: bam + tuple val(sample), path("${sample.id}/hisat2_align/${sample.id}.bam"), emit: bam + tuple val(sample), path("${sample.id}/hisat2_align/${sample.id}.bam.bai"), emit: bai + tuple val(sample), path("${sample.id}.HISAT2.DONE"), emit: sentinel + + script: + + def input_files = "" + def r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") && !it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + def r2_files = fastqs.findAll( { it.name.endsWith("_R2.fastq.gz") } ) + def orphans = fastqs.findAll( { it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + + if (r1_files.size() != 0 && r2_files.size() != 0) { + input_files += "-1 ${r1_files.join(',')} -2 ${r2_files.join(',')}" // hisat2 expects comma-separated file lists + single_reads = false + } else if (r1_files.size() != 0) { + input_files += "-U ${r1_files.join(',')}" + } else if (r2_files.size() != 0) { + input_files += "-U ${r2_files.join(',')}" + } else if (orphans.size() != 0) { + input_files += "-U ${orphans.join(',')}" + } + + // --fr/--rf/--ff + def threads = Math.max(1, task.cpus.intdiv(2)) // half the cpus for the aligner, but never 0 when task.cpus == 1 + def hisat2_options = "-p ${threads} -q --phred33" + if (params.hisat2_no_spliced_alignment) { + // --no-spliced-alignment + hisat2_options += " --no-spliced-alignment" + } + + def index_id = index[0].name.replaceAll(/\.[0-9]+\.ht2l?$/, "") // strip the literal ".<n>.ht2" / ".<n>.ht2l" suffix to get the index prefix + // -S ${sample.id}/hisat2_align/${sample.id}.sam + // index_id=\$(ls ${index[0]} | sed 's/\\.[0-9]\\+\\.ht2\$//') + """ + mkdir -p ${sample.id}/hisat2_align/ tmp/ + + export TMPDIR=tmp/ + + hisat2 -x ${index_id} ${hisat2_options} ${input_files} > ${sample.id}.sam + samtools sort -@ ${threads} ${sample.id}.sam > ${sample.id}/hisat2_align/${sample.id}.bam + samtools index 
${sample.id}/hisat2_align/${sample.id}.bam + rm -fv ${sample.id}.sam + + touch ${sample.id}.HISAT2.DONE + """ + // echo "tmpdir is \$TMPDIR" + + // hisat2 -x ${sample.id} ${hisat2_options} ${input_files} > ${sample.id}.sam +} + 
+ + diff --git a/nevermore/modules/align/minimap2.nf b/nevermore/modules/align/minimap2.nf new file mode 100644 index 0000000..ba30067 --- /dev/null +++ b/nevermore/modules/align/minimap2.nf @@ -0,0 +1,50 @@ +process minimap2_align { + container "registry.git.embl.de/schudoma/minimap2-docker:latest" + label 'align' + + input: + tuple val(sample), path(fastqs) + path(reference) + val(do_name_sort) + + output: + tuple val(sample), path("${sample.id}/${sample.id}.sam"), emit: sam + + script: + // def reads = (sample.is_paired) ? "${sample.id}_R1.fastq.gz ${sample.id}_R2.fastq.gz" : "${sample.id}_R1.fastq.gz" + + def input_files = "" + def r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") && !it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + def r2_files = fastqs.findAll( { it.name.endsWith("_R2.fastq.gz") } ) + def orphans = fastqs.findAll( { it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + + if (r1_files.size() != 0 && r2_files.size() != 0) { + input_files += "${r1_files.join(' ')} ${r2_files.join(' ')}" + single_reads = false + } else if (r1_files.size() != 0) { + input_files += "${r1_files.join(' ')}" + } else if (r2_files.size() != 0) { + input_files += "${r2_files.join(' ')}" + } else if (orphans.size() != 0) { + input_files += "${orphans.join(' ')}" + } + + + + + def threads = task.cpus.intdiv(2) + // def mm_options = "--sam-hit-only -t ${threads} -x sr --secondary=yes -a" + def mm_options = "-t ${threads} -x sr --secondary=yes -a" + + // def sort_cmd = "| " + ((do_name_sort) ? 
"samtools collate -@ ${threads} -o ${sample.id}.bam - tmp/collated_bam" : "samtools sort -@ ${threads} -o ${sample.id}.bam -") + def sort_cmd = "" // we cannot convert large catalogue alignments to bam, hence we cannot properly sort those + + """ + set -e -o pipefail + + mkdir -p ${sample.id}/ tmp/ + minimap2 ${mm_options} --split-prefix ${sample.id}_split ${reference} ${input_files} ${sort_cmd} > ${sample.id}/${sample.id}.sam + + rm -rvf tmp/ + """ +} \ No newline at end of file diff --git a/nevermore/modules/align/sam_align.nf b/nevermore/modules/align/sam_align.nf index 2733881..5fb5736 100644 --- a/nevermore/modules/align/sam_align.nf +++ b/nevermore/modules/align/sam_align.nf @@ -1,5 +1,5 @@ process minimap2_align { - container "docker://quay.io/biocontainers/minimap2:2.28--he4a0461_0" + container "quay.io/biocontainers/minimap2:2.28--he4a0461_0" label 'align' input: @@ -21,7 +21,7 @@ process minimap2_align { process bwa_mem_align { - container "docker://quay.io/biocontainers/bwa:0.7.3a--he4a0461_9" + container "quay.io/biocontainers/bwa:0.7.3a--he4a0461_9" label 'align' input: diff --git a/nevermore/modules/collate.nf b/nevermore/modules/collate.nf index 9ca59a4..524a652 100644 --- a/nevermore/modules/collate.nf +++ b/nevermore/modules/collate.nf @@ -1,4 +1,6 @@ process collate_stats { + label "default" + input: path(stats_files) diff --git a/nevermore/modules/converters/bam2fq.nf b/nevermore/modules/converters/bam2fq.nf index 7a16824..d2d4ff3 100644 --- a/nevermore/modules/converters/bam2fq.nf +++ b/nevermore/modules/converters/bam2fq.nf @@ -1,5 +1,5 @@ process bam2fq { - container "docker://quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" + container "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1" input: tuple val(sample), path(bam) diff --git a/nevermore/modules/converters/fq2bam.nf b/nevermore/modules/converters/fq2bam.nf index b2ecc7b..64faae7 100644 --- a/nevermore/modules/converters/fq2bam.nf +++ b/nevermore/modules/converters/fq2bam.nf @@ -1,5 
+1,5 @@ process fq2bam { - container "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0" + container "quay.io/biocontainers/bbmap:39.06--h92535d8_0" input: tuple val(sample), path(fq) diff --git a/nevermore/modules/converters/fq2fa.nf b/nevermore/modules/converters/fq2fa.nf index e204329..42256fd 100644 --- a/nevermore/modules/converters/fq2fa.nf +++ b/nevermore/modules/converters/fq2fa.nf @@ -1,5 +1,5 @@ process fq2fa { - container "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0" + container "quay.io/biocontainers/bbmap:39.06--h92535d8_0" input: tuple val(sample), path(fq) diff --git a/nevermore/modules/converters/merge_fastqs.nf b/nevermore/modules/converters/merge_fastqs.nf index c87e4bc..b2a2409 100644 --- a/nevermore/modules/converters/merge_fastqs.nf +++ b/nevermore/modules/converters/merge_fastqs.nf @@ -1,5 +1,6 @@ process merge_single_fastqs { - container "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0" + container "quay.io/biocontainers/bbmap:39.06--h92535d8_0" + label "medium" input: tuple val(sample), path(fastqs) diff --git a/nevermore/modules/converters/prepare_fastqs.nf b/nevermore/modules/converters/prepare_fastqs.nf deleted file mode 100644 index fd90775..0000000 --- a/nevermore/modules/converters/prepare_fastqs.nf +++ /dev/null @@ -1,21 +0,0 @@ -process prepare_fastqs { - input: - tuple val(sample), path(fq) - - output: - tuple val(sample), path("fastq/${sample.id}/${sample.id}_R*.fastq.gz"), emit: reads - - script: - if (sample.is_paired) { - """ - mkdir -p fastq/${sample.id} - ln -sf ../../${fq[0]} fastq/${sample.id}/${sample.id}_R1.fastq.gz - ln -sf ../../${fq[1]} fastq/${sample.id}/${sample.id}_R2.fastq.gz - """ - } else { - """ - mkdir -p fastq/${sample.id} - ln -sf ../../${fq[0]} fastq/${sample.id}/${sample.id}_R1.fastq.gz - """ - } -} diff --git a/nevermore/modules/decon/hostile.nf b/nevermore/modules/decon/hostile.nf new file mode 100644 index 0000000..7b281ad --- /dev/null +++ b/nevermore/modules/decon/hostile.nf @@ 
-0,0 +1,32 @@ + +params.hostile = [:] +params.hostile.aligner = "bowtie2" + +process hostile { + container "quay.io/biocontainers/hostile:1.1.0--pyhdfd78af_0" + + input: + tuple val(sample), path(fastqs) + path(db) + + output: + tuple val(sample), path("no_host/${sample.id}/*.fastq.gz"), emit: reads + + script: + + def r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") } ) + def r2_files = fastqs.findAll( { it.name.endsWith("_R2.fastq.gz") } ) + + def r2_input = "" + if (r2_files.size() != 0) { + r2_input = "--fastq2 ${r2_files[0]}" + } + + """ + mkdir -p no_host/${sample.id} + + export HOSTILE_CACHE_DIR=\$(dirname \$(readlink ${db})) + + hostile clean --fastq1 ${r1_files[0]} ${r2_input} --aligner ${params.hostile.aligner} --index \$(readlink ${db}) --threads ${task.cpus} --out-dir no_host/${sample.id} --force + """ +} \ No newline at end of file diff --git a/nevermore/modules/decon/kraken2.nf b/nevermore/modules/decon/kraken2.nf index 30b4585..8cc7ed5 100644 --- a/nevermore/modules/decon/kraken2.nf +++ b/nevermore/modules/decon/kraken2.nf @@ -2,7 +2,7 @@ params.kraken2_min_hit_groups = 10 params.fix_read_ids = true process remove_host_kraken2 { - container "docker://registry.git.embl.de/schudoma/kraken2-docker:latest" + container "registry.git.embl.de/schudoma/kraken2-docker:latest" label 'kraken2' input: @@ -31,9 +31,9 @@ process remove_host_kraken2 { process remove_host_kraken2_individual { - container "docker://registry.git.embl.de/schudoma/kraken2-docker:latest" + container "registry.git.embl.de/schudoma/kraken2-docker:latest" label 'kraken2' - label "process_high" + label "large" input: tuple val(sample), path(fastqs) diff --git a/nevermore/modules/decon/sortmerna.nf b/nevermore/modules/decon/sortmerna.nf index a29428b..c6bbac1 100644 --- a/nevermore/modules/decon/sortmerna.nf +++ b/nevermore/modules/decon/sortmerna.nf @@ -1,5 +1,6 @@ process sortmerna { - container "docker://quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0" + container 
"quay.io/biocontainers/sortmerna:4.3.6--h9ee0642_0" + label "medium" input: tuple val(sample), path(fastqs) diff --git a/nevermore/modules/profilers/gffquant.nf b/nevermore/modules/profilers/gffquant.nf index dec0b0d..2bc4b7c 100644 --- a/nevermore/modules/profilers/gffquant.nf +++ b/nevermore/modules/profilers/gffquant.nf @@ -9,7 +9,8 @@ params.gq_ambig_mode = "1overN" process stream_gffquant { publishDir params.output_dir, mode: "copy" label "gffquant" - label "process_high" + label "large" + tag "gffquant.${sample}" input: @@ -20,7 +21,7 @@ process stream_gffquant { tuple val(sample), path("profiles/${sample}/*.{txt.gz,pd.txt}"), emit: results //, optional: (!params.gq_panda) ? true : false tuple val(sample), path("profiles/${sample}/*.{txt.gz,pd.txt}"), emit: profiles //, optional: (params.gq_panda) ? true : false tuple val(sample), path("logs/${sample}.log") - tuple val(sample), path("alignments/${sample}*.sam"), emit: alignments, optional: true + tuple val(sample), path("alignments/${sample}/${sample}*.sam"), emit: alignments, optional: true script: def gq_output = "-o profiles/${sample}/${sample}" @@ -65,12 +66,14 @@ process stream_gffquant { } def gq_cmd = "gffquant ${gq_output} ${gq_params} --db GQ_DATABASE --aligner ${params.gq_aligner} ${input_files}" + def mkdir_alignments = (params.keep_alignment_file != null && params.keep_alignment_file != false) ? "mkdir -p alignments/${sample}/" : "" // --reference \$(readlink ${reference}) // cp -v ${gq_db}/*sqlite3 GQ_DATABASE // ref=\$(ls ${gq_db}/*.bwt | sed "s/\.bwt//") """ set -e -o pipefail mkdir -p logs/ tmp/ profiles/ + ${mkdir_alignments} echo 'Copying database...' 
cp -v \$(dirname \$(readlink ${gq_db}))/*sqlite3 GQ_DATABASE @@ -149,22 +152,6 @@ process run_gffquant { params.gq_collate_columns = "uniq_scaled,combined_scaled" -// process collate_feature_counts { - -// input: -// tuple val(sample), path(count_tables), val(column) - -// output: -// path("collated/*.txt.gz"), emit: collated, optional: true - -// script: -// """ -// mkdir -p collated/ - -// collate_counts . -o collated/collated -c ${column} -// """ -// } - process collate_feature_counts { publishDir params.output_dir, mode: "copy" label "collate_profiles" diff --git a/nevermore/modules/profilers/kallisto.nf b/nevermore/modules/profilers/kallisto.nf index 7832f41..400f00e 100644 --- a/nevermore/modules/profilers/kallisto.nf +++ b/nevermore/modules/profilers/kallisto.nf @@ -1,5 +1,6 @@ process kallisto_index { - container "docker://quay.io/biocontainers/kallisto:0.50.1--hc877fd6_1" + container "quay.io/biocontainers/kallisto:0.50.1--hc877fd6_1" + label "medium" input: tuple val(sample), path(genes) @@ -11,15 +12,18 @@ process kallisto_index { """ mkdir -p kallisto/index/${sample.id}/ - kallisto index -i kallisto/index/${sample.id}.idx {genes} + kallisto index -i kallisto/index/${sample.id}.idx ${genes} """ } +params.profilers = [:] +params.profilers.kallisto = [:] params.profilers.kallisto.bootstrap = 100 process kallisto_quant { - container "docker://quay.io/biocontainers/kallisto:0.50.1--hc877fd6_1" + container "quay.io/biocontainers/kallisto:0.50.1--hc877fd6_1" + label "medium" input: tuple val(sample), path(fastqs), path(kallisto_index) @@ -50,8 +54,8 @@ process kallisto_quant { def calc_libstats = "" if (single_reads) { - calc_libstats += "mean_length=\$(gzip -dc ${input_files} | awk 'NR%4==2 {sum_len+=length($0); n+=1; } END { printf('%d\n', sum_len / n); }')\n" - calc_libstats += "std_length=\$(gzip -dc ${input_files} | awk -v mean=\$mean_length 'NR%4==2 {sum_len+=(length($0)-mean)**2; n+=1; } END { printf('%d\n', sqrt(sum_len/(n-1)))}')" + calc_libstats += 
"mean_length=\$(gzip -dc ${input_files} | awk 'NR%4==2 {sum_len+=length(\$0); n+=1; } END { printf('%d\n', sum_len / n); }')\n" + calc_libstats += "std_length=\$(gzip -dc ${input_files} | awk -v mean=\$mean_length 'NR%4==2 {sum_len+=(length(\$0)-mean)**2; n+=1; } END { printf('%d\n', sqrt(sum_len/(n-1)))}')" single_flags += "--single" single_flags += " -l \$mean_length -s \$std_length" diff --git a/nevermore/modules/profilers/metaphlan3.nf b/nevermore/modules/profilers/metaphlan3.nf index a21fde1..ce0b99f 100644 --- a/nevermore/modules/profilers/metaphlan3.nf +++ b/nevermore/modules/profilers/metaphlan3.nf @@ -1,5 +1,5 @@ process run_metaphlan3 { - container "docker://quay.io/biocontainers/metaphlan:3.1.0--pyhb7b1952_0" + container "quay.io/biocontainers/metaphlan:3.1.0--pyhb7b1952_0" input: tuple val(sample), path(fastqs) @@ -48,7 +48,7 @@ process run_metaphlan3 { } process combine_metaphlan3 { - container "docker://quay.io/biocontainers/metaphlan:3.1.0--pyhb7b1952_0" + container "quay.io/biocontainers/metaphlan:3.1.0--pyhb7b1952_0" input: tuple val(sample), path(bt2) @@ -69,7 +69,7 @@ process combine_metaphlan3 { process collate_metaphlan3_tables { - container "docker://quay.io/biocontainers/metaphlan:3.1.0--pyhb7b1952_0" + container "quay.io/biocontainers/metaphlan:3.1.0--pyhb7b1952_0" input: path(tables) diff --git a/nevermore/modules/profilers/metaphlan4.nf b/nevermore/modules/profilers/metaphlan4.nf index 7e73e4e..eb1c40f 100644 --- a/nevermore/modules/profilers/metaphlan4.nf +++ b/nevermore/modules/profilers/metaphlan4.nf @@ -1,5 +1,9 @@ process run_metaphlan4 { - container "docker://quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0" + publishDir params.output_dir, mode: "copy" + container "quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0" + tag "${sample.id}" + label "process_high" + label "metaphlan4" input: tuple val(sample), path(fastqs) @@ -78,7 +82,8 @@ process run_metaphlan4 { process combine_metaphlan4 { - container 
"docker://quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0" + container "quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0" + label "metaphlan4" input: tuple val(sample), path(bt2) @@ -99,7 +104,10 @@ process combine_metaphlan4 { process collate_metaphlan4_tables { - container "docker://quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0" + publishDir params.output_dir, mode: "copy" + container "quay.io/biocontainers/metaphlan:4.1.0--pyhca03a8a_0" + label "metaphlan4" + label "mini" input: path(tables) diff --git a/nevermore/modules/profilers/motus.nf b/nevermore/modules/profilers/motus.nf index caea0a8..3681230 100644 --- a/nevermore/modules/profilers/motus.nf +++ b/nevermore/modules/profilers/motus.nf @@ -4,19 +4,61 @@ params.motus_n_marker_genes = 3 process motus { - container "docker://quay.io/biocontainers/motus:3.1.0--pyhdfd78af_0" + container "quay.io/biocontainers/motus:3.1.0--pyhdfd78af_0" input: - tuple val(sample), path(reads) + tuple val(sample), path(fastqs) path(motus_db) output: - tuple val(sample), path("${sample.id}/${sample.id}.motus.txt"), emit: motus_out + tuple val(sample), path("${sample.id}/${sample.id}.motus.txt"), emit: motus_profile script: - def motus_input = (sample.is_paired) ? 
"-f ${sample.id}_R1.fastq.gz -r ${sample.id}_R2.fastq.gz" : "-s ${sample.id}_R1.fastq.gz"; + + def input_files = "" + def r1_files = fastqs.findAll( { it.name.endsWith("_R1.fastq.gz") && !it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + def r2_files = fastqs.findAll( { it.name.endsWith("_R2.fastq.gz") } ) + def orphans = fastqs.findAll( { it.name.matches("(.*)(singles|orphans|chimeras)(.*)") } ) + + if (r1_files.size() != 0 && r2_files.size() != 0) { + input_files += "-f ${r1_files.join(' ')} -r ${r2_files.join(' ')}" + } else if (r1_files.size() != 0) { + input_files += "-s ${r1_files.join(' ')}" + } else if (r2_files.size() != 0) { + input_files += "-s ${r2_files.join(' ')}" + } else if (orphans.size() != 0) { + input_files += "-s ${orphans.join(' ')}" + } + + + // def motus_input = (sample.is_paired) ? "-f ${sample.id}_R1.fastq.gz -r ${sample.id}_R2.fastq.gz" : "-s ${sample.id}_R1.fastq.gz"; + """ mkdir -p ${sample.id} - motus profile -t $task.cpus -k ${params.motus_tax_level} -c -v 7 -q -l ${params.motus_min_length} -g ${params.motus_n_marker_genes} -db ${motus_db} ${motus_input} > ${sample.id}/${sample.id}.motus.txt + motus profile -n ${sample.id} -t $task.cpus -k ${params.motus_tax_level} -c -v 7 -q -l ${params.motus_min_length} -g ${params.motus_n_marker_genes} -db ${motus_db} ${input_files} > ${sample.id}/${sample.id}.motus.txt """ } + + + +process motus_merge { + container "quay.io/biocontainers/motus:3.1.0--pyhdfd78af_0" + + input: + path(profiles) + path(motus_db) + + output: + path("motus_profiles/motus_merged.txt") + + script: + """ + mkdir -p motus_profiles/ input/ + + for f in ${profiles}; do ln -sf ../\$f input/; done + + motus merge -db ${motus_db} -d input/ -o motus_profiles/motus_merged.txt + """ + + +} \ No newline at end of file diff --git a/nevermore/modules/profilers/salmon.nf b/nevermore/modules/profilers/salmon.nf index 69a857a..abd2935 100644 --- a/nevermore/modules/profilers/salmon.nf +++ b/nevermore/modules/profilers/salmon.nf 
@@ -1,7 +1,8 @@ params.profilers.salmon.index.k = 31 process salmon_index { - container "docker://quay.io/biocontainers/salmon:1.10.3--hecfa306_0" + container "quay.io/biocontainers/salmon:1.10.3--hecfa306_0" + label "medium" input: tuple val(sample), path(genes) @@ -23,7 +24,8 @@ process salmon_index { params.profilers.salmon.quant.libtype = "IU" process salmon_quant { - container "docker://quay.io/biocontainers/salmon:1.10.3--hecfa306_0" + container "quay.io/biocontainers/salmon:1.10.3--hecfa306_0" + label "medium" // label "align" input: diff --git a/nevermore/modules/qc/bbduk.nf b/nevermore/modules/qc/bbduk.nf index c836d53..76d6c19 100644 --- a/nevermore/modules/qc/bbduk.nf +++ b/nevermore/modules/qc/bbduk.nf @@ -2,8 +2,9 @@ params.qc_params_shotgun = "qtrim=rl trimq=3 maq=25 ktrim=r k=23 mink=11 hdist=1 params.qc_minlen = 45 process qc_bbduk { - container "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0" + container "quay.io/biocontainers/bbmap:39.06--h92535d8_0" label 'bbduk' + tag "${sample.id}" input: diff --git a/nevermore/modules/qc/bbduk_amplicon.nf b/nevermore/modules/qc/bbduk_amplicon.nf index a2b3282..a0ea41d 100644 --- a/nevermore/modules/qc/bbduk_amplicon.nf +++ b/nevermore/modules/qc/bbduk_amplicon.nf @@ -1,5 +1,5 @@ process qc_bbduk_stepwise_amplicon { - container "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0" + container "quay.io/biocontainers/bbmap:39.06--h92535d8_0" label 'bbduk' input: diff --git a/nevermore/modules/qc/bbmerge.nf b/nevermore/modules/qc/bbmerge.nf index 97cb2f0..d122a0a 100644 --- a/nevermore/modules/qc/bbmerge.nf +++ b/nevermore/modules/qc/bbmerge.nf @@ -1,5 +1,5 @@ process qc_bbmerge { - container "docker://quay.io/biocontainers/bbmap:39.06--h92535d8_0" + container "quay.io/biocontainers/bbmap:39.06--h92535d8_0" label "bbduk" input: @@ -19,3 +19,46 @@ process qc_bbmerge { bbmerge.sh -Xmx${maxmem}g t=${task.cpus} ${merge_params} in=${sample}_R1.fastq.gz in2=${sample}_R2.fastq.gz 
out=${sample}.singles/${sample}.singles_M.fastq.gz outu1=${sample}/${sample}_R1.fastq.gz outu2=${sample}/${sample}_R2.fastq.gz """ } + +params.bbmerge_insert_size = "ecct extend2=20 iterations=5 k=62 adapter=default" + + +process qc_bbmerge_insert_size { + container "quay.io/biocontainers/bbmap:39.06--h92535d8_0" + label "bbduk" + + input: + tuple val(sample), path(reads) + + output: + tuple val(sample), path("${sample.id}/bbmerge/${sample.id}.ihist.txt"), emit: isize_hist + tuple val(sample), path("${sample.id}/bbmerge/${sample.id}.inserts.txt"), emit: inserts + tuple val(sample), path("${sample.id}/bbmerge/${sample.id}.adapters.txt"), emit: adapters + + script: + def maxmem = task.memory.toGiga() + def compression = (reads[0].name.endsWith("gz")) ? "gz" : "bz2" + + def r1_files = reads.findAll( { it.name.endsWith("_R1.fastq.${compression}") } ) + def r2_files = reads.findAll( { it.name.endsWith("_R2.fastq.${compression}") } ) + + def read1 = "" + def read2 = "" + def orphans = "" + if (r1_files.size() != 0) { + read1 += "in1=${r1_files[0]}" + if (r2_files.size() != 0) { + read2 += "in2=${r2_files[0]}" + } + } + + """ + mkdir -p ${sample.id}/bbmerge/ + + bbmerge.sh -Xmx${maxmem}g t=${task.cpus} ${read1} ${read2} \ + ihist=${sample.id}/bbmerge/${sample.id}.ihist.txt \ + outinsert=${sample.id}/bbmerge/${sample.id}.inserts.txt \ + outadapter=${sample.id}/bbmerge/${sample.id}.adapters.txt \ + ${params.bbmerge_insert_size} + """ +} \ No newline at end of file diff --git a/nevermore/modules/qc/fastqc.nf b/nevermore/modules/qc/fastqc.nf index c5b3e51..fed7120 100644 --- a/nevermore/modules/qc/fastqc.nf +++ b/nevermore/modules/qc/fastqc.nf @@ -1,5 +1,6 @@ process fastqc { - container "docker://quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0" + container "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0" + label "medium" input: tuple val(sample), path(reads) diff --git a/nevermore/modules/qc/multiqc.nf b/nevermore/modules/qc/multiqc.nf index 15e8ce7..2f38923 100644 --- 
a/nevermore/modules/qc/multiqc.nf +++ b/nevermore/modules/qc/multiqc.nf @@ -1,5 +1,5 @@ process multiqc { - container "docker://quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0" + container "quay.io/biocontainers/multiqc:1.21--pyhdfd78af_0" input: path(reports) diff --git a/nevermore/modules/qc/subsample.nf b/nevermore/modules/qc/subsample.nf index d2d7bca..495e0a0 100644 --- a/nevermore/modules/qc/subsample.nf +++ b/nevermore/modules/qc/subsample.nf @@ -1,7 +1,16 @@ +// this nonsense parameter-rearrangement is necessary for nf-core schema/clowm compatibility + +params.subsample_random_seed = 313 params.subsample = [:] -params.subsample.random_seed = 313 +if (!params.subsample.random_seed) { + params.subsample.random_seed = params.subsample_random_seed +} + process calculate_library_size_cutoff { + label "tiny" + + input: path(readcounts) val(percentile) @@ -11,7 +20,7 @@ process calculate_library_size_cutoff { script: """ - #!/usr/bin/env python + #!/usr/bin/env python3 import glob import statistics @@ -25,13 +34,23 @@ process calculate_library_size_cutoff { ) ) percentile = ${percentile} - percentiles = statistics.quantiles(d.values(), n=100) - mean_low_counts = statistics.mean(v for v in d.values() if v < percentiles[percentile - 1]) + try: + percentiles = statistics.quantiles(d.values(), n=100) + except statistics.StatisticsError: + percentiles = None + if percentiles is not None: + mean_low_counts = statistics.mean(v for v in d.values() if v < percentiles[percentile - 1]) + else: + mean_low_counts = list(d.values())[0] with open('library_sizes.txt', 'wt') as _out: print(*('sample', 'size', 'do_subsample', 'target_size'), sep='\\t', file=_out) for k, v in d.items(): - print(k, v, int(not v < percentiles[percentile - 1]), int(mean_low_counts + 0.5), sep='\\t', file=_out) + if percentiles is not None: + do_subsample = v >= percentiles[percentile - 1] + else: + do_subsample = False + print(k, v, int(do_subsample), int(mean_low_counts + 0.5), sep='\\t', file=_out) 
print(mean_low_counts) @@ -42,7 +61,8 @@ process calculate_library_size_cutoff { } process subsample_reads { - container "docker://quay.io/biocontainers/seqtk:1.4--he4a0461_2" + container "quay.io/biocontainers/seqtk:1.4--he4a0461_2" + label "medium" input: tuple val(sample), path(fastqs), val(target_size) diff --git a/nevermore/modules/stats.nf b/nevermore/modules/stats.nf index 0eb8293..a64f6e4 100644 --- a/nevermore/modules/stats.nf +++ b/nevermore/modules/stats.nf @@ -1,5 +1,6 @@ process flagstats { - container "docker://registry.git.embl.de/schudoma/align-docker:latest" + container "registry.git.embl.de/schudoma/align-docker:latest" + label "default" input: tuple val(sample), path(bam) diff --git a/nevermore/version.json b/nevermore/version.json index 4ae9530..3b702c3 100644 --- a/nevermore/version.json +++ b/nevermore/version.json @@ -1,3 +1,3 @@ { - "version": "0.12.6" + "version": "0.13.1" } \ No newline at end of file diff --git a/nevermore/workflows/align.nf b/nevermore/workflows/align.nf index 92ea7e5..c79c5e4 100644 --- a/nevermore/workflows/align.nf +++ b/nevermore/workflows/align.nf @@ -3,41 +3,86 @@ nextflow.enable.dsl=2 include { bwa_mem_align } from "../modules/align/bwa" -include { merge_and_sort } from "../modules/align/helpers" +include { minimap2_align } from "../modules/align/minimap2" +include { merge_and_sort; merge_sam } from "../modules/align/helpers" def asset_dir = "${projectDir}/nevermore/assets" def do_alignment = params.run_gffquant || !params.skip_alignment def do_stream = params.gq_stream def do_preprocessing = (!params.skip_preprocessing || params.run_preprocessing) +params.do_name_sort = true +params.align = [:] +params.align.run_minimap2 = false +params.align.run_bwa = false + workflow nevermore_align { take: fastq_ch - main: - /* align merged single-read and paired-end sets against reference */ + main: + + alignment_ch = Channel.empty() + aln_counts_ch = Channel.empty() - bwa_mem_align( - fastq_ch, - params.reference, - true - ) 
+ /* align merged single-read and paired-end sets against reference */ - /* merge paired-end and single-read alignments into single per-sample bamfiles */ + if (params.align.run_minimap2) { + minimap2_align( + fastq_ch, + params.reference, + params.do_name_sort + ) - aligned_ch = bwa_mem_align.out.bam - .map { sample, bam -> + minimap_aligned_ch = minimap2_align.out.sam + .map { sample, sam -> sample_id = sample.id.replaceAll(/.(orphans|singles|chimeras)$/, "") - return tuple(sample_id, bam) + return tuple(sample_id, sam) } .groupTuple(sort: true) - merge_and_sort(aligned_ch, true) + /* merge paired-end and single-read alignments into single per-sample bamfiles */ + merge_sam(minimap_aligned_ch + .map { sample_id, samfiles -> + def meta = [:] + meta.id = sample_id + return tuple(meta, samfiles) + }) - emit: - alignments = merge_and_sort.out.bam - aln_counts = merge_and_sort.out.flagstats + alignment_ch = alignment_ch + .mix(merge_sam.out.sam) + aln_counts_ch = aln_counts_ch + .mix(merge_sam.out.flagstats) + + } + + if (params.align.run_bwa) { + bwa_mem_align( + fastq_ch, + params.reference, + true + ) + /* merge paired-end and single-read alignments into single per-sample bamfiles */ + + aligned_ch = bwa_mem_align.out.bam + .map { sample, bam -> + sample_id = sample.id.replaceAll(/.(orphans|singles|chimeras)$/, "") + return tuple(sample_id, bam) + } + .groupTuple(sort: true) + + merge_and_sort(aligned_ch, true) + + alignment_ch = alignment_ch + .mix(merge_and_sort.out.bam) + aln_counts_ch = aln_counts_ch + .mix(merge_and_sort.out.flagstats) + } + + emit: + alignments = alignment_ch + aln_counts = aln_counts_ch } diff --git a/nevermore/workflows/decon.nf b/nevermore/workflows/decon.nf index 1fdf05d..273a0da 100644 --- a/nevermore/workflows/decon.nf +++ b/nevermore/workflows/decon.nf @@ -1,4 +1,5 @@ include { remove_host_kraken2_individual; remove_host_kraken2 } from "../modules/decon/kraken2" +include { hostile } from "../modules/decon/hostile" include { sortmerna } 
from "../modules/decon/sortmerna" @@ -21,7 +22,7 @@ workflow nevermore_decon { sortmerna(for_sortmerna_ch.metaT, params.sortmerna_db) preprocessed_ch = for_sortmerna_ch.metaG - .concat(sortmerna.out.fastqs) + .mix(sortmerna.out.fastqs) } else { @@ -29,12 +30,16 @@ workflow nevermore_decon { } - if (params.remove_host) { - - remove_host_kraken2_individual(preprocessed_ch, params.remove_host_kraken2_db) - - preprocessed_ch = remove_host_kraken2_individual.out.reads - + if (params.remove_host == "hostile") { + + hostile(preprocessed_ch, params.hostile_db) + preprocessed_ch = hostile.out.reads + + } else if ((params.remove_host != false && params.remove_host != null ) || params.remove_host == "kraken") { + + remove_host_kraken2_individual(preprocessed_ch, params.remove_host_kraken2_db) + preprocessed_ch = remove_host_kraken2_individual.out.reads + } emit: diff --git a/nevermore/workflows/gffquant.nf b/nevermore/workflows/gffquant.nf index a098704..ac8713c 100644 --- a/nevermore/workflows/gffquant.nf +++ b/nevermore/workflows/gffquant.nf @@ -2,6 +2,7 @@ include { stream_gffquant; run_gffquant; collate_feature_counts } from "../modul params.gq_collate_columns = "uniq_scaled,combined_scaled" + workflow gffquant_flow { take: @@ -10,9 +11,15 @@ workflow gffquant_flow { main: - stream_gffquant(input_ch, params.gq_database) - feature_count_ch = stream_gffquant.out.results - counts = stream_gffquant.out.results + if (params.gq_stream) { + stream_gffquant(input_ch, params.gffquant_db) + feature_count_ch = (params.gq_panda) ? 
stream_gffquant.out.profiles : stream_gffquant.out.results + counts = stream_gffquant.out.results + } else { + run_gffquant(input_ch, params.gffquant_db) + feature_count_ch = run_gffquant.out.results + counts = run_gffquant.out.results + } feature_count_ch = feature_count_ch .map { sample, files -> return files } @@ -33,7 +40,10 @@ workflow gffquant_flow { Channel.from(params.gq_collate_columns.split(",")) ) - collate_feature_counts(feature_count_ch, ".txt.gz") + collate_feature_counts( + feature_count_ch, + (params.future_features ? ((params.gq_panda) ? ".pd.txt" : ".txt.gz") : "") + ) emit: diff --git a/nevermore/workflows/input.nf b/nevermore/workflows/input.nf index 02e0dfe..e60fbad 100644 --- a/nevermore/workflows/input.nf +++ b/nevermore/workflows/input.nf @@ -7,8 +7,6 @@ params.bam_input_pattern = "**.bam" def bam_suffix_pattern = params.bam_input_pattern.replaceAll(/\*/, "") -def input_dir = (params.input_dir) ? params.input_dir : params.remote_input_dir - process transfer_fastqs { input: diff --git a/nevermore/workflows/nevermore.nf b/nevermore/workflows/nevermore.nf index 46ad877..b48a1b2 100644 --- a/nevermore/workflows/nevermore.nf +++ b/nevermore/workflows/nevermore.nf @@ -3,7 +3,6 @@ nextflow.enable.dsl=2 include { nevermore_simple_preprocessing } from "./prep" -include { prepare_fastqs } from "../modules/converters/prepare_fastqs" include { fastqc } from "../modules/qc/fastqc" include { multiqc } from "../modules/qc/multiqc" include { collate_stats } from "../modules/collate" @@ -21,7 +20,7 @@ def do_stream = params.gq_stream workflow nevermore_main { take: - fastq_ch + fastq_ch main: if (do_preprocessing) { @@ -30,7 +29,7 @@ workflow nevermore_main { preprocessed_ch = nevermore_simple_preprocessing.out.main_reads_out if (!params.drop_orphans) { - preprocessed_ch = preprocessed_ch.concat(nevermore_simple_preprocessing.out.orphan_reads_out) + preprocessed_ch = preprocessed_ch.mix(nevermore_simple_preprocessing.out.orphan_reads_out) } 
nevermore_decon(preprocessed_ch) diff --git a/nevermore/workflows/pack.nf b/nevermore/workflows/pack.nf index 0741a2d..7fecf3c 100644 --- a/nevermore/workflows/pack.nf +++ b/nevermore/workflows/pack.nf @@ -7,6 +7,18 @@ workflow nevermore_pack_reads { fastq_ch main: + + /* re-add pair information, which might have been lost upstream */ + + fastq_ch = fastq_ch + .map { sample, fastqs -> + def meta = sample.clone() + meta.is_paired = [fastqs].flatten().size() == 2 + return tuple(meta, fastqs) + } + + fastq_ch.dump(pretty: true, tag: "pack_fastq_ch") + /* route all single-read files into a common channel */ single_ch = fastq_ch @@ -73,15 +85,15 @@ workflow nevermore_pack_reads { meta.merged = false return tuple(meta, fastq) } - .concat(pe_singles_ch.no_merge) - .concat(single_reads_ch.single_end) - .concat(paired_ch) - .concat(merge_single_fastqs.out.fastq) + .mix(pe_singles_ch.no_merge) + .mix(single_reads_ch.single_end) + .mix(paired_ch) + .mix(merge_single_fastqs.out.fastq) fastq_prep_ch = paired_ch - .concat(single_reads_ch.single_end) - .concat(pe_singles_ch.no_merge) - .concat(merge_single_fastqs.out.fastq) + .mix(single_reads_ch.single_end) + .mix(pe_singles_ch.no_merge) + .mix(merge_single_fastqs.out.fastq) emit: fastqs = fastq_prep_ch diff --git a/nevermore/workflows/prep.nf b/nevermore/workflows/prep.nf index 570aa28..7aa81e7 100644 --- a/nevermore/workflows/prep.nf +++ b/nevermore/workflows/prep.nf @@ -15,9 +15,21 @@ def keep_orphans = (params.keep_orphans || false) def asset_dir = "${projectDir}/nevermore/assets" +// this nonsense parameter-rearrangement is necessary for nf-core schema/clowm compatibility + +print "PARAMS_IN_PREP_BEFORE: ${params}" + +params.subsample_subset = null +params.subsample_percentile = 100.0 params.subsample = [:] +if (!params.subsample.subset) { + params.subsample.subset = params.subsample_subset +} +if (!params.subsample.percentile) { + params.subsample.percentile = params.subsample_percentile +} -print asset_dir +print 
"PARAMS_IN_PREP_AFTER: ${params}" process concat_singles { input: @@ -42,83 +54,88 @@ workflow nevermore_simple_preprocessing { main: rawcounts_ch = Channel.empty() - // if (params.run_qa || params.subsample.subset) { + if (params.run_qa || (params.subsample.subset && params.subsample.percentile < 100.0 && params.subsample.percentile > 0.0)) { - // // fastqc(fastq_ch, "raw") - // // rawcounts_ch = fastqc.out.counts + fastqc(fastq_ch, "raw") + rawcounts_ch = fastqc.out.counts - // // if (params.run_qa) { - // // multiqc( - // // fastqc.out.stats.map { sample, report -> report }.collect(), - // // "${asset_dir}/multiqc.config", - // // "raw" - // // ) - // // } + if (params.run_qa) { + multiqc( + fastqc.out.stats.map { sample, report -> report }.collect(), + "${asset_dir}/multiqc.config", + "raw" + ) + } - // if (params.subsample.subset) { + if (params.subsample.subset && params.subsample.percentile < 100.0 && params.subsample.percentile > 0.0) { + + fastq_ch.dump(pretty: true, tag: "fastq_ch") + + fastq_ch + .branch { + subsample: params.subsample.subset == "all" || it[0].library_source == params.subsample.subset + no_subsample: true + } + .set { check_subsample_ch } + + check_subsample_ch.subsample.dump(pretty: true, tag: "check_subsample_ch") + check_subsample_ch.no_subsample.dump(pretty: true, tag: "check_no_subsample_ch") + // subsample_ch = fastq_ch + // .filter { params.subsample.subset == "all" || it[0].library_source == params.subsample.subset } + // subsample_ch.dump(pretty: true, tag: "subsample_ch") - // fastq_ch - // .branch { - // subsample: params.subsample.subset == "all" || it[0].library_source == params.subsample.subset - // no_subsample: true - // } - // .set { check_subsample_ch } - // // subsample_ch = fastq_ch - // // .filter { params.subsample.subset == "all" || it[0].library_source == params.subsample.subset } - // // subsample_ch.dump(pretty: true, tag: "subsample_ch") - - // calculate_library_size_cutoff( - // fastqc.out.counts - // .filter 
{ params.subsample.subset == "all" || it[0].library_source == params.subsample.subset } - // .map { sample, counts -> return counts } - // .collect(), - // params.subsample.percentile - // ) - // calculate_library_size_cutoff.out.library_sizes.view() - - // css_ch = check_subsample_ch.subsample - // .map { sample, fastqs -> return tuple(sample.id, sample, fastqs) } - // .join( + calculate_library_size_cutoff( + fastqc.out.counts + .filter { params.subsample.subset == "all" || it[0].library_source == params.subsample.subset } + .map { sample, counts -> return counts } + .collect(), + params.subsample.percentile, + ) + calculate_library_size_cutoff.out.library_sizes.view() + + css_ch = check_subsample_ch.subsample + .map { sample, fastqs -> return tuple(sample.id, sample, fastqs) } + .join( - // calculate_library_size_cutoff.out.library_sizes - // .splitCsv(header: true, sep: '\t', strip: true) - // .map { row -> - // return tuple(row.sample, row.do_subsample == "1", row.target_size) - // }, - // by: 0, - // remainder: true - // ) - - // css_ch.dump(pretty: true, tag: "css_ch") - - - // // for some reason, .branch does not work here :S - // subsample_ch = css_ch - // .filter { it[3] } - // .map { sample_id, sample, fastqs, do_subsample, target_size -> - // return tuple(sample, fastqs, target_size) - // } - // subsample_ch.dump(pretty: true, tag: "subsample_ch") - - // subsample_reads(subsample_ch) - - // do_not_subsample_ch = css_ch - // .filter { !it[3] } - // .map { sample_id, sample, fastqs, do_subsample, target_size -> - // return tuple(sample, fastqs) - // } - // .concat( - // check_subsample_ch.no_subsample - // ) - // do_not_subsample_ch.dump(pretty: true, tag: "do_not_subsample_ch") - - // fastq_ch = do_not_subsample_ch - // .concat(subsample_reads.out.subsampled_reads) - - // fastq_ch.dump(pretty: true, tag: "post_subsample_fastq_ch") - // } - - // } + calculate_library_size_cutoff.out.library_sizes + .splitCsv(header: true, sep: '\t', strip: true) + .map { 
row -> + return tuple(row.sample, row.do_subsample == "1", row.target_size) + }, + by: 0, + remainder: true + ) + + css_ch.dump(pretty: true, tag: "css_ch") + + + // for some reason, .branch does not work here :S + subsample_ch = css_ch + .filter { it[3] } + .map { sample_id, sample, fastqs, do_subsample, target_size -> + return tuple(sample, fastqs, target_size) + } + subsample_ch.dump(pretty: true, tag: "subsample_ch") + + subsample_reads(subsample_ch) + + do_not_subsample_ch = css_ch + .filter { !it[3] } + .map { sample_id, sample, fastqs, do_subsample, target_size -> + return tuple(sample, fastqs) + } + .mix( + check_subsample_ch.no_subsample + ) + do_not_subsample_ch.dump(pretty: true, tag: "do_not_subsample_ch") + + fastq_ch = do_not_subsample_ch + .mix(subsample_reads.out.subsampled_reads) + + fastq_ch.dump(pretty: true, tag: "post_subsample_fastq_ch") + } + + } processed_reads_ch = Channel.empty() orphans_ch = Channel.empty() @@ -126,13 +143,13 @@ workflow nevermore_simple_preprocessing { if (params.amplicon_seq) { qc_bbduk_stepwise_amplicon(fastq_ch, "${asset_dir}/adapters.fa") - processed_reads_ch = processed_reads_ch.concat(qc_bbduk_stepwise_amplicon.out.reads) - orphans_ch = orphans_ch.concat(qc_bbduk_stepwise_amplicon.out.orphans) + processed_reads_ch = processed_reads_ch.mix(qc_bbduk_stepwise_amplicon.out.reads) + orphans_ch = orphans_ch.mix(qc_bbduk_stepwise_amplicon.out.orphans) } else { qc_bbduk(fastq_ch, "${asset_dir}/adapters.fa") - processed_reads_ch = processed_reads_ch.concat(qc_bbduk.out.reads) + processed_reads_ch = processed_reads_ch.mix(qc_bbduk.out.reads) orphans_ch = qc_bbduk.out.orphans .map { sample, file -> def meta = sample.clone() diff --git a/nevermore/workflows/qa.nf b/nevermore/workflows/qa.nf index f5c3459..828be84 100644 --- a/nevermore/workflows/qa.nf +++ b/nevermore/workflows/qa.nf @@ -23,7 +23,7 @@ workflow nevermore_qa { ) readcounts_ch = counts_ch - .concat(fastqc.out.counts) + .mix(fastqc.out.counts) .map { sample, file 
-> return file } emit: diff --git a/nextflow.config b/nextflow.config index 7072b56..3a9bb37 100644 --- a/nextflow.config +++ b/nextflow.config @@ -4,7 +4,7 @@ manifest { description = "Metagenomics Functional Profiler" name = "nevermore_profiler" nextflowVersion = ">=22.10.6" - version = "0.12.6" + version = "0.13.1" } process {