From db2e41cdae02d76f95d6618539ecceea4156fb80 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Tue, 9 Jan 2024 13:55:03 +0000 Subject: [PATCH 01/11] Add subsample and branch hic_mapping --- conf/modules.config | 4 +- modules/local/subsample_bam.nf | 2 +- subworkflows/local/hic_bamtobed.nf | 64 ++++++++++++++++++++++++++++++ subworkflows/local/hic_mapping.nf | 44 +++++++++----------- 4 files changed, 86 insertions(+), 28 deletions(-) create mode 100755 subworkflows/local/hic_bamtobed.nf diff --git a/conf/modules.config b/conf/modules.config index de920d84..44fa96e6 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -319,11 +319,11 @@ process { ext.args = "pairs -0 -c1 3 -p1 4 -c2 7 -p2 8" } - withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MARKDUP" { + withName: ".*:.*:HIC_BAMTOBED:SAMTOOLS_MARKDUP" { ext.prefix = { "${meta.id}_mkdup" } } - withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MERGE" { + withName: ".*:.*:HIC_BAMTOBED:SAMTOOLS_MERGE" { ext.prefix = { "${meta.id}_merged" } } diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index 575b58fe..2f9035fe 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -11,7 +11,7 @@ process SUBSAMPLE_BAM { tuple val(meta), path(mergedbam) output: - tuple val(meta), path('*.bam'), emit: csv + tuple val(meta), path('*.bam'), emit: bam path "versions.yml", emit: versions script: diff --git a/subworkflows/local/hic_bamtobed.nf b/subworkflows/local/hic_bamtobed.nf new file mode 100755 index 00000000..8d5fefea --- /dev/null +++ b/subworkflows/local/hic_bamtobed.nf @@ -0,0 +1,64 @@ +#!/usr/bin/env nextflow + +// This subworkflow converts .bam to .bed for the hic_mapping subworkflow. +// It runs markdup, sort and get paired contacts. 
+// Input - Assembled genomic fasta file, .bam file +// Output - sorted .bed and paired contact .bed + +// +// MODULE IMPORT BLOCK +// +include { SAMTOOLS_MARKDUP } from '../../modules/nf-core/samtools/markdup/main' +include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf' +include { GET_PAIRED_CONTACT_BED } from '../../modules/local/get_paired_contact_bed' + + +workflow HIC_BAMTOBED { + take: + bam_file // Channel: tuple [ val(meta), path( file ) ] + reference_tuple // Channel: tuple [ val(meta), path( file ) ] + + main: + ch_versions = Channel.empty() + + // + // LOGIC: PREPARE MARKDUP INPUT + // + bam_file + .combine( reference_tuple ) + .multiMap { meta_bam, bam_file, meta_ref, ref -> + bam : tuple(meta_bam, bam_file ) + reference : ref + } + .set { markdup_input } + + // + // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES + // + SAMTOOLS_MARKDUP ( + markdup_input.bam + markdup_input.reference + ) + ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions ) + + // + // MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE + // + BAMTOBED_SORT( + SAMTOOLS_MARKDUP.out.bam + ) + ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions ) + + // + // MODULE: GENERATE CONTACT PAIRS + // + GET_PAIRED_CONTACT_BED( + BAMTOBED_SORT.out.sorted_bed + ) + ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions ) + + emit: + paired_contacts_bed = GET_PAIRED_CONTACT_BED.out.bed + sorted_bed = BAMTOBED_SORT.out.bed + versions = ch_versions.ifEmpty(null) +} diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index 72d70456..60734ded 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -15,16 +15,15 @@ include { PRETEXTMAP as PRETEXTMAP_STANDRD } from '../../modules/nf-cor include { PRETEXTMAP as PRETEXTMAP_HIGHRES } from '../../modules/nf-core/pretextmap/main' include { PRETEXTSNAPSHOT as SNAPSHOT_SRES } from 
'../../modules/nf-core/pretextsnapshot/main' include { PRETEXTSNAPSHOT as SNAPSHOT_HRES } from '../../modules/nf-core/pretextsnapshot/main' -include { SAMTOOLS_MARKDUP } from '../../modules/nf-core/samtools/markdup/main' include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main' -include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf' include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv' include { CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT } from '../../modules/local/cram_filter_align_bwamem2_fixmate_sort' include { JUICER_TOOLS_PRE } from '../../modules/local/juicer_tools_pre' -include { GET_PAIRED_CONTACT_BED } from '../../modules/local/get_paired_contact_bed' include { SUBSAMPLE_BAM } from '../../modules/local/subsample_bam.nf' include { PRETEXT_INGESTION as PRETEXT_INGEST_SNDRD } from '../../subworkflows/local/pretext_ingestion' include { PRETEXT_INGESTION as PRETEXT_INGEST_HIRES } from '../../subworkflows/local/pretext_ingestion' +include { HIC_BAMTOBED as HIC_BAMTOBED_COOLER } from '../../subworkflows/local/hic_bamtobed' +include { HIC_BAMTOBED as HIC_BAMTOBED_JUICER } from '../../subworkflows/local/hic_bamtobed' workflow HIC_MAPPING { @@ -219,32 +218,18 @@ workflow HIC_MAPPING { ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) // - // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES - // - SAMTOOLS_MARKDUP ( - pretext_input.input_bam, - pretext_input.reference - ) - ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions ) - + // SUBWORKFLOW: BAM TO BED FOR JUICER - INCLUDES SUBSAMPLING // - // MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE - // - BAMTOBED_SORT( - SAMTOOLS_MARKDUP.out.bam + HIC_BAMTOBED_JUICER( + SUBSAMPLE_BAM.out.bam, + reference_tuple ) - ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions ) - - // - // MODULE: GENERATE CONTACT PAIRS - // - GET_PAIRED_CONTACT_BED( BAMTOBED_SORT.out.sorted_bed ) - ch_versions = ch_versions.mix( 
GET_PAIRED_CONTACT_BED.out.versions ) + ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions ) // // LOGIC: PREPARE JUICER TOOLS INPUT // - GET_PAIRED_CONTACT_BED.out.bed + HIC_BAMTOBED_JUICER.out.paired_contacts_bed .combine( dot_genome ) .multiMap { meta, paired_contacts, meta_my_genome, my_genome -> paired : tuple([ id: meta.id, single_end: true], paired_contacts ) @@ -263,11 +248,20 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions ) + // + // SUBWORKFLOW: BAM TO BED FOR COOLER + // + HIC_BAMTOBED_COOLER( + SAMTOOLS_MERGE.out.bam, + reference_tuple + ) + ch_versions = ch_versions.mix( HIC_BAMTOBED_COOLER.out.versions ) + // // LOGIC: BIN CONTACT PAIRS // - GET_PAIRED_CONTACT_BED.out.bed - .join( BAMTOBED_SORT.out.sorted_bed ) + HIC_BAMTOBED_COOLER.out.paired_contacts_bed + .join( HIC_BAMTOBED_COOLER.out.sorted_bed ) .combine( ch_cool_bin ) .set { ch_binned_pairs } From c6fdea7e31344ba842799bbe7d8585d435b34f42 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 10 Jan 2024 11:26:10 +0000 Subject: [PATCH 02/11] subsample filename fix --- assets/local_testing/nxOscSUBSET.yaml | 7 ++++--- modules/local/subsample_bam.nf | 4 ++-- subworkflows/local/hic_bamtobed.nf | 4 ++-- subworkflows/local/hic_mapping.nf | 30 +++++++++++++++++++++++---- 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/assets/local_testing/nxOscSUBSET.yaml b/assets/local_testing/nxOscSUBSET.yaml index ef800b93..3db13cfc 100755 --- a/assets/local_testing/nxOscSUBSET.yaml +++ b/assets/local_testing/nxOscSUBSET.yaml @@ -8,9 +8,10 @@ assembly: reference_file: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_SUBSET/assembly/draft/SUBSET_genome/Oscheius_SUBSET.fasta #/lustre/scratch123/tol/resources/treeval/nextflow_test_data/Oscheius_DF5033/assembly/draft/DF5033.hifiasm.noTelos.20211120/DF5033.noTelos.hifiasm.purged.noCont.noMito.fasta assem_reads: - pacbio: 
/lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_SUBSET/genomic_data/pacbio/ - hic: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/subset/ - supplementary: path + longread_type: hifi + longread_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_SUBSET/genomic_data/pacbio/ + hic_data: /lustre/scratch123/tol/resources/treeval/treeval-testdata/TreeValSmallData/Oscheius_DF5033/genomic_data/nxOscSpes1/hic-arima2/subset/ + supplementary_data: path alignment: data_dir: /lustre/scratch123/tol/resources/treeval/gene_alignment_data/ common_name: "" # For future implementation (adding bee, wasp, ant etc) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index 2f9035fe..5ac8d66c 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -11,13 +11,13 @@ process SUBSAMPLE_BAM { tuple val(meta), path(mergedbam) output: - tuple val(meta), path('*.bam'), emit: bam + tuple val(meta), path('*.bam'), emit: subsampled_bam path "versions.yml", emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" $/ - bamsize=`wc -c "${file}" | cut -d$' ' -f1` + bamsize=`wc -c "${mergedbam}" | cut -d$' ' -f1` percentage=`50000000000/bamsize` if [[ $percentage -lt 1 ]] diff --git a/subworkflows/local/hic_bamtobed.nf b/subworkflows/local/hic_bamtobed.nf index 8d5fefea..432ae1b7 100755 --- a/subworkflows/local/hic_bamtobed.nf +++ b/subworkflows/local/hic_bamtobed.nf @@ -36,7 +36,7 @@ workflow HIC_BAMTOBED { // MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES // SAMTOOLS_MARKDUP ( - markdup_input.bam + markdup_input.bam, markdup_input.reference ) ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions ) @@ -59,6 +59,6 @@ workflow HIC_BAMTOBED { emit: paired_contacts_bed = GET_PAIRED_CONTACT_BED.out.bed - sorted_bed = BAMTOBED_SORT.out.bed + sorted_bed = 
BAMTOBED_SORT.out.sorted_bed versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index 60734ded..bd7daff1 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -217,12 +217,23 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) + // + // LOGIC: PREPARE BAMTOBED JUICER INPUT + // + SUBSAMPLE_BAM.out.subsampled_bam + .combine( reference_tuple ) + .multiMap { meta, subsampled_bam, meta_ref, ref -> + bam : tuple(meta, subsampled_bam ) + reference : tuple(meta_ref, ref) + } + .set { ch_bamtobed_juicer_input } + // // SUBWORKFLOW: BAM TO BED FOR JUICER - INCLUDES SUBSAMPLING // HIC_BAMTOBED_JUICER( - SUBSAMPLE_BAM.out.bam, - reference_tuple + ch_bamtobed_juicer_input.bam, + ch_bamtobed_juicer_input.reference ) ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions ) @@ -248,12 +259,23 @@ workflow HIC_MAPPING { ) ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions ) + // + // LOGIC: PREPARE BAMTOBED COOLER INPUT + // + SAMTOOLS_MERGE.out.bam + .combine( reference_tuple ) + .multiMap { meta, merged_bam, meta_ref, ref -> + bam : tuple(meta, merged_bam ) + reference : tuple(meta_ref, ref) + } + .set { ch_bamtobed_cooler_input } + // // SUBWORKFLOW: BAM TO BED FOR COOLER // HIC_BAMTOBED_COOLER( - SAMTOOLS_MERGE.out.bam, - reference_tuple + ch_bamtobed_cooler_input.bam, + ch_bamtobed_cooler_input.reference ) ch_versions = ch_versions.mix( HIC_BAMTOBED_COOLER.out.versions ) From 2bf8141365fef3e9cdef000c4da778bc273d1487 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 10 Jan 2024 12:10:30 +0000 Subject: [PATCH 03/11] Update modules.config, with updated names --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c4900a06..7a777d26 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -319,11 +319,11 @@ process { ext.args = 
"pairs -0 -c1 3 -p1 4 -c2 7 -p2 8" } - withName: ".*:.*:HIC_BAMTOBED:SAMTOOLS_MARKDUP" { + withName: ".*:.*:(HIC_BAMTOBED_COOLER|HIC_BAMTOBED_JUICER):SAMTOOLS_MARKDUP" { ext.prefix = { "${meta.id}_mkdup" } } - withName: ".*:.*:HIC_BAMTOBED:SAMTOOLS_MERGE" { + withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MERGE" { ext.prefix = { "${meta.id}_merged" } } From 1e6b0ea57e00b50f13e5bcd9e01969cfe328356c Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 10 Jan 2024 13:16:21 +0000 Subject: [PATCH 04/11] Subsample percentage fix --- modules/local/subsample_bam.nf | 2 +- subworkflows/local/hic_mapping.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index 5ac8d66c..7d4fd676 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -18,7 +18,7 @@ process SUBSAMPLE_BAM { def prefix = task.ext.prefix ?: "${meta.id}" $/ bamsize=`wc -c "${mergedbam}" | cut -d$' ' -f1` - percentage=`50000000000/bamsize` + percentage=$((50000000000/$bamsize)) if [[ $percentage -lt 1 ]] then diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index bd7daff1..cb42b593 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -229,7 +229,7 @@ workflow HIC_MAPPING { .set { ch_bamtobed_juicer_input } // - // SUBWORKFLOW: BAM TO BED FOR JUICER - INCLUDES SUBSAMPLING + // SUBWORKFLOW: BAM TO BED FOR JUICER - USES THE SUBSAMPLED MERGED BAM // HIC_BAMTOBED_JUICER( ch_bamtobed_juicer_input.bam, From 476d2121bf2349e4d04c421a58d4e2c99d90fbcd Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 10 Jan 2024 13:37:26 +0000 Subject: [PATCH 05/11] Subsample percentage fix using bc --- modules/local/subsample_bam.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index 7d4fd676..2e1898d0 100755 --- 
a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -18,7 +18,8 @@ process SUBSAMPLE_BAM { def prefix = task.ext.prefix ?: "${meta.id}" $/ bamsize=`wc -c "${mergedbam}" | cut -d$' ' -f1` - percentage=$((50000000000/$bamsize)) + threshold=50000000000 + percentage=`bc <<< "scale=0;$threshold/$bamsize"` if [[ $percentage -lt 1 ]] then From c997c7ac9526254a49f6ee3e7b7fcbb48c63f1e7 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 10 Jan 2024 13:46:01 +0000 Subject: [PATCH 06/11] Subsample percentage fix using bc - update format --- modules/local/subsample_bam.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index 2e1898d0..e300ea67 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -19,7 +19,7 @@ process SUBSAMPLE_BAM { $/ bamsize=`wc -c "${mergedbam}" | cut -d$' ' -f1` threshold=50000000000 - percentage=`bc <<< "scale=0;$threshold/$bamsize"` + percentage=`echo "scale=0;$threshold/$bamsize" | bc` if [[ $percentage -lt 1 ]] then From bab39e1a59279fd2d5c32ae1077283c677f78dac Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 10 Jan 2024 13:58:18 +0000 Subject: [PATCH 07/11] Subsample percentage fix using bc - update format quotes --- modules/local/subsample_bam.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index e300ea67..1a64c2bf 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -19,7 +19,7 @@ process SUBSAMPLE_BAM { $/ bamsize=`wc -c "${mergedbam}" | cut -d$' ' -f1` threshold=50000000000 - percentage=`echo "scale=0;$threshold/$bamsize" | bc` + percentage=`echo 'scale=0;'"$threshold"'/'"$bamsize" | bc` if [[ $percentage -lt 1 ]] then From 33ea9d4f0acb95f1342b9eb2988c07eb989c9ad1 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Wed, 10 Jan 2024 14:20:19 +0000 Subject: [PATCH 08/11] Subsample 
percentage fix using bc - remove quotes --- modules/local/subsample_bam.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index 1a64c2bf..d1c8371f 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -19,7 +19,7 @@ process SUBSAMPLE_BAM { $/ bamsize=`wc -c "${mergedbam}" | cut -d$' ' -f1` threshold=50000000000 - percentage=`echo 'scale=0;'"$threshold"'/'"$bamsize" | bc` + percentage=`echo 'scale=0;'$threshold'/'$bamsize | bc` if [[ $percentage -lt 1 ]] then From 8bbca8bff4e697b452a5eeb9f7514046d051611b Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 11 Jan 2024 11:20:29 +0000 Subject: [PATCH 09/11] Update subsample with awk --- modules/local/subsample_bam.nf | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index d1c8371f..efbc2336 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -14,25 +14,22 @@ process SUBSAMPLE_BAM { tuple val(meta), path('*.bam'), emit: subsampled_bam path "versions.yml", emit: versions - script: + shell: def prefix = task.ext.prefix ?: "${meta.id}" - $/ - bamsize=`wc -c "${mergedbam}" | cut -d$' ' -f1` - threshold=50000000000 - percentage=`echo 'scale=0;'$threshold'/'$bamsize | bc` - - if [[ $percentage -lt 1 ]] - then - samtools view -s $percentage -b ${mergedbam} > ${prefix}_subsampled.bam + ''' + percentage=`wc -c !{mergedbam} | cut -d$' ' -f1 | awk '{printf "%.2f\\n", 50000000000 / $0}'` + + if awk "BEGIN {exit !($percentage <= 1 )}"; then + samtools view -s $percentage -b !{mergedbam} > !{meta.id}_subsampled.bam else - mv ${mergedbam} ${prefix}_subsampled.bam + mv !{mergedbam} !{meta.id}_subsampled.bam fi cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) END_VERSIONS - /$ + ''' stub: """ From 
cce1153e440628f9f839dbebefa9d4edde62bec7 Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 11 Jan 2024 11:40:52 +0000 Subject: [PATCH 10/11] Update subsample versions --- modules/local/subsample_bam.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index efbc2336..f3aa8b57 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -26,7 +26,7 @@ process SUBSAMPLE_BAM { fi cat <<-END_VERSIONS > versions.yml - "${task.process}": + "!{task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' ) END_VERSIONS ''' From eccb097fe20685a970fa4129cf6d4fb0292afe9b Mon Sep 17 00:00:00 2001 From: William Eagles Date: Thu, 11 Jan 2024 16:40:33 +0000 Subject: [PATCH 11/11] Moved if to outside subsample module, resource saving --- modules/local/subsample_bam.nf | 6 +----- subworkflows/local/hic_mapping.nf | 32 +++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/modules/local/subsample_bam.nf b/modules/local/subsample_bam.nf index f3aa8b57..0e5d798a 100755 --- a/modules/local/subsample_bam.nf +++ b/modules/local/subsample_bam.nf @@ -19,11 +19,7 @@ process SUBSAMPLE_BAM { ''' percentage=`wc -c !{mergedbam} | cut -d$' ' -f1 | awk '{printf "%.2f\\n", 50000000000 / $0}'` - if awk "BEGIN {exit !($percentage <= 1 )}"; then - samtools view -s $percentage -b !{mergedbam} > !{meta.id}_subsampled.bam - else - mv !{mergedbam} !{meta.id}_subsampled.bam - fi + samtools view -s $percentage -b !{mergedbam} > !{meta.id}_subsampled.bam cat <<-END_VERSIONS > versions.yml "!{task.process}": diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index cb42b593..23620920 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -210,17 +210,41 @@ workflow HIC_MAPPING { // ch_versions = ch_versions.mix ( SNAPSHOT_HRES.out.versions ) // - // 
MODULE: SUBSAMPLE BAM IF OVER 50G + // LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G + // + SAMTOOLS_MERGE.out.bam + .map{ meta, bam -> + tuple( + [ id : meta.id, + sz : file(bam).size() + ], + bam + ) + } + .branch { + tosubsample : it[0].sz >= 50000000000 + unmodified : it[0].sz < 50000000000 + } + .set { ch_merged_bam } + + // + // MODULE: SUBSAMPLE BAM // SUBSAMPLE_BAM ( - SAMTOOLS_MERGE.out.bam + ch_merged_bam.tosubsample ) - ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) + ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions ) + + // + // LOGIC: COMBINE BRANCHED TO SINGLE OUTPUT - MIX RETURNS A NEW CHANNEL SO ITS RESULT MUST BE ASSIGNED + // + ch_subsampled_bam = SUBSAMPLE_BAM.out.subsampled_bam + .mix(ch_merged_bam.unmodified) // // LOGIC: PREPARE BAMTOBED JUICER INPUT // - SUBSAMPLE_BAM.out.subsampled_bam + ch_subsampled_bam .combine( reference_tuple ) .multiMap { meta, subsampled_bam, meta_ref, ref -> bam : tuple(meta, subsampled_bam )