Skip to content

Commit

Permalink
Merge pull request #192 from sanger-tol/subset_juicer
Browse files Browse the repository at this point in the history
Add subsample and branch hic_mapping
  • Loading branch information
DLBPointon authored Jan 12, 2024
2 parents e1a383f + 178c32a commit 8cb5c9b
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 21 deletions.
4 changes: 2 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -319,11 +319,11 @@ process {
ext.args = "pairs -0 -c1 3 -p1 4 -c2 7 -p2 8"
}

withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MARKDUP" {
withName: ".*:.*:(HIC_BAMTOBED_COOLER|HIC_BAMTOBED_JUICER):SAMTOOLS_MARKDUP" {
ext.prefix = { "${meta.id}_mkdup" }
}

withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MERGE" {
withName: ".*:.*:(HIC_BAMTOBED_COOLER|HIC_BAMTOBED_JUICER):SAMTOOLS_MERGE" {
ext.prefix = { "${meta.id}_merged" }
}

Expand Down
39 changes: 39 additions & 0 deletions modules/local/subsample_bam.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//
// SUBSAMPLE_BAM: down-sample a merged BAM towards a ~50 GB target.
//
// Input  : tuple [ val(meta), path(mergedbam) ] - the BAM to subsample.
// Output : subsampled_bam - tuple [ val(meta), path("<prefix>_subsampled.bam") ]
//          versions       - versions.yml recording the samtools version used.
//
// The sampling fraction is computed as 50000000000 / <size of the BAM in bytes>,
// printed with two decimals, and handed to `samtools view -s`.
// NOTE(review): `-s` takes INT.FRAC (seed.fraction), so a value above 1 would be
// read as a seed plus a fraction. hic_mapping.nf only routes BAMs of at least
// 50000000000 bytes to this process; confirm before reusing it on smaller files.
//
process SUBSAMPLE_BAM {
    tag "${meta.id}"
    label 'process_tiny'

    conda "bioconda::samtools=1.17"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
        'biocontainers/samtools:1.17--h00cdaf9_0' }"

    input:
    tuple val(meta), path(mergedbam)

    output:
    // Match only the file written below rather than every BAM in the work dir.
    tuple val(meta), path('*_subsampled.bam'), emit: subsampled_bam
    path "versions.yml"                      , emit: versions

    script:
    // Honour ext.prefix when configured; falls back to the sample id so the
    // default output name is unchanged (<meta.id>_subsampled.bam).
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    # Read the size from stdin so wc prints only the byte count (no file name
    # to strip off), then turn it into the sampling fraction.
    percentage=\$(wc -c < ${mergedbam} | awk '{ printf "%.2f\\n", 50000000000 / \$1 }')

    samtools view -s \$percentage -b ${mergedbam} > ${prefix}_subsampled.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_subsampled.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
    END_VERSIONS
    """
}
64 changes: 64 additions & 0 deletions subworkflows/local/hic_bamtobed.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env nextflow

// This subworkflow converts .bam to .bed for the hic_mapping subworkflow.
// It runs markdup, sort and get paired contacts.
// Input - Assembled genomic fasta file, .bam file
// Output - sorted .bed and paired contact .bed

//
// MODULE IMPORT BLOCK
//
include { SAMTOOLS_MARKDUP } from '../../modules/nf-core/samtools/markdup/main'
include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf'
include { GET_PAIRED_CONTACT_BED } from '../../modules/local/get_paired_contact_bed'


//
// HIC_BAMTOBED: mark duplicates in a BAM, convert it to a sorted BED and
// derive the paired-contact BED from that.
//
// take : bam_file        - tuple [ val(meta), path(bam) ]
//        reference_tuple - tuple [ val(meta), path(reference) ]
// emit : paired_contacts_bed, sorted_bed, versions
//
workflow HIC_BAMTOBED {
    take:
    bam_file            // Channel: tuple [ val(meta), path( file ) ]
    reference_tuple     // Channel: tuple [ val(meta), path( file ) ]

    main:
    //
    // LOGIC: PAIR EVERY BAM WITH THE REFERENCE, THEN SPLIT THE PAIR INTO THE
    //        TWO SEPARATE INPUTS EXPECTED BY SAMTOOLS_MARKDUP
    //
    bam_file
        .combine( reference_tuple )
        .multiMap { bam_meta, bam_path, ref_meta, ref_path ->
            bam         : tuple( bam_meta, bam_path )
            reference   : ref_path
        }
        .set { ch_markdup }

    //
    // MODULE: MARK DUPLICATES IN THE BAM
    //
    SAMTOOLS_MARKDUP (
        ch_markdup.bam,
        ch_markdup.reference
    )

    //
    // MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE
    //
    BAMTOBED_SORT (
        SAMTOOLS_MARKDUP.out.bam
    )

    //
    // MODULE: GENERATE CONTACT PAIRS FROM THE SORTED BED
    //
    GET_PAIRED_CONTACT_BED (
        BAMTOBED_SORT.out.sorted_bed
    )

    //
    // LOGIC: GATHER THE VERSIONS REPORTED BY ALL THREE MODULES
    //
    ch_versions = Channel.empty()
        .mix(
            SAMTOOLS_MARKDUP.out.versions,
            BAMTOBED_SORT.out.versions,
            GET_PAIRED_CONTACT_BED.out.versions
        )

    emit:
    paired_contacts_bed = GET_PAIRED_CONTACT_BED.out.bed
    sorted_bed          = BAMTOBED_SORT.out.sorted_bed
    versions            = ch_versions.ifEmpty(null)
}
87 changes: 68 additions & 19 deletions subworkflows/local/hic_mapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ include { PRETEXTMAP as PRETEXTMAP_STANDRD } from '../../modules/nf-cor
include { PRETEXTMAP as PRETEXTMAP_HIGHRES } from '../../modules/nf-core/pretextmap/main'
include { PRETEXTSNAPSHOT as SNAPSHOT_SRES } from '../../modules/nf-core/pretextsnapshot/main'
include { PRETEXTSNAPSHOT as SNAPSHOT_HRES } from '../../modules/nf-core/pretextsnapshot/main'
include { SAMTOOLS_MARKDUP } from '../../modules/nf-core/samtools/markdup/main'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf'
include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv'
include { CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT } from '../../modules/local/cram_filter_align_bwamem2_fixmate_sort'
include { JUICER_TOOLS_PRE } from '../../modules/local/juicer_tools_pre'
include { GET_PAIRED_CONTACT_BED } from '../../modules/local/get_paired_contact_bed'
include { SUBSAMPLE_BAM } from '../../modules/local/subsample_bam.nf'
include { PRETEXT_INGESTION as PRETEXT_INGEST_SNDRD } from '../../subworkflows/local/pretext_ingestion'
include { PRETEXT_INGESTION as PRETEXT_INGEST_HIRES } from '../../subworkflows/local/pretext_ingestion'
include { HIC_BAMTOBED as HIC_BAMTOBED_COOLER } from '../../subworkflows/local/hic_bamtobed'
include { HIC_BAMTOBED as HIC_BAMTOBED_JUICER } from '../../subworkflows/local/hic_bamtobed'


workflow HIC_MAPPING {
Expand Down Expand Up @@ -210,32 +210,61 @@ workflow HIC_MAPPING {
// ch_versions = ch_versions.mix ( SNAPSHOT_HRES.out.versions )

//
// MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES
// LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G
//
SAMTOOLS_MARKDUP (
pretext_input.input_bam,
pretext_input.reference
)
ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions )
SAMTOOLS_MERGE.out.bam
.map{ meta, bam ->
tuple(
[ id : meta.id,
sz : file(bam).size()
],
bam
)
}
.branch {
tosubsample : it[0].sz >= 50000000000
unmodified : it[0].sz < 50000000000
}
.set { ch_merged_bam }

//
// MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE
// MODULE: SUBSAMPLE BAM
//
BAMTOBED_SORT(
SAMTOOLS_MARKDUP.out.bam
SUBSAMPLE_BAM (
ch_merged_bam.tosubsample
)
ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions )
ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions )

//
// LOGIC: COMBINE BRANCHED TO SINGLE OUTPUT
//
// mix() returns a NEW channel and leaves its receiver untouched, so the result
// must be assigned. Without the assignment only the subsampled BAMs continue
// downstream and any BAM on the `unmodified` branch is silently dropped.
ch_subsampled_bam = SUBSAMPLE_BAM.out.subsampled_bam
    .mix( ch_merged_bam.unmodified )

//
// LOGIC: PREPARE BAMTOBED JUICER INPUT
//
ch_subsampled_bam
.combine( reference_tuple )
.multiMap { meta, subsampled_bam, meta_ref, ref ->
bam : tuple(meta, subsampled_bam )
reference : tuple(meta_ref, ref)
}
.set { ch_bamtobed_juicer_input }

//
// MODULE: GENERATE CONTACT PAIRS
// SUBWORKFLOW: BAM TO BED FOR JUICER - USES THE SUBSAMPLED MERGED BAM
//
GET_PAIRED_CONTACT_BED( BAMTOBED_SORT.out.sorted_bed )
ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions )
HIC_BAMTOBED_JUICER(
ch_bamtobed_juicer_input.bam,
ch_bamtobed_juicer_input.reference
)
ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions )

//
// LOGIC: PREPARE JUICER TOOLS INPUT
//
GET_PAIRED_CONTACT_BED.out.bed
HIC_BAMTOBED_JUICER.out.paired_contacts_bed
.combine( dot_genome )
.multiMap { meta, paired_contacts, meta_my_genome, my_genome ->
paired : tuple([ id: meta.id, single_end: true], paired_contacts )
Expand All @@ -254,11 +283,31 @@ workflow HIC_MAPPING {
)
ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions )


// LOGIC: PREPARE BAMTOBED COOLER INPUT
//
SAMTOOLS_MERGE.out.bam
.combine( reference_tuple )
.multiMap { meta, merged_bam, meta_ref, ref ->
bam : tuple(meta, merged_bam )
reference : tuple(meta_ref, ref)
}
.set { ch_bamtobed_cooler_input }

//
// SUBWORKFLOW: BAM TO BED FOR COOLER
//
HIC_BAMTOBED_COOLER(
ch_bamtobed_cooler_input.bam,
ch_bamtobed_cooler_input.reference
)
ch_versions = ch_versions.mix( HIC_BAMTOBED_COOLER.out.versions )

//
// LOGIC: BIN CONTACT PAIRS
//
GET_PAIRED_CONTACT_BED.out.bed
.join( BAMTOBED_SORT.out.sorted_bed )
HIC_BAMTOBED_COOLER.out.paired_contacts_bed
.join( HIC_BAMTOBED_COOLER.out.sorted_bed )
.combine( ch_cool_bin )
.set { ch_binned_pairs }

Expand Down

0 comments on commit 8cb5c9b

Please sign in to comment.