Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add subsample and branch hic_mapping #192

Merged
merged 14 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -319,11 +319,11 @@ process {
ext.args = "pairs -0 -c1 3 -p1 4 -c2 7 -p2 8"
}

withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MARKDUP" {
withName: ".*:.*:(HIC_BAMTOBED_COOLER|HIC_BAMTOBED_JUICER):SAMTOOLS_MARKDUP" {
ext.prefix = { "${meta.id}_mkdup" }
}

withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MERGE" {
withName: ".*:.*:(HIC_BAMTOBED_COOLER|HIC_BAMTOBED_JUICER):SAMTOOLS_MERGE" {
ext.prefix = { "${meta.id}_merged" }
}

Expand Down
39 changes: 39 additions & 0 deletions modules/local/subsample_bam.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
process SUBSAMPLE_BAM {
tag "${meta.id}"
label 'process_tiny'

conda "bioconda::samtools=1.17"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
'biocontainers/samtools:1.17--h00cdaf9_0' }"

input:
tuple val(meta), path(mergedbam)

output:
tuple val(meta), path('*.bam'), emit: subsampled_bam
path "versions.yml", emit: versions

shell:
def prefix = task.ext.prefix ?: "${meta.id}"
'''
percentage=`wc -c !{mergedbam} | cut -d$' ' -f1 | awk '{printf "%.2f\\n", 50000000000 / $0}'`

samtools view -s $percentage -b !{mergedbam} > !{meta.id}_subsampled.bam

cat <<-END_VERSIONS > versions.yml
"!{task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
END_VERSIONS
'''

stub:
"""
touch ${meta.id}_subsampled.bam

cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
END_VERSIONS
"""
}
64 changes: 64 additions & 0 deletions subworkflows/local/hic_bamtobed.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env nextflow

// This subworkflow takes converts .bam to .bed for the hic_mapping subworkflow.
// It runs markdup, sort and get paired contacts.
// Input - Assembled genomic fasta file, .bam file
// Output - sorted .bed and paired contact .bed

//
// MODULE IMPORT BLOCK
//
include { SAMTOOLS_MARKDUP } from '../../modules/nf-core/samtools/markdup/main'
include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf'
include { GET_PAIRED_CONTACT_BED } from '../../modules/local/get_paired_contact_bed'


workflow HIC_BAMTOBED {
take:
bam_file // Channel: tuple [ val(meta), path( file ) ]
reference_tuple // Channel: tuple [ val(meta), path( file ) ]

main:
ch_versions = Channel.empty()

//
// LOGIC: PREPARE MARKDUP INPUT
//
bam_file
.combine( reference_tuple )
.multiMap { meta_bam, bam_file, meta_ref, ref ->
bam : tuple(meta_bam, bam_file )
reference : ref
}
.set { markdup_input }

//
// MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES
//
SAMTOOLS_MARKDUP (
markdup_input.bam,
markdup_input.reference
)
ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions )

//
// MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE
//
BAMTOBED_SORT(
SAMTOOLS_MARKDUP.out.bam
)
ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions )

//
// MODULE: GENERATE CONTACT PAIRS
//
GET_PAIRED_CONTACT_BED(
BAMTOBED_SORT.out.sorted_bed
)
ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions )

emit:
paired_contacts_bed = GET_PAIRED_CONTACT_BED.out.bed
sorted_bed = BAMTOBED_SORT.out.sorted_bed
versions = ch_versions.ifEmpty(null)
}
87 changes: 68 additions & 19 deletions subworkflows/local/hic_mapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ include { PRETEXTMAP as PRETEXTMAP_STANDRD } from '../../modules/nf-cor
include { PRETEXTMAP as PRETEXTMAP_HIGHRES } from '../../modules/nf-core/pretextmap/main'
include { PRETEXTSNAPSHOT as SNAPSHOT_SRES } from '../../modules/nf-core/pretextsnapshot/main'
include { PRETEXTSNAPSHOT as SNAPSHOT_HRES } from '../../modules/nf-core/pretextsnapshot/main'
include { SAMTOOLS_MARKDUP } from '../../modules/nf-core/samtools/markdup/main'
include { SAMTOOLS_MERGE } from '../../modules/nf-core/samtools/merge/main'
include { BAMTOBED_SORT } from '../../modules/local/bamtobed_sort.nf'
include { GENERATE_CRAM_CSV } from '../../modules/local/generate_cram_csv'
include { CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT } from '../../modules/local/cram_filter_align_bwamem2_fixmate_sort'
include { JUICER_TOOLS_PRE } from '../../modules/local/juicer_tools_pre'
include { GET_PAIRED_CONTACT_BED } from '../../modules/local/get_paired_contact_bed'
include { SUBSAMPLE_BAM } from '../../modules/local/subsample_bam.nf'
include { PRETEXT_INGESTION as PRETEXT_INGEST_SNDRD } from '../../subworkflows/local/pretext_ingestion'
include { PRETEXT_INGESTION as PRETEXT_INGEST_HIRES } from '../../subworkflows/local/pretext_ingestion'
include { HIC_BAMTOBED as HIC_BAMTOBED_COOLER } from '../../subworkflows/local/hic_bamtobed'
include { HIC_BAMTOBED as HIC_BAMTOBED_JUICER } from '../../subworkflows/local/hic_bamtobed'


workflow HIC_MAPPING {
Expand Down Expand Up @@ -210,32 +210,61 @@ workflow HIC_MAPPING {
// ch_versions = ch_versions.mix ( SNAPSHOT_HRES.out.versions )

//
// MODULE: MERGE POSITION SORTED BAM FILES AND MARK DUPLICATES
// LOGIC: BRANCH TO SUBSAMPLE BAM IF LARGER THAN 50G
//
SAMTOOLS_MARKDUP (
pretext_input.input_bam,
pretext_input.reference
)
ch_versions = ch_versions.mix ( SAMTOOLS_MARKDUP.out.versions )
SAMTOOLS_MERGE.out.bam
.map{ meta, bam ->
tuple(
[ id : meta.id,
sz : file(bam).size()
],
bam
)
}
.branch {
tosubsample : it[0].sz >= 50000000000
unmodified : it[0].sz < 50000000000
}
.set { ch_merged_bam }

//
// MODULE: SAMTOOLS FILTER OUT DUPLICATE READS | BAMTOBED | SORT BED FILE
// MODULE: SUBSAMPLE BAM
//
BAMTOBED_SORT(
SAMTOOLS_MARKDUP.out.bam
SUBSAMPLE_BAM (
ch_merged_bam.tosubsample
)
ch_versions = ch_versions.mix( BAMTOBED_SORT.out.versions )
ch_versions = ch_versions.mix ( SUBSAMPLE_BAM.out.versions )

//
// LOGIC: COMBINE BRANCHED TO SINGLE OUTPUT
//
ch_subsampled_bam = SUBSAMPLE_BAM.out.subsampled_bam
ch_subsampled_bam.mix(ch_merged_bam.unmodified)

//
// LOGIC: PREPARE BAMTOBED JUICER INPUT
//
ch_subsampled_bam
.combine( reference_tuple )
.multiMap { meta, subsampled_bam, meta_ref, ref ->
bam : tuple(meta, subsampled_bam )
reference : tuple(meta_ref, ref)
}
.set { ch_bamtobed_juicer_input }

//
// MODULE: GENERATE CONTACT PAIRS
// SUBWORKFLOW: BAM TO BED FOR JUICER - USES THE SUBSAMPLED MERGED BAM
//
GET_PAIRED_CONTACT_BED( BAMTOBED_SORT.out.sorted_bed )
ch_versions = ch_versions.mix( GET_PAIRED_CONTACT_BED.out.versions )
HIC_BAMTOBED_JUICER(
ch_bamtobed_juicer_input.bam,
ch_bamtobed_juicer_input.reference
)
ch_versions = ch_versions.mix( HIC_BAMTOBED_JUICER.out.versions )

//
// LOGIC: PREPARE JUICER TOOLS INPUT
//
GET_PAIRED_CONTACT_BED.out.bed
HIC_BAMTOBED_JUICER.out.paired_contacts_bed
.combine( dot_genome )
.multiMap { meta, paired_contacts, meta_my_genome, my_genome ->
paired : tuple([ id: meta.id, single_end: true], paired_contacts )
Expand All @@ -254,11 +283,31 @@ workflow HIC_MAPPING {
)
ch_versions = ch_versions.mix( JUICER_TOOLS_PRE.out.versions )


// LOGIC: PREPARE BAMTOBED JUICER INPUT
//
SAMTOOLS_MERGE.out.bam
.combine( reference_tuple )
.multiMap { meta, merged_bam, meta_ref, ref ->
bam : tuple(meta, merged_bam )
reference : tuple(meta_ref, ref)
}
.set { ch_bamtobed_cooler_input }

//
// SUBWORKFLOW: BAM TO BED FOR COOLER
//
HIC_BAMTOBED_COOLER(
ch_bamtobed_cooler_input.bam,
ch_bamtobed_cooler_input.reference
)
ch_versions = ch_versions.mix( HIC_BAMTOBED_COOLER.out.versions )

//
// LOGIC: BIN CONTACT PAIRS
//
GET_PAIRED_CONTACT_BED.out.bed
.join( BAMTOBED_SORT.out.sorted_bed )
HIC_BAMTOBED_COOLER.out.paired_contacts_bed
.join( HIC_BAMTOBED_COOLER.out.sorted_bed )
.combine( ch_cool_bin )
.set { ch_binned_pairs }

Expand Down