Skip to content

Commit

Permalink
Changes:
Browse files Browse the repository at this point in the history
- Add nf-test skeleton code
- Change Metator input to BAM files
- Update conda-checking code in bin3c modules
- Add parameters to choose minimum contig size and minimum map % identity
  when binning
  • Loading branch information
Jim Downie committed Dec 20, 2024
1 parent 9693dd7 commit f6db65a
Show file tree
Hide file tree
Showing 22 changed files with 281 additions and 91 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ testing*
*.pyc
null/
co2footprint*

.nf-test/
.nf-test.log
10 changes: 0 additions & 10 deletions assets/test_full_input.yaml

This file was deleted.

11 changes: 0 additions & 11 deletions assets/test_input.yaml

This file was deleted.

8 changes: 0 additions & 8 deletions bin/bin_summary.R
Original file line number Diff line number Diff line change
Expand Up @@ -156,11 +156,3 @@ summary <- map(input_types, \(x) split_and_read(input, x)) |>
reduce(\(x, y) left_join(x, y, by = "bin"))

write_tsv(summary, glue::glue("{input$prefix}.bin_summary.tsv"))

writeLines(
c("BIN_SUMMARY:",
paste0(" R: ", paste0(R.Version()[c("major","minor")], collapse = ".")),
paste0(" tidyverse: ", packageVersion("tidyverse"))
),
"versions.yml"
)
22 changes: 14 additions & 8 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ process {
]

withName: 'BIN3C_MKMAP' {
ext.prefix = { "${meta.id}_${meta.assembler}_bin3c" }
tag = { "${meta.id}_${meta.assembler}" }
ext.args = { "--min-reflen ${params.minimum_contig_size}" }
ext.prefix = { "${meta.id}_${meta.assembler}_bin3c" }
tag = { "${meta.id}_${meta.assembler}" }
}

withName: 'BIN3C_CLUSTER' {
Expand Down Expand Up @@ -76,7 +77,7 @@ process {
}

withName: 'COVERM_CONTIG' {
ext.args = { "-m metabat" }
ext.args = { "-m metabat --min-read-percent-identity ${params.min_hifi_perc_identity}" }
ext.prefix = { "${meta.id}_${meta.assembler}_depth" }
tag = { "${meta.id}_${meta.assembler}" }
publishDir = [
Expand Down Expand Up @@ -178,7 +179,7 @@ process {
}

withName: 'MAXBIN2' {
ext.args = { "" }
ext.args = { "-min_contig_length ${params.minimum_contig_size}" }
ext.prefix = { "${meta.id}_${meta.assembler}_maxbin2" }
tag = { "${meta.id}_${meta.assembler}" }
publishDir = [
Expand All @@ -189,7 +190,7 @@ process {
}

withName: 'METABAT2_METABAT2' {
ext.args = { "" }
ext.args = { "--minContig ${params.minimum_contig_size}" }
ext.prefix = { "${meta.id}_${meta.assembler}_metabat2" }
tag = { "${meta.id}_${meta.assembler}" }
publishDir = [
Expand All @@ -199,8 +200,13 @@ process {
]
}

// Settings for the step that derives the per-direction ("fwd"/"rev") Hi-C BAMs
// which are later grouped and handed to METATOR_PIPELINE.
withName: 'METATOR_PROCESS_INPUT_BAM' {
ext.prefix = { "${meta.id}_${meta.assembler}" } // metator already appends "_metator" to files
tag = { "${meta.id}_${meta.assembler}" }
}

withName: 'METATOR_PIPELINE' {
ext.args = { "--start fastq --aligner bwa" }
ext.args = { "--start bam" }
ext.prefix = { "${meta.id}_${meta.assembler}" } // metator already appends "_metator" to files
tag = { "${meta.id}_${meta.assembler}" }
publishDir = [
Expand All @@ -211,7 +217,7 @@ process {
}

withName: 'MINIMAP2_ALIGN' {
ext.args = { "" }
ext.args = { "-x map-hifi" }
ext.args2 = { "" }
ext.prefix = { "${meta2.id}_${meta2.assembler}" }
tag = { "${meta2.id}_${meta2.assembler}" }
Expand Down Expand Up @@ -255,7 +261,7 @@ process {
]
}

withName: 'SORT_HIC_BAM' {
withName: 'SAMTOOLS_SORT_HIC_BAM' {
ext.args = { "-n" }
ext.prefix = { "${meta.id}_${meta.assembler}" }
tag = { "${meta.id}_${meta.assembler}" }
Expand Down
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,6 @@ params {
config_profile_description = 'Minimal test dataset to check pipeline function'

// Input data
input = "${baseDir}/assets/test_input.yaml"
input = params.pipelines_testdata_base_path + "test_input.yaml"
enable_gtdbtk = false
}
3 changes: 3 additions & 0 deletions modules/local/bin3c/cluster/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ process BIN3C_CLUSTER {
path("versions.yml") , emit: versions

script:
// Bin3C is only distributed as a container image, so abort early when the
// run was launched with a conda or mamba profile.
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
    error "ERROR: Bin3C is only available as a Docker or Singularity container. If you need to run with conda, run with --enable_bin3c false"
}
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def contigs_basename = contigs.getBaseName()
Expand Down
3 changes: 3 additions & 0 deletions modules/local/bin3c/mkmap/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ process BIN3C_MKMAP {
path("versions.yml") , emit: versions

script:
// Bin3C is only distributed as a container image, so abort early when the
// run was launched with a conda or mamba profile.
if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
    error "ERROR: Bin3C is only available as a Docker or Singularity container. If you need to run with conda, run with --enable_bin3c false"
}
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// if(!hic_enzymes) error("Error: no enzymes entry found in Hi-C meta object!")
Expand Down
18 changes: 18 additions & 0 deletions modules/local/bin_summary/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,23 @@ process BIN_SUMMARY {
${checkm_input} \\
${tax_input} \\
${args}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
R: \$(Rscript -e 'cat(paste0(R.Version()[c("major","minor")], collapse = "."))')
tidyverse: \$(Rscript -e 'cat(as.character(packageVersion("tidyverse")))')
END_VERSIONS
"""

stub:
// Stub run: create an empty summary table and still report tool versions.
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.bin_summary.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
R: \$(Rscript -e 'cat(paste0(R.Version()[c("major","minor")], collapse = "."))')
tidyverse: \$(Rscript -e 'cat(as.character(packageVersion("tidyverse")))')
END_VERSIONS
"""
}
}
7 changes: 7 additions & 0 deletions modules/local/metator/process_input_bam/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Conda environment for the METATOR_PROCESS_INPUT_BAM module
# (referenced from the module's main.nf via "${moduleDir}/environment.yml").
channels:
- conda-forge
- bioconda
dependencies:
# Versions are pinned; the module script calls samtools and bioawk.
- bioconda::htslib=1.21
- bioconda::samtools=1.21
- bioconda::bioawk=1.0
64 changes: 64 additions & 0 deletions modules/local/metator/process_input_bam/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
 * METATOR_PROCESS_INPUT_BAM
 *
 * Extracts one read direction from a paired Hi-C BAM and rewrites the SAM
 * flags so the records no longer carry mate information.
 *
 * Input:
 *   meta      - sample/assembly metadata map
 *   bam       - BAM file containing both reads of each Hi-C pair
 *   direction - "fwd" (first-in-pair, flag 0x40) or "rev" (second-in-pair, flag 0x80)
 *
 * Output:
 *   filtered_bam - [ meta, <prefix>.<direction>.bam ], name-sorted
 *   versions     - versions.yml with the samtools and bioawk versions
 *
 * Fails with an error if `direction` is neither "fwd" nor "rev".
 */
process METATOR_PROCESS_INPUT_BAM {
    tag "${meta.id}"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'oras://community.wave.seqera.io/library/htslib_samtools_bioawk:3ff2c81f84424e4c' :
        'community.wave.seqera.io/library/htslib_samtools_bioawk:420f5543dfc64992' }"

    input:
    tuple val(meta), path(bam), val(direction)

    output:
    tuple val(meta), path("*.bam"), emit: filtered_bam
    path "versions.yml"           , emit: versions

    script:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // SAM flag selecting the requested mate: 0x40 = first in pair, 0x80 = second in pair.
    // Declared with `def` and resolved without `switch` so the variable stays local to
    // the task and the module parses under the Nextflow strict syntax.
    def flag = direction == 'fwd' ? '0x40' : (direction == 'rev' ? '0x80' : null)
    if (!flag) {
        error("ERROR: METATOR_PROCESS_INPUT_BAM direction was not 'fwd' or 'rev'!")
    }
    // samtools --threads takes the number of additional threads.
    def extra_threads = task.cpus - 1
    // Steps: (1) keep only reads of the requested direction, (2) save the header, because
    // the records are passed through bioawk separately from it, (3) mask each FLAG with
    // 3860 (= 0xF14), which clears the paired/proper-pair/mate-unmapped/mate-reverse/
    // first/second bits, (4) re-attach the header and name-sort.
    // NOTE(review): step 3 relies on bioawk emitting tab-separated records after the
    // field assignment — confirm against the bioawk `-c sam` behaviour.
    """
    samtools view --threads ${extra_threads} -f ${flag} -o temp.bam ${bam}
    samtools view -H --threads ${extra_threads} temp.bam > temp_header

    samtools view --threads ${extra_threads} temp.bam |\\
        bioawk -c sam '{ \$flag = and(\$flag , 3860 ) ; print \$0 }' |\\
        cat temp_header - |\\
        samtools sort --threads ${extra_threads} -n -o ${prefix}.${direction}.bam -

    rm temp.bam temp_header

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
        bioawk: 1.0
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Validate the direction in stub runs too, so wiring mistakes surface early.
    if (direction != 'fwd' && direction != 'rev') {
        error("ERROR: METATOR_PROCESS_INPUT_BAM direction was not 'fwd' or 'rev'!")
    }
    """
    touch ${prefix}.${direction}.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
        bioawk: 1.0
    END_VERSIONS
    """
}
4 changes: 2 additions & 2 deletions modules/nf-core/prokka/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ params {
enable_maxbin2 = true
enable_bin3c = true
enable_metator = true
minimum_contig_size = 3000
min_hifi_perc_identity = 97

// Bin refinement options
enable_bin_refinement = true
Expand Down Expand Up @@ -74,7 +76,7 @@ params {
help_full = false
show_hidden = false
version = false
pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/'
pipelines_testdata_base_path = 'https://raw.githubusercontent.com/prototaxites/lrm_testdata/refs/heads/main/'

// Config options
config_profile_name = null
Expand Down
16 changes: 13 additions & 3 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@
"type": "boolean",
"default": true,
"description": "Enable binning with MetaTor."
},
"minimum_contig_size": {
"type": "integer",
"default": 3000,
"description": "The minimum size of contig to be considered when binning. This option applies to MetaBat2, MaxBin2, and Bin3C, but not Metator."
},
"min_hifi_perc_identity": {
"type": "integer",
"default": 97,
"description": "The minimum required percent identity of a read mapping to the assembly to be counted during coverage estimation for MetaBat2 and MaxBin2."
}
}
},
Expand Down Expand Up @@ -134,8 +144,8 @@
"description": "Enable QC using CheckM2."
},
"checkm2_db_version": {
"type": "number",
"default": 5571251.0,
"type": "string",
"default": "5571251",
"description": "If no local CheckM2 database provided, the Zenodo record ID of a CheckM2 database to download."
},
"checkm2_local_db": {
Expand Down Expand Up @@ -330,7 +340,7 @@
"type": "string",
"fa_icon": "far fa-check-circle",
"description": "Base URL or local path to location of pipeline test dataset files",
"default": "https://raw.githubusercontent.com/nf-core/test-datasets/",
"default": "https://raw.githubusercontent.com/prototaxites/lrm_testdata/refs/heads/main/",
"hidden": true
}
}
Expand Down
10 changes: 10 additions & 0 deletions nf-test.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// nf-test configuration for this pipeline.
config {
// Tests are looked up relative to the repository root.
testsDir "."
// Working directory for nf-test runs (listed in .gitignore).
workDir ".nf-test"
// Nextflow config applied to test runs, and the profile they are run with.
configFile "tests/nextflow.config"
profile "test"

plugins {
// NOTE(review): the plugin identifier below looks like it was replaced by the web
// page's e-mail obfuscation; restore the real "<plugin>@<version>" string from the repository.
load "[email protected]"
}
}
36 changes: 27 additions & 9 deletions subworkflows/local/binning.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ include { MAXBIN2 } from '../../modules/nf-core/maxbin2/main
include { GAWK as GAWK_MAXBIN2_DEPTHS } from '../../modules/nf-core/gawk/main'
include { METABAT2_METABAT2 } from '../../modules/nf-core/metabat2/metabat2/main'
include { METATOR_PIPELINE } from '../../modules/local/metator/pipeline/main'
include { METATOR_PROCESS_INPUT_BAM } from '../../modules/local/metator/process_input_bam/main'

workflow BINNING {
take:
Expand Down Expand Up @@ -49,11 +50,7 @@ workflow BINNING {
ch_bins = ch_bins.mix(ch_maxbin2_bins)
}

//
// LOGIC: Bin3C is not available in conda
// only run if we are not running with the conda profile
//
if(params.enable_bin3c && !(workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1)) {
if(params.enable_bin3c) {
ch_bin3c_mkmap_input = assemblies
| combine(hic_bam, by: 0)

Expand All @@ -71,18 +68,39 @@ workflow BINNING {
ch_bins = ch_bins.mix(ch_bin3c_bins)
}

// NOTE: currently hi-c input to metator is supplied as fastq rather than BAM
// metator aligns fwd and rev reads independently
// TODO: process mapped bam from READ_MAPPING subworkflow into separate fwd/rev
// bam files
if(params.enable_metator) {
// Old code (keep for now) - run Metator in mapping mode,
// providing hi-c reads as input rather than bam files
/*
ch_assemblies_combine = assemblies
| map {meta, contigs -> [ meta.subMap('id'), meta, contigs ]}
ch_metator_input = ch_assemblies_combine
| combine(hic_reads, by: 0)
| map { meta_join, meta_assembly, contigs, hic -> [meta_assembly, contigs, hic, []]}
METATOR_PIPELINE(ch_metator_input, hic_enzymes)
ch_versions = ch_versions.mix(METATOR_PIPELINE.out.versions)
*/

// Metator expects us to have aligned forward and reverse reads
// independently of one another - munge the bam file
// to filter out forward and reverse reads and remove mate information
// from SAM flags: bitwise and(flag, 3860)
ch_directions = Channel.of("fwd", "rev")
ch_hic_bam_to_process = hic_bam
| combine(ch_directions)

METATOR_PROCESS_INPUT_BAM(ch_hic_bam_to_process)
ch_versions = ch_versions.mix(METATOR_PROCESS_INPUT_BAM.out.versions)

// Collect the two per-direction BAMs of each sample/assembly and attach the contigs.
ch_metator_input = METATOR_PROCESS_INPUT_BAM.out.filtered_bam
    | groupTuple(by: 0, size: 2)
    | combine(assemblies, by: 0)
    | map { meta, bams, contigs ->
        // Sort by file name, not by full path: the two BAMs live in different task
        // work directories, so sorting the paths themselves would order them by the
        // work-directory hash. By name, "*.fwd.bam" always precedes "*.rev.bam".
        [ meta, contigs, bams.sort { bam -> bam.name }, [] ]
    }

METATOR_PIPELINE(ch_metator_input, hic_enzymes)
ch_versions = ch_versions.mix(METATOR_PIPELINE.out.versions)

Expand Down
Loading

0 comments on commit f6db65a

Please sign in to comment.