Skip to content

Commit

Permalink
Merge pull request #1 from grp-bork/update/nevermore_0.13.0_20240620
Browse files Browse the repository at this point in the history
Update/nevermore 0.13.0 20240620
  • Loading branch information
cschu authored Jun 21, 2024
2 parents d90572d + 66df659 commit 9869c37
Show file tree
Hide file tree
Showing 31 changed files with 589 additions and 131 deletions.
23 changes: 19 additions & 4 deletions metaphlow/modules/profilers/humann3.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
process reduce_metaphlan_profiles {
container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.8--pyh7cba7a3_0"
container = "registry.git.embl.de/schudoma/humann3-docker:latest"
label "default"

input:
path(mp_collated_profiles)
Expand All @@ -24,7 +27,11 @@ process reduce_metaphlan_profiles {


process generate_humann_joint_index {
container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.8--pyh7cba7a3_0"
container = "registry.git.embl.de/schudoma/humann3-docker:latest"

label "process_high"

input:
path(mp_reduced_profiles)
Expand All @@ -46,7 +53,12 @@ process generate_humann_joint_index {


process run_humann3 {
container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.8--pyh7cba7a3_0"
publishDir params.output_dir, mode: "copy"
container = "registry.git.embl.de/schudoma/humann3-docker:latest"

label "process_high"

input:
tuple val(sample), path(mp_profile), path(fastq_files)
Expand Down Expand Up @@ -93,7 +105,10 @@ process run_humann3 {


process reformat_genefamily_table {
container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.7--pyh7cba7a3_1"
// container "quay.io/biocontainers/humann:3.8--pyh7cba7a3_0"
publishDir params.output_dir, mode: "copy"
container = "registry.git.embl.de/schudoma/humann3-docker:latest"

input:
tuple val(sample), path(hm_table)
Expand Down
12 changes: 6 additions & 6 deletions metaphlow/modules/profilers/samestr.nf
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
process run_samestr_convert {
container "registry.git.embl.de/schudoma/samestr-docker:latest"
tag "${sample.id}"
label "process_high_memory"
label "highmem_large"


input:
Expand Down Expand Up @@ -34,7 +34,7 @@ process run_samestr_merge {
publishDir params.output_dir, mode: "copy"
container "registry.git.embl.de/schudoma/samestr-docker:latest"
tag "${species}"
label "process_high_memory"
label "highmem_large"

input:
tuple val(species), path(sstr_npy)
Expand Down Expand Up @@ -62,7 +62,7 @@ process run_samestr_merge {
process run_samestr_filter {
container "registry.git.embl.de/schudoma/samestr-docker:latest"
tag "${species}"
label "process_high_memory"
label "highmem_large"

input:
tuple val(species), path(sstr_npy), path(sstr_names)
Expand Down Expand Up @@ -101,7 +101,7 @@ process run_samestr_stats {
publishDir params.output_dir, mode: "copy"
container "registry.git.embl.de/schudoma/samestr-docker:latest"
tag "${species}"
label "process_high_memory"
label "large"

input:
tuple val(species), path(sstr_npy), path(sstr_names)
Expand All @@ -126,7 +126,7 @@ process run_samestr_compare {
publishDir params.output_dir, mode: "copy"
container "registry.git.embl.de/schudoma/samestr-docker:latest"
tag "${species}"
label "process_high_memory"
label "highmem_large"

input:
tuple val(species), path(sstr_npy), path(sstr_names)
Expand Down Expand Up @@ -154,7 +154,7 @@ process run_samestr_compare {
process run_samestr_summarize {
publishDir params.output_dir, mode: "copy"
container "registry.git.embl.de/schudoma/samestr-docker:latest"
label "process_high_memory"
label "large"

input:
path(sstr_data)
Expand Down
76 changes: 76 additions & 0 deletions nevermore/modules/align/bowtie2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
 * Build a Bowtie2 index for one sample's genome.
 *
 * Input:  tuple (sample, genomeseq) - genomeseq is decompressed with `gzip -dc`
 *         before being handed to bowtie2-build.
 * Output: `index` - tuple (sample, all files matching
 *         <sample.id>/bowtie2/<sample.id>*).
 */
process bowtie2_build {
    container "quay.io/biocontainers/bowtie2:2.5.3--py39h6fed5c7_1"
    tag "${sample.id}"

    input:
    tuple val(sample), path(genomeseq)

    output:
    tuple val(sample), path("${sample.id}/bowtie2/${sample.id}*"), emit: index

    script:
    // Name the locations once so the shell script below reads as a recipe.
    def index_dir = "${sample.id}/bowtie2"
    def index_prefix = "${index_dir}/${sample.id}"
    def genome_fasta = "genome.fa"
    """
    mkdir -p ${index_dir}/
    gzip -dc ${genomeseq} > ${genome_fasta}
    bowtie2-build --threads ${task.cpus} -f ${genome_fasta} ${index_prefix}
    rm -vf ${genome_fasta}
    """

}


/*
 * Align a sample's reads against a Bowtie2 index and produce a
 * coordinate-sorted, indexed BAM.
 *
 * Input:  tuple (sample, fastqs, index)
 *           fastqs - one or more FASTQ files; files ending in _R1.fastq.gz /
 *                    _R2.fastq.gz are treated as mates, files whose name
 *                    contains singles|orphans|chimeras as unpaired reads.
 *           index  - the Bowtie2 index files (*.bt2 / *.bt2l).
 * Output: bam      - <sample.id>/bowtie2_align/<sample.id>.bam
 *         bai      - <sample.id>/bowtie2_align/<sample.id>.bam.bai
 *         sentinel - <sample.id>.BOWTIE2.DONE
 *
 * Input selection priority: R1+R2 pairs, else R1 only, else R2 only, else
 * the singles/orphans/chimeras files. Lower-priority files are not aligned
 * when a higher-priority set is present.
 */
process bowtie2_align {
    // container "quay.io/biocontainers/bowtie2:2.5.3--py39h6fed5c7_1"
    container "registry.git.embl.de/schudoma/bowtie2-docker:latest"
    tag "${sample.id}"

    input:
    tuple val(sample), path(fastqs), path(index)

    output:
    tuple val(sample), path("${sample.id}/bowtie2_align/${sample.id}.bam"), emit: bam
    tuple val(sample), path("${sample.id}/bowtie2_align/${sample.id}.bam.bai"), emit: bai
    tuple val(sample), path("${sample.id}.BOWTIE2.DONE"), emit: sentinel

    script:
    // A single staged file is not a Collection; normalise so the filters below always see a list.
    def fastq_list = (fastqs instanceof Collection) ? fastqs : [fastqs]

    def is_unpaired = { f -> f.name.matches("(.*)(singles|orphans|chimeras)(.*)") }
    def r1_files = fastq_list.findAll { it.name.endsWith("_R1.fastq.gz") && !is_unpaired(it) }
    // Apply the same exclusion to R2 as to R1 so an unpaired file can never be used as a mate.
    def r2_files = fastq_list.findAll { it.name.endsWith("_R2.fastq.gz") && !is_unpaired(it) }
    def orphans = fastq_list.findAll { is_unpaired(it) }

    // bowtie2 takes comma-separated file lists for -1/-2/-U; a space-separated
    // list would turn the extra files into stray positional arguments.
    def input_files = ""
    if (r1_files.size() != 0 && r2_files.size() != 0) {
        input_files = "-1 ${r1_files.join(',')} -2 ${r2_files.join(',')}"
    } else if (r1_files.size() != 0) {
        input_files = "-U ${r1_files.join(',')}"
    } else if (r2_files.size() != 0) {
        input_files = "-U ${r2_files.join(',')}"
    } else if (orphans.size() != 0) {
        input_files = "-U ${orphans.join(',')}"
    } else {
        error "bowtie2_align: no usable FASTQ input found for sample ${sample.id}"
    }

    // Mate orientation (--fr/--rf/--ff) is left at the bowtie2 default.
    // Half of the allocated CPUs, but never 0 (intdiv(2) is 0 when task.cpus == 1).
    def threads = Math.max(1, task.cpus.intdiv(2))
    def bowtie2_options = "-p ${threads} -q --phred33"

    // Derive the index prefix from any index file name by stripping the
    // ".N.bt2", ".N.bt2l", ".rev.N.bt2" or ".rev.N.bt2l" suffix (dots escaped
    // so only literal dots match).
    def index_id = index[0].name.replaceAll(/(\.rev)?\.[0-9]+\.bt2l?$/, "")
    """
    mkdir -p ${sample.id}/bowtie2_align/ tmp/
    export TMPDIR=tmp/
    bowtie2 -x ${index_id} ${bowtie2_options} ${input_files} > ${sample.id}.sam
    samtools sort -@ ${threads} ${sample.id}.sam > ${sample.id}/bowtie2_align/${sample.id}.bam
    samtools index ${sample.id}/bowtie2_align/${sample.id}.bam
    rm -fv ${sample.id}.sam
    touch ${sample.id}.BOWTIE2.DONE
    """

}
44 changes: 39 additions & 5 deletions nevermore/modules/align/helpers.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,58 @@ process merge_and_sort {
val(do_name_sort)

output:
tuple val(sample), path("bam/${sample}.bam"), emit: bam
tuple val(sample), path("stats/bam/${sample}.flagstats.txt"), emit: flagstats
tuple val(sample), path("bam/${sample.id}.bam"), emit: bam
tuple val(sample), path("stats/bam/${sample.id}.flagstats.txt"), emit: flagstats

script:
def sort_order = (do_name_sort) ? "-n" : ""
def merge_cmd = ""

// need a better detection for this
if (bamfiles instanceof Collection && bamfiles.size() >= 2) {
merge_cmd = "samtools merge -@ $task.cpus ${sort_order} bam/${sample}.bam ${bamfiles}"
merge_cmd = "samtools merge -@ $task.cpus ${sort_order} bam/${sample.id}.bam ${bamfiles}"
} else {
merge_cmd = "ln -s ../${bamfiles[0]} bam/${sample}.bam"
merge_cmd = "ln -s ../${bamfiles[0]} bam/${sample.id}.bam"
}

"""
mkdir -p bam/ stats/bam/
${merge_cmd}
samtools flagstats bam/${sample}.bam > stats/bam/${sample}.flagstats.txt
samtools flagstats bam/${sample.id}.bam > stats/bam/${sample.id}.flagstats.txt
"""
}


/*
 * Combine the SAM files of one sample into a single SAM file and compute
 * flagstats on the result.
 *
 * Input:  tuple (sample, samfiles) - one or more SAM files.
 * Output: sam       - sam/<sample.id>.sam
 *         flagstats - stats/sam/<sample.id>.flagstats.txt
 *
 * With two or more inputs, the header is taken from the first file only and
 * the alignment records of every further file are appended to it. With a
 * single input the file is symlinked instead of copied.
 */
process merge_sam {
    container "quay.io/biocontainers/samtools:1.19.2--h50ea8bc_1"
    label 'samtools'

    input:
    tuple val(sample), path(samfiles)
    // val(do_name_sort)

    output:
    tuple val(sample), path("sam/${sample.id}.sam"), emit: sam
    tuple val(sample), path("stats/sam/${sample.id}.flagstats.txt"), emit: flagstats

    script:
    // A single staged file is not a Collection; normalise so indexing and
    // size checks behave the same in both cases.
    def sam_list = (samfiles instanceof Collection) ? samfiles : [samfiles]
    def merged_sam = "sam/${sample.id}.sam"
    def merge_cmd = ""

    if (sam_list.size() >= 2) {
        // Header + records from the first file ...
        merge_cmd = "samtools view --no-PG -Sh ${sam_list[0]} > ${merged_sam}\n"
        // ... then records only from ALL remaining files (not just the second one).
        merge_cmd += sam_list.tail().collect { f -> "samtools view -S ${f} >> ${merged_sam}" }.join("\n")
    } else {
        // The link lives in sam/, so the target is one directory up.
        merge_cmd = "ln -s ../${sam_list[0]} ${merged_sam}"
    }

    """
    mkdir -p sam/ stats/sam/
    ${merge_cmd}
    samtools flagstats sam/${sample.id}.sam > stats/sam/${sample.id}.flagstats.txt
    """
}

Expand Down
86 changes: 86 additions & 0 deletions nevermore/modules/align/hisat2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
 * Build a HISAT2 index for one sample's genome.
 *
 * Input:  tuple (sample, genomeseq) - genomeseq is decompressed with `gzip -dc`
 *         before being handed to hisat2-build.
 * Output: `index` - tuple (sample, all files matching
 *         <sample.id>/hisat2/<sample.id>*).
 */
process hisat2_build {
    container "quay.io/biocontainers/hisat2:2.2.1--hdbdd923_6"
    // we need a hisat2/samtools mixed container
    // container "registry.git.embl.de/schudoma/hisat2-docker:latest"
    // Tag tasks with the sample id, as bowtie2_build does.
    tag "${sample.id}"

    input:
    tuple val(sample), path(genomeseq)

    output:
    tuple val(sample), path("${sample.id}/hisat2/${sample.id}*"), emit: index

    script:
    // -p: use the CPUs allocated to the task (mirrors `bowtie2-build --threads`);
    // with the default of 1 CPU this is identical to the previous behaviour.
    """
    mkdir -p ${sample.id}/hisat2/
    gzip -dc ${genomeseq} > genome.fa
    hisat2-build -p ${task.cpus} -f genome.fa ${sample.id}/hisat2/${sample.id}
    rm -vf genome.fa
    """

}

/*
 * Align a sample's reads against a HISAT2 index and produce a
 * coordinate-sorted, indexed BAM.
 *
 * Input:  tuple (sample, fastqs, index)
 *           fastqs - one or more FASTQ files; files ending in _R1.fastq.gz /
 *                    _R2.fastq.gz are treated as mates, files whose name
 *                    contains singles|orphans|chimeras as unpaired reads.
 *           index  - the HISAT2 index files (*.ht2 / *.ht2l).
 * Output: bam      - <sample.id>/hisat2_align/<sample.id>.bam
 *         bai      - <sample.id>/hisat2_align/<sample.id>.bam.bai
 *         sentinel - <sample.id>.HISAT2.DONE
 *
 * Reads params.hisat2_no_spliced_alignment: when set, --no-spliced-alignment
 * is passed to hisat2.
 *
 * Input selection priority: R1+R2 pairs, else R1 only, else R2 only, else
 * the singles/orphans/chimeras files. Lower-priority files are not aligned
 * when a higher-priority set is present.
 */
process hisat2_align {
    // container "quay.io/biocontainers/hisat2:2.2.1--hdbdd923_6"
    // we need a hisat2/samtools mixed container
    container "registry.git.embl.de/schudoma/hisat2-docker:latest"

    input:
    tuple val(sample), path(fastqs), path(index)

    output:
    tuple val(sample), path("${sample.id}/hisat2_align/${sample.id}.bam"), emit: bam
    tuple val(sample), path("${sample.id}/hisat2_align/${sample.id}.bam.bai"), emit: bai
    tuple val(sample), path("${sample.id}.HISAT2.DONE"), emit: sentinel

    script:
    // A single staged file is not a Collection; normalise so the filters below always see a list.
    def fastq_list = (fastqs instanceof Collection) ? fastqs : [fastqs]

    def is_unpaired = { f -> f.name.matches("(.*)(singles|orphans|chimeras)(.*)") }
    def r1_files = fastq_list.findAll { it.name.endsWith("_R1.fastq.gz") && !is_unpaired(it) }
    // Apply the same exclusion to R2 as to R1 so an unpaired file can never be used as a mate.
    def r2_files = fastq_list.findAll { it.name.endsWith("_R2.fastq.gz") && !is_unpaired(it) }
    def orphans = fastq_list.findAll { is_unpaired(it) }

    // hisat2 takes comma-separated file lists for -1/-2/-U; a space-separated
    // list would turn the extra files into stray positional arguments.
    def input_files = ""
    if (r1_files.size() != 0 && r2_files.size() != 0) {
        input_files = "-1 ${r1_files.join(',')} -2 ${r2_files.join(',')}"
    } else if (r1_files.size() != 0) {
        input_files = "-U ${r1_files.join(',')}"
    } else if (r2_files.size() != 0) {
        input_files = "-U ${r2_files.join(',')}"
    } else if (orphans.size() != 0) {
        input_files = "-U ${orphans.join(',')}"
    } else {
        error "hisat2_align: no usable FASTQ input found for sample ${sample.id}"
    }

    // Mate orientation (--fr/--rf/--ff) is left at the hisat2 default.
    // Half of the allocated CPUs, but never 0 (intdiv(2) is 0 when task.cpus == 1).
    def threads = Math.max(1, task.cpus.intdiv(2))
    def hisat2_options = "-p ${threads} -q --phred33"
    if (params.hisat2_no_spliced_alignment) {
        hisat2_options += " --no-spliced-alignment"
    }

    // Derive the index prefix from the first index file name by stripping the
    // ".N.ht2" / ".N.ht2l" suffix (dots escaped so only literal dots match).
    def index_id = index[0].name.replaceAll(/\.[0-9]+\.ht2l?$/, "")
    """
    mkdir -p ${sample.id}/hisat2_align/ tmp/
    export TMPDIR=tmp/
    hisat2 -x ${index_id} ${hisat2_options} ${input_files} > ${sample.id}.sam
    samtools sort -@ ${threads} ${sample.id}.sam > ${sample.id}/hisat2_align/${sample.id}.bam
    samtools index ${sample.id}/hisat2_align/${sample.id}.bam
    rm -fv ${sample.id}.sam
    touch ${sample.id}.HISAT2.DONE
    """
}



50 changes: 50 additions & 0 deletions nevermore/modules/align/minimap2.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
 * Align a sample's reads against a reference with minimap2 and write the
 * alignments as an (unsorted) SAM file.
 *
 * Input:  tuple (sample, fastqs) - one or more FASTQ files; files ending in
 *                                  _R1.fastq.gz / _R2.fastq.gz are treated as
 *                                  mates, files whose name contains
 *                                  singles|orphans|chimeras as unpaired reads.
 *         reference              - passed to minimap2 as the target.
 *         do_name_sort           - currently unused: sorting is disabled (see below).
 * Output: sam - <sample.id>/<sample.id>.sam
 *
 * Input selection priority: R1+R2 pairs, else R1 only, else R2 only, else
 * the singles/orphans/chimeras files. Lower-priority files are not aligned
 * when a higher-priority set is present.
 */
process minimap2_align {
    container "registry.git.embl.de/schudoma/minimap2-docker:latest"
    label 'align'

    input:
    tuple val(sample), path(fastqs)
    path(reference)
    val(do_name_sort)

    output:
    tuple val(sample), path("${sample.id}/${sample.id}.sam"), emit: sam

    script:
    // A single staged file is not a Collection; normalise so the filters below always see a list.
    def fastq_list = (fastqs instanceof Collection) ? fastqs : [fastqs]

    def is_unpaired = { f -> f.name.matches("(.*)(singles|orphans|chimeras)(.*)") }
    def r1_files = fastq_list.findAll { it.name.endsWith("_R1.fastq.gz") && !is_unpaired(it) }
    // Apply the same exclusion to R2 as to R1 so an unpaired file can never be used as a mate.
    def r2_files = fastq_list.findAll { it.name.endsWith("_R2.fastq.gz") && !is_unpaired(it) }
    def orphans = fastq_list.findAll { is_unpaired(it) }

    // minimap2 takes its query files as space-separated positional arguments.
    def input_files = ""
    if (r1_files.size() != 0 && r2_files.size() != 0) {
        input_files = "${r1_files.join(' ')} ${r2_files.join(' ')}"
    } else if (r1_files.size() != 0) {
        input_files = "${r1_files.join(' ')}"
    } else if (r2_files.size() != 0) {
        input_files = "${r2_files.join(' ')}"
    } else if (orphans.size() != 0) {
        input_files = "${orphans.join(' ')}"
    } else {
        error "minimap2_align: no usable FASTQ input found for sample ${sample.id}"
    }

    // Half of the allocated CPUs, but never 0 (intdiv(2) is 0 when task.cpus == 1).
    def threads = Math.max(1, task.cpus.intdiv(2))
    // def mm_options = "--sam-hit-only -t ${threads} -x sr --secondary=yes -a"
    def mm_options = "-t ${threads} -x sr --secondary=yes -a"

    // No sort/collate step: we cannot convert large catalogue alignments to
    // bam, hence we cannot properly sort those. The previous pipe was:
    // "| samtools collate -@ ${threads} -o ${sample.id}.bam - tmp/collated_bam"  (name sort)
    // "| samtools sort -@ ${threads} -o ${sample.id}.bam -"                      (coordinate sort)

    """
    set -e -o pipefail
    mkdir -p ${sample.id}/ tmp/
    minimap2 ${mm_options} --split-prefix ${sample.id}_split ${reference} ${input_files} > ${sample.id}/${sample.id}.sam
    rm -rvf tmp/
    """
}
2 changes: 2 additions & 0 deletions nevermore/modules/collate.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
process collate_stats {
label "default"

input:
path(stats_files)

Expand Down
1 change: 1 addition & 0 deletions nevermore/modules/converters/merge_fastqs.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
process merge_single_fastqs {
container "quay.io/biocontainers/bbmap:39.06--h92535d8_0"
label "medium"

input:
tuple val(sample), path(fastqs)
Expand Down
Loading

0 comments on commit 9869c37

Please sign in to comment.