Skip to content

Commit

Permalink
Getting closer to parity with TreeVal
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon committed Feb 7, 2024
1 parent c59909c commit d2144be
Show file tree
Hide file tree
Showing 12 changed files with 138 additions and 108 deletions.
76 changes: 50 additions & 26 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ process {
}

// Coverage and repeat, gap, telo
withName: 'UCSC_BEDGRAPHTOBIGWIG|BEDTOOLS_MERGE_MAX|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MIN|GAP_LENGTH|EXTRACT_TELO' {
withName: 'BED2BW_NORMAL|BED2BW_LOG|BEDTOOLS_MERGE_MAX|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MIN|GAP_LENGTH|EXTRACT_TELO' {
publishDir = [
path: { "${params.outdir}/accessory_files" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -69,66 +69,74 @@ process {
ext.args = "-n 1"
}

withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' {
ext.args = "--MD -t 8"
ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
//
// ACCESSORY_FILES -> LONGREAD_COVERAGE
//
withName: ".*:.*:ACCESSORY_FILES:LONGREAD_COVERAGE:SAMTOOLS_VIEW_FILTER_PRIMARY" {
ext.args = "-b -hF 256"
ext.prefix = { "${meta.id}_view" }
}

withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN_SPLIT' {
ext.args = { "-t 20 --split-prefix ${meta.split_prefix}" }
withName: '.*:.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' {
ext.args = "--MD -t 8"
ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" }
}

withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' {
withName: '.*:.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' {
ext.prefix = { "${meta.id}_merge" }
}

withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW' {
ext.args = "-b -hF 256"
ext.prefix = { "${meta.id}_view" }
}

withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' {
withName: '.*:.*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' {
ext.args = "-bga -split"
ext.prefix = { "${meta.id}_genome2cov" }
}

withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' {
withName: '.*:.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' {
ext.args = "-d 50"
ext.prefix = { "maxdepth" }
}

withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' {
withName: '.*:.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' {
ext.args = "-d 50"
ext.prefix = { "zerodepth" }
}

withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' {
withName: '.*:.*:.*:LONGREAD_COVERAGE:GNU_SORT' {
ext.args = "-k1,1 -k2,2n"
ext.prefix = { "${meta.id}_sorted" }
}

withName: '.*:.*:LONGREAD_COVERAGE:UCSC_BEDGRAPHTOBIGWIG' {
withName: '.*:.*:.*:LONGREAD_COVERAGE:BED2BW_NORMAL' {
ext.prefix = 'coverage'
}

withName: 'FIND_TELOMERE_REGIONS' {
ext.find_telomere = 'find_telomere'
withName: "FIND_TELOMERE_REGIONS" {
ext.find_telomere = "find_telomere"
}

withName: 'FIND_TELOMERE_WINDOWS' {
ext.telomere_jar = 'telomere.jar'
ext.telomere_jvm_params = '-Xms1g -Xmx1g'
withName: "FIND_TELOMERE_WINDOWS" {
ext.telomere_jar = "telomere.jar"
ext.telomere_jvm_params = "-Xms1g -Xmx1g"
}

withName: PRETEXTMAP_STANDRD {
ext.args = "--sortby length --mapq 0"
ext.prefix = { "${meta.id}_normal" }
ext.args = { "--sortby length --mapq 0 --memory ${task.memory.giga}G" }
ext.prefix = { "${meta.id}_normal_pi" }
}

withName: PRETEXTMAP_HIGHRES {
ext.args = "--sortby length --highRes --mapq 0"
ext.prefix = { "${meta.id}_hr" }
ext.args = { "--sortby length --highRes --mapq 0 --memory ${task.memory.giga}G" }
ext.prefix = { "${meta.id}_hr_pi" }
}

withName: ".*:PRETEXT_INGEST_SNDRD:PRETEXT_GRAPH" {
ext.args = { "--textureBuffer 1G" }
ext.prefix = { "${meta.id}_normal" }
}

withName: ".*:PRETEXT_INGEST_HIRES:PRETEXT_GRAPH" {
ext.args = { "--textureBuffer 1G" }
ext.prefix = { "${meta.id}_hr" }
}

withName: 'SNAPSHOT_SRES' {
Expand All @@ -141,6 +149,22 @@ process {
ext.prefix = { "${meta.id}_hr" }
}

// Per-stage command-line arguments for the bwa-mem2 Hi-C mapping module when it runs
// under GENERATE_MAPS:HIC_BWAMEM2. In modules/local/cram_filter_align_bwamem2_fixmate_sort.nf
// the script reads these as task.ext.args, args1, args2, args3 and args4.
withName: ".*:.*:GENERATE_MAPS:HIC_BWAMEM2:CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT" {
// NOTE(review): `args` is read by the module but does not appear in the visible pipeline command - confirm it is intentionally empty.
ext.args = ""
// Passed to `samtools fastq` in the module script.
ext.args1 = "-F0xB00 -nt"
// Closure form; `rglines` is one of the module's input values, so presumably it is resolved per task - confirm.
// NOTE(review): the visible `bwa-mem2 mem` command still hard-codes these same flags and does not
// interpolate ${args2}, so this setting currently looks unused - confirm.
ext.args2 = { "-5SPCp -H'${rglines}'" }
// Passed to `samtools fixmate` in the module script.
ext.args3 = "-mpu"
// Passed to `samtools sort` in the module script.
ext.args4 = { "--write-index -l1" }
}

// Per-stage command-line arguments for the minimap2 Hi-C mapping module when it runs
// under GENERATE_MAPS:HIC_MINIMAP2.
// NOTE(review): the module script that consumes these values is not visible here; presumably
// args1..args4 map onto the pipeline stages in the same order as the bwa-mem2 variant - confirm.
withName: ".*:.*:GENERATE_MAPS:HIC_MINIMAP2:CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT" {
ext.args = ""
ext.args1 = ""
// NOTE(review): `-ax sr` looks like minimap2's short-read preset with SAM output, so presumably
// this feeds the minimap2 call - confirm against the module script.
ext.args2 = { "-ax sr" }
ext.args3 = "-mpu"
ext.args4 = { "--write-index -l1" }
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
Expand Down
23 changes: 14 additions & 9 deletions modules/local/cram_filter_align_bwamem2_fixmate_sort.nf
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {
tag "$meta.id"
label 'process_high'
label "process_high"

container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' :
'biocontainers/mulled-v2-50d89b457e04ed90fa0cbf8ebc3ae1b9ffbc836b:caf993da1689e8d42f5e4c113ffc9ef81d26df96-0' }"
'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' :
'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }"

input:
tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix)
tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(bwaprefix), path(reference)

output:
tuple val(meta), path("*.bam"), emit: mappedbam
Expand All @@ -18,21 +18,26 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {

script:
def args = task.ext.args ?: ''
def args1 = task.ext.args1 ?: ''
def args2 = task.ext.args2 ?: ''
def args3 = task.ext.args3 ?: ''
def args4 = task.ext.args4 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// Please be aware that one of the tools here requires memory = 28 * reference size!
"""
cram_filter -n ${from}-${to} ${cramfile} - | \\
samtools fastq -F0xB00 -nt - | \\
samtools fastq ${args1} | \\
bwa-mem2 mem -p ${bwaprefix} -t${task.cpus} -5SPCp -H'${rglines}' - | \\
samtools fixmate -mpu - - | \\
samtools sort --write-index -l1 -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam -
samtools fixmate ${args3} - - | \\
samtools sort ${args4} -@${task.cpus} -T ${base}_${chunkid}_sort_tmp -o ${prefix}_${base}_${chunkid}_mem.bam -
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g')
END_VERSIONS
"""
// temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//')
// temp removal staden_io_lib: \$(echo \$(staden_io_lib --version 2>&1) | sed 's/^.*staden_io_lib //; s/Using.*\$//') CAUSES ERROR

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
Expand All @@ -44,7 +49,7 @@ process CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' )
bwa-mem2: \$(bwa-mem2 --version | sed 's/bwa-mem2 //g')
bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //')
END_VERSIONS
"""
}
8 changes: 5 additions & 3 deletions modules/local/cram_filter_minimap2_filter5end_fixmate_sort.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
tag "$meta.id"
label "process_high"

container 'quay.io/sanger-tol/cramfilter_bwamem2_minimap2_samtools_perl:0.001-c1'
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' :
'biocontainers/mulled-v2-1a6fe65bd6674daba65066aa796ed8f5e8b4687b:688e175eb0db54de17822ba7810cc9e20fa06dd5-0' }"

input:
tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref)
tuple val(meta), path(cramfile), path(cramindex), val(from), val(to), val(base), val(chunkid), val(rglines), val(ref), path(reference)

output:
tuple val(meta), path("*.bam"), emit: mappedbam
Expand Down Expand Up @@ -52,4 +54,4 @@ process CRAM_FILTER_MINIMAP2_FILTER5END_FIXMATE_SORT {
minimap2: \$(echo \$(minimap2 version 2>&1) | sed 's/.* //')
END_VERSIONS
"""
}
}
12 changes: 6 additions & 6 deletions modules/local/pretext_graph.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ process PRETEXT_GRAPH {
'biocontainers/mulled-v2-077b852b8b5440d395ad23f9f24f50c943390a84:da499c75fec554e81f4847c4fa8b6b167afbe3bf-0' }"

input:
tuple val(meta), path(pretext_file)
tuple val(gap), path(gap_file)
tuple val(cov), path(coverage)
tuple val(log), path(log_coverage)
tuple val(telo), path(telomere_file)
tuple val(rep), path(repeat_density)
tuple val(meta), path(pretext_file, stageAs: 'pretext.pretext')
tuple val(gap), path(gap_file, stageAs: 'gap.bed')
tuple val(cov), path(coverage, stageAs: 'coverage.bigWig')
tuple val(log), path(log_coverage, stageAs: 'log_cov.bigWig')
tuple val(telo), path(telomere_file, stageAs: 'telo.bedgraph')
tuple val(rep), path(repeat_density, stageAs: 'repeats.bigWig')

output:
tuple val(meta), path("*.pretext") , emit: pretext
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ params {
teloseq = "TTAGGG"
pacbio = null
cram = null
aligner = "minimap2"
aligner = "bwamem2"
pacbio_type = "hifi"

// Boilerplate options
Expand Down
10 changes: 6 additions & 4 deletions subworkflows/local/accessory_files.nf
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ workflow ACCESSORY_FILES {
//
GET_LARGEST_SCAFF ( GENERATE_GENOME_FILE.out.dotgenome )
ch_versions = ch_versions.mix( GET_LARGEST_SCAFF.out.versions )

//
// SUBWORKFLOW: GENERATES A GAP.BED FILE TO ID THE LOCATIONS OF GAPS
//
Expand All @@ -58,7 +58,7 @@ workflow ACCESSORY_FILES {
reference_tuple,
params.teloseq
)
ch_versions = ch_versions.mix(TELO_FINDER.out.versions)
ch_versions = ch_versions.mix(TELO_FINDER.out.versions)

//
// SUBWORKFLOW: GENERATES A BIGWIG FOR A REPEAT DENSITY TRACK
Expand All @@ -70,10 +70,11 @@ workflow ACCESSORY_FILES {
ch_versions = ch_versions.mix(REPEAT_DENSITY.out.versions)

//
// SUBWORKFLOW: Takes reference, pacbio reads
// SUBWORKFLOW: Takes reference, pacbio reads
//
LONGREAD_COVERAGE (
LONGREAD_COVERAGE (
reference_tuple,
SAMTOOLS_FAIDX.out.fai,
GENERATE_GENOME_FILE.out.dotgenome,
pacbio_reads
)
Expand All @@ -86,6 +87,7 @@ workflow ACCESSORY_FILES {
telo_file = TELO_FINDER.out.bedgraph_file
repeat_file = REPEAT_DENSITY.out.repeat_density
coverage_bw = LONGREAD_COVERAGE.out.ch_bigwig
coverage_log_bw = LONGREAD_COVERAGE.out.ch_bigwig_log
mins_bed = LONGREAD_COVERAGE.out.ch_minbed
half_bed = LONGREAD_COVERAGE.out.ch_halfbed
maxs_bed = LONGREAD_COVERAGE.out.ch_maxbed
Expand Down
Loading

0 comments on commit d2144be

Please sign in to comment.