From e99f07c2e7ccefb93537c9316b6fbaeee76a6983 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 14 Nov 2023 14:08:10 +0000 Subject: [PATCH 1/7] Fixes for sort and fastk dumping failed runs into tmp, which is then not cleaned --- conf/modules.config | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c9be7b1d..05b251b4 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -41,7 +41,7 @@ process { // Files to be used for pretext, likely to be deleted once the hic workflow is complete. // .bed, .hr.pretext, .lr.pretext, needs centromere} - withName: 'REFORMAT_INTERSECT|SEQTK_CUTN|GAP_LENGTH|PRETEXT_INGEST_HIRES|PRETEXT_INGEST_SNDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|BED2BW_NORMAL|BED2BW_LOG2|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES' { + withName: 'REFORMAT_INTERSECT|SEQTK_CUTN|GAP_LENGTH|PRETEXT_GRAPH|PRETEXT_INGEST_HIRES|PRETEXT_INGEST_SNDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|BED2BW_NORMAL|BED2BW_LOG2|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES' { publishDir = [ path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, @@ -54,17 +54,17 @@ process { } withName: GNU_SORT_A { - ext.args = { "-k1,1 -k2,2n" } + ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } ext.suffix = { "intersect" } } withName: GNU_SORT_B { - ext.args = { "-k1,1 -k2,2n" } + ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } ext.suffix = { "sorted.genome" } } withName: GNU_SORT_C { - ext.args = { "-k1,1 -k2,2n" } + ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } ext.suffix = { "bins" } } @@ -180,7 +180,7 @@ process { } withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' { - ext.args = "-k1,1 -k2,2n" + ext.args = "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." ext.prefix = { "${meta.id}_sorted" } } @@ -294,11 +294,12 @@ process { withName: '.*:.*:GENERATE_GENOME:GNU_SORT' { ext.prefix = { "${meta.id}" } ext.suffix = { "genome" } - ext.args = { '-k2,2 -nr' } + ext.args = { '-k2,2 -nr -S${task.memory.mega - 100}M -T .' } } + withName: FASTK_FASTK { - ext.args = "-k31 -t" + ext.args = "-k31 -t -P." } withName: MERQURYFK_MERQURYFK { From 4641f2963fba601d7397b64b9dac02a9bdf5d50c Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 14 Nov 2023 14:12:30 +0000 Subject: [PATCH 2/7] MERQUERYFK tmp fix --- conf/modules.config | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 05b251b4..6c86b40e 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -49,6 +49,16 @@ process { ] } + withName: MERQURYFK_MERQURYFK { + publishDir = [ + path: { "${params.outdir}/hic_files" }, + mode: params.publish_dir_mode, + pattern: '*.ref.spectra-cn.ln.png' + ] + } + + // MODULE CONFIGS + withName: BEDTOOLS_SORT { ext.prefix = { "${meta.id}.sorted" } } @@ -297,16 +307,11 @@ process { ext.args = { '-k2,2 -nr -S${task.memory.mega - 100}M -T .' } } - withName: FASTK_FASTK { - ext.args = "-k31 -t -P." + ext.args = "-k31 -t -P." } withName: MERQURYFK_MERQURYFK { - publishDir = [ - path: { "${params.outdir}/hic_files" }, - mode: params.publish_dir_mode, - pattern: '*.ref.spectra-cn.ln.png' - ] + ext.args = "-P." } } From 8f0027efb0dadd4a3955e89941542372bf2011c2 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 14 Nov 2023 14:36:37 +0000 Subject: [PATCH 3/7] Correct Zenodo reference in README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9b86dc90..5d1d4aea 100755 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ If you would like to contribute to this pipeline, please see the [contributing g -If you use sanger-tol/treeval for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX). +If you use sanger-tol/treeval for your analysis, please cite it using the following doi: [10.5281/zenodo.10047653](https://doi.org/10.5281/zenodo.10047653). ### Tools From 80a44078a0f5e1908e293528ccbcd2406ecba374 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 15 Nov 2023 10:20:32 +0000 Subject: [PATCH 4/7] Updated to add {} which was stopping some pre-compute --- conf/modules.config | 146 ++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6c86b40e..adff14f5 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -42,7 +42,7 @@ process { // Files to be used for pretext, likely to be deleted once the hic workflow is complete. // .bed, .hr.pretext, .lr.pretext, needs centromere} withName: 'REFORMAT_INTERSECT|SEQTK_CUTN|GAP_LENGTH|PRETEXT_GRAPH|PRETEXT_INGEST_HIRES|PRETEXT_INGEST_SNDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|BED2BW_NORMAL|BED2BW_LOG2|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES' { - publishDir = [ + publishDir = [ path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -50,7 +50,7 @@ process { } withName: MERQURYFK_MERQURYFK { - publishDir = [ + publishDir = [ path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, pattern: '*.ref.spectra-cn.ln.png' @@ -60,49 +60,49 @@ process { // MODULE CONFIGS withName: BEDTOOLS_SORT { - ext.prefix = { "${meta.id}.sorted" } + ext.prefix = { "${meta.id}.sorted" } } withName: GNU_SORT_A { - ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } - ext.suffix = { "intersect" } + ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } + ext.suffix = { "intersect" } } withName: GNU_SORT_B { - ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } - ext.suffix = { "sorted.genome" } + ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } + ext.suffix = { "sorted.genome" } } withName: GNU_SORT_C { - ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } - ext.suffix = { "bins" } + ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } + ext.suffix = { "bins" } } withName: BEDTOOLS_MAKEWINDOWS { - ext.args = { "-w 10000" } + ext.args = { "-w 10000" } } withName: BEDTOOLS_INTERSECT { - ext.prefix = { "${meta.id}_INTERSECT" } + ext.prefix = { "${meta.id}_INTERSECT" } } withName: BEDTOOLS_MAP { - ext.prefix = { "${meta.id}_MAPPED" } - ext.args = { "-c 4 -o sum" } + ext.prefix = { "${meta.id}_MAPPED" } + ext.args = { "-c 4 -o sum" } } withName: SEQTK_CUTN { - ext.args = "-n 1" - ext.prefix = { "${meta.id}_gap" } + ext.args = "-n 1" + ext.prefix = { "${meta.id}_gap" } } withName: MINIPROT_ALIGN { - ext.args = " --gff -j1 -ut16 --gff-delim='#' " + ext.args = " --gff -j1 -ut16 --gff-delim='#' " } withName: '.*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):MINIMAP2_ALIGN' { - ext.args = {"-ax splice ${meta.intron_size ? "-G ${meta.intron_size}" : ""} --split-prefix ${meta.split_prefix}"} - ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } + ext.args = {"-ax splice ${meta.intron_size ? "-G ${meta.intron_size}" : ""} --split-prefix ${meta.split_prefix}"} + ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } } withName: '.*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):BEDTOOLS_BAMTOBED' { @@ -123,29 +123,29 @@ process { } withName: '.*:.*:SELFCOMP:UCSC_BEDTOBIGBED' { - ext.args = { " -type=bed3+3 -extraIndex=qName,qStart,qEnd" } - ext.prefix = { "${meta.id}_selfcomp" } + ext.args = { " -type=bed3+3 -extraIndex=qName,qStart,qEnd" } + ext.prefix = { "${meta.id}_selfcomp" } } withName: '.*:.*:REPEAT_DENSITY:UCSC_BEDGRAPHTOBIGWIG' { - ext.prefix = { "${meta.id}_repeat_density" } + ext.prefix = { "${meta.id}_repeat_density" } } withName: '.*:.*:REPEAT_DENSITY:REFORMAT_INTERSECT' { - ext.prefix = { "${meta.id}_repeat_mk" } + ext.prefix = { "${meta.id}_repeat_mk" } } withName: '.*:.*:GAP_FINDER:TABIX_BGZIPTABIX' { - ext.prefix = { "gap_${meta.id}" } + ext.prefix = { "gap_${meta.id}" } } withName: '.*:.*:SYNTENY:MINIMAP2_ALIGN' { - ext.args = '-t 8 -x asm10' - ext.prefix = { "${meta.id}_synteny_${reference.getName().tokenize('.')[0]}" } + ext.args = '-t 8 -x asm10' + ext.prefix = { "${meta.id}_synteny_${reference.getName().tokenize('.')[0]}" } } withName : MUMMER { - ext.args = "-n -b -c -L -l 400" + ext.args = "-n -b -c -L -l 400" } // @@ -162,60 +162,60 @@ process { } withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { - ext.prefix = { "${meta.id}_merge" } + ext.prefix = { "${meta.id}_merge" } } withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_SORT' { - ext.prefix = { "${meta.id}_sorted" } + ext.prefix = { "${meta.id}_sorted" } } withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW' { - ext.args = "-b -hF 256" - ext.prefix = { "${meta.id}_view" } + ext.args = "-b -hF 256" + ext.prefix = { "${meta.id}_view" } } withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' { - ext.args = "-bga -split" - ext.prefix = { "${meta.id}_genome2cov" } + ext.args = "-bga -split" + ext.prefix = { "${meta.id}_genome2cov" } } withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' { - ext.args = "-d 50" - ext.prefix = { "maxdepth" } + ext.args = "-d 50" + ext.prefix = { "maxdepth" } } withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' { - ext.args = "-d 50" - ext.prefix = { "zerodepth" } + ext.args = "-d 50" + ext.prefix = { "zerodepth" } } withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' { - ext.args = "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." - ext.prefix = { "${meta.id}_sorted" } + ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } + ext.prefix = { "${meta.id}_sorted" } } withName: '.*:.*:LONGREAD_COVERAGE:BED2BW_NORMAL' { - ext.prefix = { "${meta.id}_coverage_normal" } + ext.prefix = { "${meta.id}_coverage_normal" } } withName: '.*:.*:LONGREAD_COVERAGE:BED2BW_LOG2' { - ext.prefix = { "${meta.id}_coverage_log2" } + ext.prefix = { "${meta.id}_coverage_log2" } } // // TELOMERE BLOCK // withName: 'FIND_TELOMERE_REGIONS' { - ext.find_telomere = 'find_telomere' + ext.find_telomere = 'find_telomere' } withName: 'FIND_TELOMERE_WINDOWS' { - ext.telomere_jar = 'telomere.jar' + ext.telomere_jar = 'telomere.jar' ext.telomere_jvm_params = '-Xms1g -Xmx1g' } withName: '.*:.*:TELO_FINDER:TABIX_BGZIPTABIX' { - ext.prefix = { "telo_${meta.id}" } + ext.prefix = { "telo_${meta.id}" } } // @@ -232,86 +232,86 @@ process { } withName: '.*:.*:BUSCO_ANNOTATION:BEDTOOLS_SORT' { - ext.prefix = { "${meta.id}_busco.sorted" } + ext.prefix = { "${meta.id}_busco.sorted" } } withName: '.*:.*:.*:ANCESTRAL_GENE:BEDTOOLS_SORT' { - ext.prefix = { "${meta.id}_ancestral.sorted" } + ext.prefix = { "${meta.id}_ancestral.sorted" } } withName: 'BUSCO' { - ext.args = "--mode genome" + ext.args = "--mode genome" } // // HIC MAPPING BLOCK // normal = standard run, pi = "pre-ingestion", hr = High res withName: PRETEXTMAP_STANDRD { - ext.args = "--sortby length --mapq 0" - ext.prefix = { "${meta.id}_normal_pi" } + ext.args = "--sortby length --mapq 0" + ext.prefix = { "${meta.id}_normal_pi" } } withName: PRETEXTMAP_HIGHRES { - ext.args = "--sortby length --highRes --mapq 0" - ext.prefix = { "${meta.id}_hr_pi" } + ext.args = "--sortby length --highRes --mapq 0" + ext.prefix = { "${meta.id}_hr_pi" } } withName: '.*:.*:.*:PRETEXT_INGEST_SNDRD:PRETEXT_GRAPH' { - ext.args = "" - ext.prefix = { "${meta.id}_normal" } + ext.args = "" + ext.prefix = { "${meta.id}_normal" } } withName: '.*:.*:.*:PRETEXT_INGEST_HIRES:PRETEXT_GRAPH' { - ext.args = "" - ext.prefix = { "${meta.id}_hr" } + ext.args = "" + ext.prefix = { "${meta.id}_hr" } } withName: 'SNAPSHOT_SRES' { - ext.args = "--sequences '=full' --resolution 1440" - ext.prefix = { "${meta.id}_normal" } + ext.args = "--sequences '=full' --resolution 1440" + ext.prefix = { "${meta.id}_normal" } } withName: 'SNAPSHOT_HRES' { - ext.args = "--sequences '=full' --resolution 1440" - ext.prefix = { "${meta.id}_hr" } + ext.args = "--sequences '=full' --resolution 1440" + ext.prefix = { "${meta.id}_hr" } } withName: JUICER_TOOLS_PRE { - ext.juicer_tools_jar = 'juicer_tools.1.8.9_jcuda.0.8.jar' - ext.juicer_jvm_params = '-Xms36g -Xmx36g' + ext.juicer_tools_jar = 'juicer_tools.1.8.9_jcuda.0.8.jar' + ext.juicer_jvm_params = '-Xms36g -Xmx36g' } withName: COOLER_CLOAD { - ext.args = 'pairs -0 -c1 3 -p1 4 -c2 7 -p2 8' + ext.args = 'pairs -0 -c1 3 -p1 4 -c2 7 -p2 8' } withName: '.*:.*:HIC_MAPPING:SAMTOOLS_MARKDUP' { - ext.prefix = { "${meta.id}_mkdup" } + ext.prefix = { "${meta.id}_mkdup" } } withName: '.*:.*:HIC_MAPPING:SAMTOOLS_MERGE' { - ext.prefix = { "${meta.id}_merged" } + ext.prefix = { "${meta.id}_merged" } } withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { - ext.args = '' - ext.args1 = '-F0xB00 -nt' - ext.args2 = { "-5SPCp -H'${rglines}'" } - ext.args3 = '-mpu' - ext.args4 = { '--write-index -l1' } + ext.args = '' + ext.args1 = '-F0xB00 -nt' + ext.args2 = { "-5SPCp -H'${rglines}'" } + ext.args3 = '-mpu' + ext.args4 = { '--write-index -l1' } } withName: '.*:.*:GENERATE_GENOME:GNU_SORT' { - ext.prefix = { "${meta.id}" } - ext.suffix = { "genome" } - ext.args = { '-k2,2 -nr -S${task.memory.mega - 100}M -T .' } + ext.prefix = { "${meta.id}" } + ext.suffix = { "genome" } + ext.args = { '-k2,2 -nr -S${task.memory.mega - 100}M -T .' } } withName: FASTK_FASTK { - ext.args = "-k31 -t -P." + ext.args = "-k31 -t -P ." } withName: MERQURYFK_MERQURYFK { - ext.args = "-P." + ext.args = "-P ." } } From fbc0639b64a7c9f43b4803f50b78338a96af28f9 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Wed, 15 Nov 2023 10:21:33 +0000 Subject: [PATCH 5/7] Added some notes and emit message to pretext_graph --- subworkflows/local/pretext_ingestion.nf | 1 + subworkflows/local/yaml_input.nf | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/pretext_ingestion.nf b/subworkflows/local/pretext_ingestion.nf index 95ecb3ab..726f218d 100644 --- a/subworkflows/local/pretext_ingestion.nf +++ b/subworkflows/local/pretext_ingestion.nf @@ -53,5 +53,6 @@ workflow PRETEXT_INGESTION { ch_versions = ch_versions.mix( PRETEXT_GRAPH.out.versions ) emit: + pretext_file = PRETEXT_GRAPH.out.pretext versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index ffe02e71..50101f8d 100755 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -21,7 +21,7 @@ workflow YAML_INPUT { .multiMap { data -> assembly: ( data.assembly ) assembly_reads: ( data.assem_reads ) - reference: ( file(data.reference_file) ) + reference: ( file(data.reference_file, checkIfExists: true) ) alignment: ( data.alignment ) self_comp: ( data.self_comp ) synteny: ( data.synteny ) @@ -101,6 +101,9 @@ workflow YAML_INPUT { } .set { busco_lineage } + // + // LOGIC: COMBINE SOME CHANNELS INTO VALUES REQUIRED DOWNSTREAM + // assembly_data.sample_id .combine( assembly_data.asmVersion ) .map { it1, it2 -> From 8b4bc605a3a8ba51ba61d0891828b3cfc0ffc941 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 17 Nov 2023 11:50:05 +0000 Subject: [PATCH 6/7] Further fixes for tmp (was missing "") closes #174 and added process.tiny to GrabFiles (they dont need 6GB!) closes #125 --- conf/base.config | 20 +++- conf/modules.config | 122 ++++++++++++------------ modules/local/generate_cram_csv.nf | 2 +- subworkflows/local/ancestral_gene.nf | 1 + subworkflows/local/busco_annotation.nf | 1 + subworkflows/local/hic_mapping.nf | 2 + subworkflows/local/kmer.nf | 20 ++-- subworkflows/local/longread_coverage.nf | 2 + 8 files changed, 95 insertions(+), 75 deletions(-) diff --git a/conf/base.config b/conf/base.config index 33341a18..64bf001b 100755 --- a/conf/base.config +++ b/conf/base.config @@ -66,15 +66,21 @@ process { } // CUSTOM CONFIGS - // TODO: add process.tiny + + withLabel:process_tiny { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 100.MB * task.attempt , 'memory' ) } + time = { check_max( 4.h * task.attempt , 'time' ) } + } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } + // IN CASES WHERE THERE IS ONE HIC FILE THIS WILL NEED ALMOST NOTHING withName:SAMTOOLS_MERGE { - cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 150.GB * task.attempt, 'memory') } + cpus = { check_max( 16 * 1, 'cpus' ) } + memory = { check_max( 50.GB * task.attempt, 'memory') } } // RESOURCES: MEMORY INTENSIVE STEPS, SOFTWARE TO BE UPDATED TO COMBAT THIS @@ -151,6 +157,12 @@ process { memory = { check_max( 100.MB * task.attempt, 'memory' ) } } + withName: SNAPSHOT_SRES { + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 1.GB * task.attempt, 'memory' ) } + } + + // NOT CURRENTLY IN USE withName: SNAPSHOT_HRES { cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 50.GB * task.attempt, 'memory' ) } @@ -173,7 +185,7 @@ process { withName: COOLER_CLOAD { cpus = { check_max( 16 * 1, 'cpus' ) } - memory = { check_max( 100.GB * task.attempt, 'memory' ) } + memory = { check_max( 50.GB * task.attempt, 'memory' ) } } withName: BUSCO { diff --git a/conf/modules.config b/conf/modules.config index adff14f5..d6e5d241 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,39 +13,39 @@ process { withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + path: { "${params.outdir}/${task.process.tokenize(":")[-1].tokenize("_")[0].toLowerCase()}" }, mode: params.publish_dir_mode, - pattern: '*_versions.yml' + pattern: "*_versions.yml" ] } // Files to be uploaded to the TreeVal JBrowse2 instance // .genome, .gz.{tbi|csi}, .bigBed, .bigWig, .paf - withName: 'GENERATE_GENOME_FILE|TABIX_BGZIPTABIX|UCSC_BEDTOBIGBED|UCSC_BEDGRAPHTOBIGWIG|BED2BW_NORMAL|BED2BW_LOG2|.*:.*:SYNTENY:MINIMAP2_ALIGN|.*:.*:GENERATE_GENOME:GNU_SORT' { + withName: "GENERATE_GENOME_FILE|TABIX_BGZIPTABIX|UCSC_BEDTOBIGBED|UCSC_BEDGRAPHTOBIGWIG|BED2BW_NORMAL|BED2BW_LOG2|.*:.*:SYNTENY:MINIMAP2_ALIGN|.*:.*:GENERATE_GENOME:GNU_SORT" { publishDir = [ path: { "${params.outdir}/treeval_upload" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } ] } // Files to be stored along side the TreeVal files for access by curators // all are .bed - withName: 'PAF2BED|EXTRACT_COV_IDEN|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MAX|BEDTOOLS_MERGE_MIN' { + withName: "PAF2BED|EXTRACT_COV_IDEN|FINDHALFCOVERAGE|BEDTOOLS_MERGE_MAX|BEDTOOLS_MERGE_MIN" { publishDir = [ path: { "${params.outdir}/treeval_upload/punchlists" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } ] } // Files to be used for pretext, likely to be deleted once the hic workflow is complete. // .bed, .hr.pretext, .lr.pretext, needs centromere} - withName: 'REFORMAT_INTERSECT|SEQTK_CUTN|GAP_LENGTH|PRETEXT_GRAPH|PRETEXT_INGEST_HIRES|PRETEXT_INGEST_SNDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|BED2BW_NORMAL|BED2BW_LOG2|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES' { + withName: "REFORMAT_INTERSECT|SEQTK_CUTN|GAP_LENGTH|PRETEXT_GRAPH|PRETEXT_INGEST_HIRES|PRETEXT_INGEST_SNDRD|COOLER_ZOOMIFY|COV_FOLDER|UCSC_BEDGRAPHTOBIGWIG|BED2BW_NORMAL|BED2BW_LOG2|EXTRACT_TELO|JUICER_TOOLS_PRE|SNAPSHOT_SRES|SNAPSHOT_HRES" { publishDir = [ path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } ] } @@ -53,7 +53,7 @@ process { publishDir = [ path: { "${params.outdir}/hic_files" }, mode: params.publish_dir_mode, - pattern: '*.ref.spectra-cn.ln.png' + pattern: "*.ref.spectra-cn.ln.png" ] } @@ -100,48 +100,48 @@ process { ext.args = " --gff -j1 -ut16 --gff-delim='#' " } - withName: '.*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):MINIMAP2_ALIGN' { + withName: ".*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):MINIMAP2_ALIGN" { ext.args = {"-ax splice ${meta.intron_size ? "-G ${meta.intron_size}" : ""} --split-prefix ${meta.split_prefix}"} - ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } + ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize(".")[0]}" } } - withName: '.*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):BEDTOOLS_BAMTOBED' { + withName: ".*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):BEDTOOLS_BAMTOBED" { ext.args = "-bed12" } - withName: '.*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):UCSC_BEDTOBIGBED' { + withName: ".*:.*:.*:(GEN_ALIGNMENTS|RNA_ALIGNMENTS|CDS_ALIGNMENTS):UCSC_BEDTOBIGBED" { ext.prefix = { "${meta.id}_${meta.type}" } } - withName: '.*:.*:.*:PEP_ALIGNMENTS:BEDTOOLS_SORT' { + withName: ".*:.*:.*:PEP_ALIGNMENTS:BEDTOOLS_SORT" { ext.prefix = { "${meta.id}_prot" } } - withName: '.*:.*:INSILICO_DIGEST:UCSC_BEDTOBIGBED' { + withName: ".*:.*:INSILICO_DIGEST:UCSC_BEDTOBIGBED" { ext.args = { "-type=bed4+1 -extraIndex=length" } ext.prefix = { "${meta.id}" } } - withName: '.*:.*:SELFCOMP:UCSC_BEDTOBIGBED' { + withName: ".*:.*:SELFCOMP:UCSC_BEDTOBIGBED" { ext.args = { " -type=bed3+3 -extraIndex=qName,qStart,qEnd" } ext.prefix = { "${meta.id}_selfcomp" } } - withName: '.*:.*:REPEAT_DENSITY:UCSC_BEDGRAPHTOBIGWIG' { + withName: ".*:.*:REPEAT_DENSITY:UCSC_BEDGRAPHTOBIGWIG" { ext.prefix = { "${meta.id}_repeat_density" } } - withName: '.*:.*:REPEAT_DENSITY:REFORMAT_INTERSECT' { + withName: ".*:.*:REPEAT_DENSITY:REFORMAT_INTERSECT" { ext.prefix = { "${meta.id}_repeat_mk" } } - withName: '.*:.*:GAP_FINDER:TABIX_BGZIPTABIX' { + withName: ".*:.*:GAP_FINDER:TABIX_BGZIPTABIX" { ext.prefix = { "gap_${meta.id}" } } - withName: '.*:.*:SYNTENY:MINIMAP2_ALIGN' { - ext.args = '-t 8 -x asm10' - ext.prefix = { "${meta.id}_synteny_${reference.getName().tokenize('.')[0]}" } + withName: ".*:.*:SYNTENY:MINIMAP2_ALIGN" { + ext.args = "-t 8 -x asm10" + ext.prefix = { "${meta.id}_synteny_${reference.getName().tokenize(".")[0]}" } } withName : MUMMER { @@ -151,95 +151,95 @@ process { // // LONGREAD BLOCK // - withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN' { + withName: ".*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN" { ext.args = "--MD -t 8" - ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } + ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize(".")[0]}" } } - withName: '.*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN_SPLIT' { + withName: ".*:.*:LONGREAD_COVERAGE:MINIMAP2_ALIGN_SPLIT" { ext.args = { "-t 20 --split-prefix ${meta.split_prefix}" } - ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize('.')[0]}" } + ext.prefix = { "${meta.id}_alignment_${reference.getName().tokenize(".")[0]}" } } - withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE' { + withName: ".*:.*:LONGREAD_COVERAGE:SAMTOOLS_MERGE" { ext.prefix = { "${meta.id}_merge" } } - withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_SORT' { + withName: ".*:.*:LONGREAD_COVERAGE:SAMTOOLS_SORT" { ext.prefix = { "${meta.id}_sorted" } } - withName: '.*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW' { + withName: ".*:.*:LONGREAD_COVERAGE:SAMTOOLS_VIEW" { ext.args = "-b -hF 256" ext.prefix = { "${meta.id}_view" } } - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV' { + withName: ".*:.*:LONGREAD_COVERAGE:BEDTOOLS_GENOMECOV" { ext.args = "-bga -split" ext.prefix = { "${meta.id}_genome2cov" } } - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX' { + withName: ".*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MAX" { ext.args = "-d 50" ext.prefix = { "maxdepth" } } - withName: '.*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN' { + withName: ".*:.*:LONGREAD_COVERAGE:BEDTOOLS_MERGE_MIN" { ext.args = "-d 50" ext.prefix = { "zerodepth" } } - withName: '.*:.*:LONGREAD_COVERAGE:GNU_SORT' { + withName: ".*:.*:LONGREAD_COVERAGE:GNU_SORT" { ext.args = { "-k1,1 -k2,2n -S${task.memory.mega - 100}M -T ." } ext.prefix = { "${meta.id}_sorted" } } - withName: '.*:.*:LONGREAD_COVERAGE:BED2BW_NORMAL' { + withName: ".*:.*:LONGREAD_COVERAGE:BED2BW_NORMAL" { ext.prefix = { "${meta.id}_coverage_normal" } } - withName: '.*:.*:LONGREAD_COVERAGE:BED2BW_LOG2' { + withName: ".*:.*:LONGREAD_COVERAGE:BED2BW_LOG2" { ext.prefix = { "${meta.id}_coverage_log2" } } // // TELOMERE BLOCK // - withName: 'FIND_TELOMERE_REGIONS' { - ext.find_telomere = 'find_telomere' + withName: "FIND_TELOMERE_REGIONS" { + ext.find_telomere = "find_telomere" } - withName: 'FIND_TELOMERE_WINDOWS' { - ext.telomere_jar = 'telomere.jar' - ext.telomere_jvm_params = '-Xms1g -Xmx1g' + withName: "FIND_TELOMERE_WINDOWS" { + ext.telomere_jar = "telomere.jar" + ext.telomere_jvm_params = "-Xms1g -Xmx1g" } - withName: '.*:.*:TELO_FINDER:TABIX_BGZIPTABIX' { + withName: ".*:.*:TELO_FINDER:TABIX_BGZIPTABIX" { ext.prefix = { "telo_${meta.id}" } } // // BUSCO BLOCK // - withName: '.*:.*:BUSCO_ANNOTATION:UCSC_BEDTOBIGBED' { + withName: ".*:.*:BUSCO_ANNOTATION:UCSC_BEDTOBIGBED" { ext.args = { "-type=bed3+4 -extraIndex=name,OrthoDBurl" } ext.prefix = { "${meta.id}_buscogene" } } - withName: '.*:.*:.*:ANCESTRAL_GENE:UCSC_BEDTOBIGBED' { + withName: ".*:.*:.*:ANCESTRAL_GENE:UCSC_BEDTOBIGBED" { ext.args = { "-type=bed3+4 -extraIndex=name,OrthoDBurl" } ext.prefix = { "${meta.id}_ancestral" } } - withName: '.*:.*:BUSCO_ANNOTATION:BEDTOOLS_SORT' { + withName: ".*:.*:BUSCO_ANNOTATION:BEDTOOLS_SORT" { ext.prefix = { "${meta.id}_busco.sorted" } } - withName: '.*:.*:.*:ANCESTRAL_GENE:BEDTOOLS_SORT' { + withName: ".*:.*:.*:ANCESTRAL_GENE:BEDTOOLS_SORT" { ext.prefix = { "${meta.id}_ancestral.sorted" } } - withName: 'BUSCO' { + withName: "BUSCO" { ext.args = "--mode genome" } @@ -256,55 +256,55 @@ process { ext.prefix = { "${meta.id}_hr_pi" } } - withName: '.*:.*:.*:PRETEXT_INGEST_SNDRD:PRETEXT_GRAPH' { + withName: ".*:.*:.*:PRETEXT_INGEST_SNDRD:PRETEXT_GRAPH" { ext.args = "" ext.prefix = { "${meta.id}_normal" } } - withName: '.*:.*:.*:PRETEXT_INGEST_HIRES:PRETEXT_GRAPH' { + withName: ".*:.*:.*:PRETEXT_INGEST_HIRES:PRETEXT_GRAPH" { ext.args = "" ext.prefix = { "${meta.id}_hr" } } - withName: 'SNAPSHOT_SRES' { + withName: "SNAPSHOT_SRES" { ext.args = "--sequences '=full' --resolution 1440" ext.prefix = { "${meta.id}_normal" } } - withName: 'SNAPSHOT_HRES' { + withName: "SNAPSHOT_HRES" { ext.args = "--sequences '=full' --resolution 1440" ext.prefix = { "${meta.id}_hr" } } withName: JUICER_TOOLS_PRE { - ext.juicer_tools_jar = 'juicer_tools.1.8.9_jcuda.0.8.jar' - ext.juicer_jvm_params = '-Xms36g -Xmx36g' + ext.juicer_tools_jar = "juicer_tools.1.8.9_jcuda.0.8.jar" + ext.juicer_jvm_params = "-Xms36g -Xmx36g" } withName: COOLER_CLOAD { - ext.args = 'pairs -0 -c1 3 -p1 4 -c2 7 -p2 8' + ext.args = "pairs -0 -c1 3 -p1 4 -c2 7 -p2 8" } - withName: '.*:.*:HIC_MAPPING:SAMTOOLS_MARKDUP' { + withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MARKDUP" { ext.prefix = { "${meta.id}_mkdup" } } - withName: '.*:.*:HIC_MAPPING:SAMTOOLS_MERGE' { + withName: ".*:.*:HIC_MAPPING:SAMTOOLS_MERGE" { ext.prefix = { "${meta.id}_merged" } } withName: CRAM_FILTER_ALIGN_BWAMEM2_FIXMATE_SORT { - ext.args = '' - ext.args1 = '-F0xB00 -nt' + ext.args = "" + ext.args1 = "-F0xB00 -nt" ext.args2 = { "-5SPCp -H'${rglines}'" } - ext.args3 = '-mpu' - ext.args4 = { '--write-index -l1' } + ext.args3 = "-mpu" + ext.args4 = { "--write-index -l1" } } - withName: '.*:.*:GENERATE_GENOME:GNU_SORT' { + withName: ".*:.*:GENERATE_GENOME:GNU_SORT" { ext.prefix = { "${meta.id}" } ext.suffix = { "genome" } - ext.args = { '-k2,2 -nr -S${task.memory.mega - 100}M -T .' } + ext.args = { "-k2,2 -nr -S${task.memory.mega - 100}M -T ." } } withName: FASTK_FASTK { diff --git a/modules/local/generate_cram_csv.nf b/modules/local/generate_cram_csv.nf index b596c2d3..fdfd5cb2 100755 --- a/modules/local/generate_cram_csv.nf +++ b/modules/local/generate_cram_csv.nf @@ -1,6 +1,6 @@ process GENERATE_CRAM_CSV { tag "${meta.id}" - label 'process_low' + label 'process_tiny' conda "bioconda::samtools=1.17" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/subworkflows/local/ancestral_gene.nf b/subworkflows/local/ancestral_gene.nf index 2bb6dd5d..ed212d4d 100755 --- a/subworkflows/local/ancestral_gene.nf +++ b/subworkflows/local/ancestral_gene.nf @@ -71,6 +71,7 @@ workflow ANCESTRAL_GENE { versions = ch_versions.ifEmpty(null) } process GrabFiles { + label 'process_tiny' tag "${meta.id}" executor 'local' diff --git a/subworkflows/local/busco_annotation.nf b/subworkflows/local/busco_annotation.nf index e527f741..09dbfe42 100755 --- a/subworkflows/local/busco_annotation.nf +++ b/subworkflows/local/busco_annotation.nf @@ -114,6 +114,7 @@ workflow BUSCO_ANNOTATION { } process GrabFiles { + label 'process_tiny' tag "${meta.id}" executor 'local' diff --git a/subworkflows/local/hic_mapping.nf b/subworkflows/local/hic_mapping.nf index 43fc6fd3..def55530 100755 --- a/subworkflows/local/hic_mapping.nf +++ b/subworkflows/local/hic_mapping.nf @@ -337,6 +337,8 @@ workflow HIC_MAPPING { } process GrabFiles { + label 'process_tiny' + tag "${meta.id}" executor 'local' diff --git a/subworkflows/local/kmer.nf b/subworkflows/local/kmer.nf index 5471c493..2ad695e7 100755 --- a/subworkflows/local/kmer.nf +++ b/subworkflows/local/kmer.nf @@ -51,8 +51,8 @@ workflow KMER { // LOGIC: PRODUCE MERGED READS // CAT_CAT.out.file_out - .map{ meta, reads -> - reads.getName().endsWith('gz') ? [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa.gz'] : [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa'] + .map{ meta, reads -> + reads.getName().endsWith('gz') ? [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa.gz'] : [meta, reads.getParent().toString() + '/' + reads.getBaseName().toString() + '.fa'] } .set{ ch_reads_merged } @@ -61,10 +61,10 @@ workflow KMER { // CAT_CAT.out.file_out .join(ch_reads_merged) - .map{ meta, reads_old, reads_new -> - reads_old.renameTo(reads_new); + .map{ meta, reads_old, reads_new -> + reads_old.renameTo(reads_new); } - + // // MODULE: COUNT KMERS // @@ -77,9 +77,9 @@ workflow KMER { FASTK_FASTK.out.hist .combine(FASTK_FASTK.out.ktab) .combine(reference_tuple) - .map{ meta_hist, hist, meta_ktab, ktab, meta_ref, primary -> - tuple( meta_hist, hist, ktab, primary, []) - } + .map{ meta_hist, hist, meta_ktab, ktab, meta_ref, primary -> + tuple( meta_hist, hist, ktab, primary, []) + } .set{ ch_merq } // @@ -95,6 +95,8 @@ workflow KMER { } process GrabFiles { + label 'process_tiny' + tag "${meta.id}" executor 'local' @@ -105,4 +107,4 @@ process GrabFiles { tuple val(meta), path("in/*.fasta.gz") "true" -} \ No newline at end of file +} diff --git a/subworkflows/local/longread_coverage.nf b/subworkflows/local/longread_coverage.nf index 007775aa..bb0fd718 100755 --- a/subworkflows/local/longread_coverage.nf +++ b/subworkflows/local/longread_coverage.nf @@ -386,6 +386,8 @@ workflow LONGREAD_COVERAGE { } process GrabFiles { + label 'process_tiny' + tag "${meta.id}" executor 'local' From 5f8cb5b1a837c026bdf4e79db31114b0c9e83f80 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 17 Nov 2023 12:01:23 +0000 Subject: [PATCH 7/7] Changing FASTK tmp directory arg --- conf/modules.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index a9a170ba..5b451d38 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -308,10 +308,10 @@ process { } withName: FASTK_FASTK { - ext.args = "-k31 -t -P ." + ext.args = "-k31 -t -P." } withName: MERQURYFK_MERQURYFK { - ext.args = "-P ." + ext.args = "-P." } }