From bde04a4c2a2f0f66027f4329cf9b7f6cab4c0444 Mon Sep 17 00:00:00 2001 From: yumisims Date: Wed, 3 Apr 2024 22:38:23 +0100 Subject: [PATCH 1/4] selfcomp fix and telomere output fix --- bin/build_alignment_block.py | 53 +++++++++++++++-------------------- modules/local/extract_telo.nf | 2 +- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/bin/build_alignment_block.py b/bin/build_alignment_block.py index 10424812..d4b75fb0 100755 --- a/bin/build_alignment_block.py +++ b/bin/build_alignment_block.py @@ -40,49 +40,45 @@ def build_block(mylist): qlist = [] nlist = [] - for idx, x in enumerate(mylist): + idx = 0 + while idx < len(mylist): + x = mylist[idx] if idx < len(mylist) - 1: qcurrent = int(x[6]) rcurrent = int(x[1]) qnext = mylist[idx + 1][6] - leftover = mylist[idx : len(mylist)] - - # leftd = int(max((x[6]-qcurrent for x in leftover), default=0)) - - leftd = list(x[6] - qmin for x in leftover) - - positives = [x for x in leftd if x > 0] - - min_value = min((positives), default=0) - - indmin = leftd.index(min_value) - - rm = leftover[indmin][1] - - if qcurrent > qmin and qcurrent < qnext and rm == rcurrent: - qmin = qcurrent - qlist.append(idx) - - if qcurrent > qmin and qcurrent < qnext and rm > rcurrent: - nlist.append(idx) - - if qcurrent > qmin and qcurrent > qnext: - nlist.append(idx) - - if qcurrent < qmin and qcurrent > qnext: - nlist.append(idx) + leftd = [int(y[6]) - qmin for y in mylist[idx:]] + positives = list(filter(lambda x: x > 0, leftd)) + + if positives: + min_value = min(positives) + indmin = leftd.index(min_value) + rm = mylist[idx + indmin][1] + + if qcurrent > qmin and qcurrent < qnext and rm == rcurrent: + qmin = qcurrent + qlist.append(idx) + elif qcurrent > qmin and qcurrent < qnext and rm > rcurrent: + nlist.append(idx) + elif qcurrent > qmin and qcurrent > qnext or qcurrent < qmin and qcurrent > qnext: + nlist.append(idx) + else: + idx += 1 + continue if idx == len(mylist) - 1: if mylist[idx][6] > qmin: qlist.append(idx) 
else: nlist.append(idx) + idx += 1 alignment_chain = [mylist[i] for i in qlist] new_list = [mylist[i] for i in nlist] return alignment_chain, new_list + #########main########## parser = optparse.OptionParser(version="%prog 1.0") @@ -122,12 +118,9 @@ def build_block(mylist): while newlist: blocks, newlist = build_block(newlist) - # fileprefix = "".join(random.choices(string.ascii_lowercase + string.digits, k=12)) - # filename = fileprefix + ".block" newblocks = [ [x if i != 3 else y[3] + ":" + str(y[6]) + ":" + str(y[7]) for i, x in enumerate(y)] for y in blocks ] - a = pybedtools.BedTool(newblocks) merged = a.merge(d=100000, c="4,7,8", o="collapse,min,max", delim="|") fo.write(str(merged)) diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf index 2d79952e..c39e665c 100755 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -20,7 +20,7 @@ process EXTRACT_TELO { def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. $/ cat "${file}" |awk '{print $2"\t"$4"\t"$5}'|sed 's/>//g' > ${prefix}_telomere.bed - cat "${file}" |awk '{print $2"\t"$4"\t"$5"\t"$6}'|sed 's/>//g' > ${prefix}_telomere.bedgraph + cat "${file}" |awk '{print $2"\t"$4"\t"$5"\t"((($5-$4)<0)?-($5-$4):($5-$4))}' | sed 's/>//g' > ${prefix}_telomere.bedgraph cat <<-END_VERSIONS > versions.yml "${task.process}": From 45df91d1f2683b9f2b89424b1bd5b66362f74d71 Mon Sep 17 00:00:00 2001 From: yumisims Date: Wed, 3 Apr 2024 22:42:12 +0100 Subject: [PATCH 2/4] selfcomp fix and telomere output fix --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index bfa75d11..992e8619 100755 --- a/conf/base.config +++ b/conf/base.config @@ -142,7 +142,7 @@ process { withName: '.*:.*:READ_COVERAGE:MINIMAP2_ALIGN' { cpus = { check_max( 20 * 1, 'cpus' ) } memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 
50 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * Math.ceil( task.attempt * 1 ) ) , 'memory') } - time = { check_max( 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 3.5e9 ? 30 : 60), 'time' ) } + time = { check_max( 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 60), 'time' ) } } withName: '.*:.*:READ_COVERAGE:BEDTOOLS_GENOMECOV' { From c238fed121d15b99258124f70e9eb2cfcafde2dd Mon Sep 17 00:00:00 2001 From: yumisims Date: Wed, 3 Apr 2024 22:48:25 +0100 Subject: [PATCH 3/4] selfcomp fix and telomere output fix --- bin/build_alignment_block.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/build_alignment_block.py b/bin/build_alignment_block.py index d4b75fb0..1f6c2283 100755 --- a/bin/build_alignment_block.py +++ b/bin/build_alignment_block.py @@ -78,7 +78,6 @@ def build_block(mylist): return alignment_chain, new_list - #########main########## parser = optparse.OptionParser(version="%prog 1.0") From a235edfa0df8006eec57740e62708aacd48beb24 Mon Sep 17 00:00:00 2001 From: YSims Date: Thu, 4 Apr 2024 08:55:05 +0100 Subject: [PATCH 4/4] Update CHANGELOG.md --- CHANGELOG.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09100fed..0170b9f2 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,15 +28,17 @@ This builds on the initial release by adding subworkflows which generate kmer ba - Updated the minimap2 align module to remove samtools view in preference of paftools for our usecase. - Updated the test.yml inline with the above changes. - Updated the SELFCOMP subworkflow to allow for the parallelisation of the work on large genomes. -- Updated the READ_COVERAGE subworkflow to produce the AVG coverage and STND coverage +- Updated the READ_COVERAGE subworkflow to produce the scaffold based AVG coverage and STND coverage - Updated Modules from NF-Core - mostly relates to module structure rather than software. 
- Updated the SummaryStats output to include HiC container counts. - Added -T / -t flags where possible to minimise the use of the /tmp directory. - Replaced CONCAT_MUMMER with CATCAT for simplicity. - Removed JUICER from the RAPID entrypoint. -- Removed the csi or tbi logic. CSI is now used by default, this simplified the workflow and moved the logic block previously required. -- Added NF-DOWNLOAD to the CI-CD due to an error which causes incomplete downloaded when downloading a number of images at the same time. -- Added the RAPID_TOL entry point which is more geared towards requirements of Sanger. +- Removed the csi or tbi logic. CSI is now used by default, which simplifies the workflow and enlarges the capacity to handle much larger genomes. The logic block previously required was then moved. +- Added NF-DOWNLOAD to the CI-CD due to an error that causes incomplete downloads when downloading a number of images at the same time. +- Added the RAPID_TOL entry point which is more geared towards the requirements of Sanger. +- Fixed a bug in build_alignment_block.py to avoid indexing errors occurring in large genomes. +- Changed the BEDGRAPH output of the EXTRACT_TELO module. ### Parameters