diff --git a/CHANGELOG.md b/CHANGELOG.md index 09100fed..0170b9f2 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,15 +28,17 @@ This builds on the initial release by adding subworkflows which generate kmer ba - Updated the minimap2 align module to remove samtools view in preference of paftools for our usecase. - Updated the test.yml inline with the above changes. - Updated the SELFCOMP subworkflow to allow for the parallelisation of the work on large genomes. -- Updated the READ_COVERAGE subworkflow to produce the AVG coverage and STND coverage +- Updated the READ_COVERAGE subworkflow to produce the scaffold-based AVG coverage and STND coverage - Updated Modules from NF-Core - mostly relates to module structure rather than software. - Updated the SummaryStats output to include HiC container counts. - Added -T / -t flags where possible to minimise the use of the /tmp directory. - Replaced CONCAT_MUMMER with CATCAT for simplicity. - Removed JUICER from the RAPID entrypoint. -- Removed the csi or tbi logic. CSI is now used by default, this simplified the workflow and moved the logic block previously required. -- Added NF-DOWNLOAD to the CI-CD due to an error which causes incomplete downloaded when downloading a number of images at the same time. -- Added the RAPID_TOL entry point which is more geared towards requirements of Sanger. +- Removed the csi or tbi logic. CSI is now used by default; this simplified the workflow and enlarged the capacity to handle much larger genomes. The logic block previously required was then moved. +- Added NF-DOWNLOAD to the CI-CD due to an error that causes incomplete downloads when downloading a number of images at the same time. +- Added the RAPID_TOL entry point which is more geared towards the requirements of Sanger. +- Fixed a bug in build_alignment_block.py to avoid indexing errors that occur in large genomes. +- Changed the BEDGRAPH output from the EXTRACT_TELO module. 
### Parameters diff --git a/bin/build_alignment_block.py b/bin/build_alignment_block.py index 10424812..1f6c2283 100755 --- a/bin/build_alignment_block.py +++ b/bin/build_alignment_block.py @@ -40,43 +40,38 @@ def build_block(mylist): qlist = [] nlist = [] - for idx, x in enumerate(mylist): + idx = 0 + while idx < len(mylist): + x = mylist[idx] if idx < len(mylist) - 1: qcurrent = int(x[6]) rcurrent = int(x[1]) qnext = mylist[idx + 1][6] - leftover = mylist[idx : len(mylist)] - - # leftd = int(max((x[6]-qcurrent for x in leftover), default=0)) - - leftd = list(x[6] - qmin for x in leftover) - - positives = [x for x in leftd if x > 0] - - min_value = min((positives), default=0) - - indmin = leftd.index(min_value) - - rm = leftover[indmin][1] - - if qcurrent > qmin and qcurrent < qnext and rm == rcurrent: - qmin = qcurrent - qlist.append(idx) - - if qcurrent > qmin and qcurrent < qnext and rm > rcurrent: - nlist.append(idx) - - if qcurrent > qmin and qcurrent > qnext: - nlist.append(idx) - - if qcurrent < qmin and qcurrent > qnext: - nlist.append(idx) + leftd = [int(y[6]) - qmin for y in mylist[idx:]] + positives = list(filter(lambda x: x > 0, leftd)) + + if positives: + min_value = min(positives) + indmin = leftd.index(min_value) + rm = mylist[idx + indmin][1] + + if qcurrent > qmin and qcurrent < qnext and rm == rcurrent: + qmin = qcurrent + qlist.append(idx) + elif qcurrent > qmin and qcurrent < qnext and rm > rcurrent: + nlist.append(idx) + elif qcurrent > qmin and qcurrent > qnext or qcurrent < qmin and qcurrent > qnext: + nlist.append(idx) + else: + idx += 1 + continue if idx == len(mylist) - 1: if mylist[idx][6] > qmin: qlist.append(idx) else: nlist.append(idx) + idx += 1 alignment_chain = [mylist[i] for i in qlist] new_list = [mylist[i] for i in nlist] @@ -122,12 +117,9 @@ def build_block(mylist): while newlist: blocks, newlist = build_block(newlist) - # fileprefix = "".join(random.choices(string.ascii_lowercase + string.digits, k=12)) - # filename = 
fileprefix + ".block" newblocks = [ [x if i != 3 else y[3] + ":" + str(y[6]) + ":" + str(y[7]) for i, x in enumerate(y)] for y in blocks ] - a = pybedtools.BedTool(newblocks) merged = a.merge(d=100000, c="4,7,8", o="collapse,min,max", delim="|") fo.write(str(merged)) diff --git a/conf/base.config b/conf/base.config index bfa75d11..992e8619 100755 --- a/conf/base.config +++ b/conf/base.config @@ -142,7 +142,7 @@ process { withName: '.*:.*:READ_COVERAGE:MINIMAP2_ALIGN' { cpus = { check_max( 20 * 1, 'cpus' ) } memory = { check_max( 1.GB * ( reference.size() < 2e9 ? 50 : Math.ceil( ( reference.size() / 1e+9 ) * 20 ) * Math.ceil( task.attempt * 1 ) ) , 'memory') } - time = { check_max( 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 3.5e9 ? 30 : 60), 'time' ) } + time = { check_max( 1.h * ( reference.size() < 1e9 ? 10 : reference.size() < 10e9 ? 30 : 60), 'time' ) } } withName: '.*:.*:READ_COVERAGE:BEDTOOLS_GENOMECOV' { diff --git a/modules/local/extract_telo.nf b/modules/local/extract_telo.nf index 2d79952e..c39e665c 100755 --- a/modules/local/extract_telo.nf +++ b/modules/local/extract_telo.nf @@ -20,7 +20,7 @@ process EXTRACT_TELO { def VERSION = "9.1" // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. $/ cat "${file}" |awk '{print $2"\t"$4"\t"$5}'|sed 's/>//g' > ${prefix}_telomere.bed - cat "${file}" |awk '{print $2"\t"$4"\t"$5"\t"$6}'|sed 's/>//g' > ${prefix}_telomere.bedgraph + cat "${file}" |awk '{print $2"\t"$4"\t"$5"\t"((($5-$4)<0)?-($5-$4):($5-$4))}' | sed 's/>//g' > ${prefix}_telomere.bedgraph cat <<-END_VERSIONS > versions.yml "${task.process}":