Skip to content

Commit

Permalink
skip invalid gap instead of exit
Browse files Browse the repository at this point in the history
  • Loading branch information
Echoring committed Mar 11, 2024
1 parent 1169520 commit e1a2f72
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 7 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Task include:
- [CentroMiner](#CentroMiner): centromere candidate prediction

## Version Change log
1.1.8
- GapFiller now emits a warning and skips the gap, instead of exiting with an error, when a flanking sequence contains another gap.

1.1.7
- Support RepeatMasker's TE annotation format for CentroMiner module.

Expand Down
2 changes: 1 addition & 1 deletion quartet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys

usage = '''quarTeT: Telomere-to-telomere Toolkit
version 1.1.7
version 1.1.8
Usage: python3 quartet.py <module> <parameters>
Expand Down
14 changes: 8 additions & 6 deletions quartet_gapfiller.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,22 @@ def GapFiller(args):
for gapsite in gapsitelist:
start = max(gapsite[0] - flanking, 0)
end = min(gapsite[1] + flanking, len(seq))
flankingdict[f'{sid}.{i}.L'] = seq[start:gapsite[0]]
flankingdict[f'{sid}.{i}.R'] = seq[gapsite[1]:end]
leftseq = seq[start:gapsite[0]]
rightseq = seq[gapsite[1]:end]
if 'N'*100 in leftseq or 'N'*100 in rightseq:
print(f'[Warning] Flanking sequence of gap {sid}.{i} contains another gap. This indicates two gaps are too close and a very small contig is placed in between.')
else:
flankingdict[f'{sid}.{i}.L'] = seq[start:gapsite[0]]
flankingdict[f'{sid}.{i}.R'] = seq[gapsite[1]:end]
gapdict[f'{sid}.{i}'] = seq[gapsite[0]:gapsite[1]]
i += 1
if flankingdict == {}:
print('[Error] Input genome does not have gap.')
print('[Error] Input genome does not have valid gap.')
sys.exit(0)
subprocess.run(f'mkdir tmp', stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
flankingfastafile = f'tmp/{prefix}.gap.flanking.fasta'
with open(flankingfastafile, 'w') as f:
for sid, seq in flankingdict.items():
if 'N'*100 in seq:
print('[Error] Flanking sequence contains gap. Recommend to lower -f parameter or check your file.')
sys.exit(0)
f.write(f'>{sid}\n{seq}\n')
del draftgenomedict

Expand Down

0 comments on commit e1a2f72

Please sign in to comment.