-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdelta2paf.py
114 lines (95 loc) · 3.41 KB
/
delta2paf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python
import sys
import gzip
import argparse
def delta2paf():
"""
Essentially a translation of paftools.js delta2paf
https://github.com/lh3/minimap2/tree/master/misc
"""
parser = argparse.ArgumentParser(description="Convert a Nucmer delta file to a PAF file.")
parser.add_argument("delta_file", metavar="<alns.delta>", type=str, help="delta file to convert.")
args = parser.parse_args()
delta_file = args.delta_file
seen_gt = False
if delta_file[-3:] == ".gz":
f = gzip.open(delta_file)
else:
f = open(delta_file, "r")
for line in f:
if not isinstance(line, str):
line = line.decode("utf-8")
t = line.rstrip().split(" ")
# Check to see if we have started a new ref/query pair
if line.startswith(">"):
rname, qname, rlen, qlen = t[0][1:], t[1], int(t[2]), int(t[3])
seen_gt = True
continue
# Continue if we haven't reached alignments yet
if not seen_gt:
continue
if len(t) == 7:
for i in range(5):
t[i] = int(t[i])
strand = True if (t[0] < t[1] and t[2] < t[3]) or (t[0] > t[1] and t[2] > t[3]) else False
# Reference/query start/end
rs = (t[0] - 1) if t[0] < t[1] else (t[1] - 1)
re = t[1] if t[1] > t[0] else t[0]
qs = (t[2] - 1) if t[2] < t[3] else (t[3] - 1)
qe = t[3] if t[3] > t[2] else t[2]
x, y = 0, 0
NM = t[4]
cigar = []
elif len(t) == 1:
d = int(t[0])
if d == 0:
blen, cigar_str = 0, []
if re - rs - x != qe - qs - y:
sys.stderr.write("{}, {}, {}\n".format(rs, re, x))
sys.stderr.write("{}, {}, {}\n".format(qs, qe, y))
sys.stderr.write(rname + " " + qname)
raise RuntimeError("inconsistent alignment")
cigar.append((re - rs - x) << 4)
for i in cigar:
blen += i >> 4
cigar_str.append(str((i >> 4)) + "MID"[i & 0xf])
out_strand = "+" if strand > 0 else "-"
print("\t".join([
qname,
str(qlen),
str(qs),
str(qe),
out_strand,
rname,
str(rlen),
str(rs),
str(re),
str(blen - NM),
str(blen),
"0",
"NM:i:" + str(NM),
"cg:Z:" + "".join(cigar_str)
]))
elif d > 0:
l = d - 1
x += l + 1
y += l
if l:
cigar.append(l << 4)
if len(cigar) > 0 and (cigar[-1] & 0xf) == 2:
cigar[-1] += 1 << 4
else:
cigar.append(1 << 4 | 2) # deletion
else:
l = -d - 1
x += l
y += l + 1
if l:
cigar.append(l << 4)
if len(cigar) > 0 and (cigar[-1] & 0xf) == 1:
cigar[-1] += 1 << 4
else:
cigar.append(1 << 4 | 1) # insertion
f.close()
if __name__ == "__main__":
delta2paf()