-
Notifications
You must be signed in to change notification settings - Fork 0
/
approx.go
125 lines (100 loc) · 2.47 KB
/
approx.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package gostr
import (
"fmt"
"regexp"
"strconv"
"strings"
)
// ApproxEdit is a type for edit operations
type ApproxEdit int
// EditOps is a type for a sequence of edit operations
type EditOps = []ApproxEdit
// Approximative matching edit operations.
const (
M ApproxEdit = iota // Match/mismatch operations
I = iota // Insertion operations
D = iota // Deletion operations
)
var editsToString = map[ApproxEdit]string{
M: "M", I: "I", D: "D",
}
var stringToEdits = map[string]ApproxEdit{
"M": M, "I": I, "D": D,
}
// OpsToCigar turns a list of ops into a cigar.
func OpsToCigar(ops EditOps) string {
var (
res = []string{}
i, j int
)
for ; i < len(ops); i = j {
for j = i + 1; j < len(ops) && ops[i] == ops[j]; j++ {
}
res = append(res, fmt.Sprintf("%d%s", j-i, editsToString[ops[i]]))
}
return strings.Join(res, "")
}
// CigarToOps turns a cigar string into the list of edit ops.
func CigarToOps(cigar string) (EditOps, error) {
r := regexp.MustCompile(`\d+[MID]`)
ops := EditOps{}
// This check is really inefficient, but I don't have time to
// implement a better parser of cigars right now. A scan from
// the beginning that chumps of digits would be a lot faster.
// I'll leave that for later...
for _, s := range r.Split(cigar, -1) {
if s != "" {
return ops, &InvalidCigar{x: cigar}
}
}
for _, op := range r.FindAllString(cigar, -1) {
rep, _ := strconv.Atoi(op[:len(op)-1])
opcode := stringToEdits[string(op[len(op)-1])]
for i := 0; i < rep; i++ {
ops = append(ops, opcode)
}
}
return ops, nil
}
// ExtractAlignment extracts a pairwise alignment from the reference, x,
// the read, p, the position and the edits cigar.
func ExtractAlignment(x, p string, pos int, cigar string) (subx, subp string, err error) {
i, j := pos, 0
ops, err := CigarToOps(cigar)
if err != nil {
return "", "", err
}
for _, op := range ops {
switch op {
case M:
subx += string(x[i])
subp += string(p[j])
i++
j++
case I:
subx += "-"
subp += string(p[j])
j++
case D:
subx += string(x[i])
subp += "-"
i++
}
}
return subx, subp, nil
}
// CountEdits counts the number of edits in the local alignment between x and p
// specified by pos and cigar
func CountEdits(x, p string, pos int, cigar string) (int, error) {
edits := 0
subx, subp, err := ExtractAlignment(x, p, pos, cigar)
if err != nil {
return 0, err
}
for i := range subx {
if subx[i] != subp[i] {
edits++
}
}
return edits, nil
}