-
Notifications
You must be signed in to change notification settings - Fork 0
/
12.py
74 lines (62 loc) · 2.39 KB
/
12.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def revers_compliment(DNA):
    """Return the reverse complement of a DNA string.

    The sequence is reversed and every base is replaced by its pairing
    partner (A<->T, C<->G).  The reversed strand and the final result
    are both printed as a trace before the result is returned.

    Args:
        DNA: Sequence of upper-case nucleotide letters.  Characters other
            than 'A', 'C', 'G' and 'T' have no entry in the pairing
            table and are left out of the result.

    Returns:
        The reverse-complement strand as a string.
    """
    pairing = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    reverse = DNA[::-1]
    print(f"{reverse} = Reverse")
    # A single join() avoids rebuilding the string once per base.
    compliment = ''.join(pairing[b] for b in reverse if b in pairing)
    print(f"{compliment} = Reverse Compliment")
    return compliment
def HammingDistance(a, b):
    """Return the number of positions at which two strings differ.

    The comparison ignores letter case: both strings are lower-cased
    before they are compared position by position.

    Args:
        a: First string.
        b: Second string; must have the same length as ``a``.

    Returns:
        The count of mismatching positions as an int.

    Raises:
        ValueError: If the two strings do not have the same length, since
            the distance is only defined for equal-length strings.
    """
    a = a.lower()
    b = b.lower()
    if len(a) != len(b):
        raise ValueError(
            f"HammingDistance needs equal-length strings, "
            f"got lengths {len(a)} and {len(b)}"
        )
    # Each mismatching pair contributes True (== 1) to the sum.
    return sum(x != y for x, y in zip(a, b))
def Suffix(Pattern):
    """Return Pattern without its first character.

    Args:
        Pattern: Any string.

    Returns:
        Everything after the first character; the empty string when
        Pattern has zero or one character.
    """
    return Pattern[1:]
def Neighbors(Pattern, d):
    """Return every string within d mismatches of Pattern.

    The neighborhood is built recursively from the neighborhood of
    Pattern's suffix (Pattern without its first character): a suffix
    neighbor that still has mismatch budget left may be prefixed with any
    nucleotide, otherwise it must keep Pattern's own first character.

    Args:
        Pattern: The string whose neighborhood is wanted.
        d: Maximum number of mismatching positions allowed (>= 0).

    Returns:
        A set of strings of the same length as Pattern.  Substituted
        positions use the upper-case letters A, C, G, T.

    Raises:
        ValueError: If d is negative.
    """
    if d < 0:
        raise ValueError("d must be non-negative")
    # Nothing may change, or nothing is left to change.  The empty-string
    # case also guarantees termination: without it an empty Pattern with
    # d > 0 would recurse on itself forever.
    if d == 0 or not Pattern:
        return {Pattern}
    if len(Pattern) == 1:
        return {'A', 'C', 'G', 'T'}

    # The suffix is the same for every candidate, so compute it once.
    first, suffix = Pattern[0], Pattern[1:]
    suffix_folded = suffix.lower()

    Neighborhood = set()
    for Text in Neighbors(suffix, d):
        # Case-insensitive mismatch count between the suffix and this
        # candidate; both always have the same length.
        mismatches = sum(p != t for p, t in zip(suffix_folded, Text.lower()))
        if mismatches < d:
            # Budget remains: the first position may be any nucleotide.
            Neighborhood.update(x + Text for x in 'ATCG')
        else:
            # Budget is used up: the first position must match Pattern.
            Neighborhood.add(first + Text)
    return Neighborhood
def FrequentWordsWithMismatches(Text, k, d):
    """Find the most frequent k-mers in Text, allowing up to d mismatches.

    Every length-k window of Text contributes one count to each string
    in its d-neighborhood (as produced by ``Neighbors``).  The strings
    with the highest total count are returned; they need not occur
    exactly in Text.

    Args:
        Text: The sequence to scan.
        k: Length of the words to count; must be positive.
        d: Maximum number of mismatches passed on to ``Neighbors``.

    Returns:
        A list of all strings that reach the maximum count.  The list is
        empty when Text is shorter than k.  The order of the entries is
        not significant.

    Raises:
        ValueError: If k is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")

    freqMap = {}
    for i in range(len(Text) - k + 1):
        # Iterate the neighborhood directly so that every neighbor is
        # counted; stopping one short would drop an arbitrary member of
        # the (unordered) set for each window.
        for neighbor in Neighbors(Text[i:i + k], d):
            freqMap[neighbor] = freqMap.get(neighbor, 0) + 1

    # No windows at all (Text shorter than k): max() would fail on an
    # empty collection, and there is nothing to report.
    if not freqMap:
        return []

    m = max(freqMap.values())
    return [pattern for pattern, count in freqMap.items() if count == m]
# Demo run: search the sample sequence below for the most frequent
# 7-letter words, allowing up to 2 mismatches, and print them.
sample = "CTCCCGCTCGCTTTAATTAATTAAAAGACGCTAAGACTCCCGCTTTAAAAGACTCCCGCTCTCCCTCCTTAATCACGCTAAGACGCTAAGATTAATCACTCCTTAATTAACGCTAAGATTAACTCCTCACTCCTTAATTAACGCTCTCCTTAATCAAAGACTCCTCACGCTCTCCTTAACGCTTTAACGCTTCATCATCATCATCACGCTTTAACTCCAAGACGCTAAGACGCTTCAAAGATCATCACTCCAAGAAAGACGCTAAGATCATCATCACGCTCTCCCGCTTCATTAACTCCCTCCTCACGCTTTAACTCCCTCCCGCTCTCCTCAAAGATCACGCTTCAAAGAAAGACGCTTTAATCATTAACGCTTCATCAAAGACTCCAAGACTCC"
res = FrequentWordsWithMismatches(Text=sample, k=7, d=2)
print(res)