forked from asweigart/codebreaker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
freqFinder.py
202 lines (158 loc) · 7.77 KB
/
freqFinder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# Frequency Finder
# http://inventwithpython.com/codebreaker (BSD Licensed)
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
englishLetterFreq = {'E': 12.70, 'T': 9.06, 'A': 8.17, 'O': 7.51, 'I': 6.97, 'N': 6.75, 'S': 6.33, 'H': 6.09, 'R': 5.99, 'D': 4.25, 'L': 4.03, 'C': 2.78, 'U': 2.76, 'M': 2.41, 'W': 2.36, 'F': 2.23, 'G': 2.02, 'Y': 1.97, 'P': 1.93, 'B': 1.29, 'V': 0.98, 'K': 0.77, 'J': 0.15, 'X': 0.15, 'Q': 0.10, 'Z': 0.07}
englishTrigramFreq = {'THE': 3.508232, 'AND': 1.593878, 'ING': 1.147042, 'HER': 0.822444, 'HAT': 0.650715, 'HIS': 0.596748, 'THA': 0.593593, 'ERE': 0.560594, 'FOR': 0.555372, 'ENT': 0.530771, 'ION': 0.506454, 'TER': 0.461099, 'WAS': 0.460487, 'YOU': 0.437213, 'ITH': 0.43125, 'VER': 0.430732, 'ALL': 0.422758, 'WIT': 0.39729, 'THI': 0.394796, 'TIO': 0.378058}
englishFreqOrder = ('E', 'T', 'A', 'O', 'I', 'N', 'S', 'H', 'R', 'D', 'L', 'C', 'U', 'M', 'W', 'F', 'G', 'Y', 'P', 'B', 'V', 'K', 'J', 'X', 'Q', 'Z')
ETAOIN = ''.join(englishFreqOrder)
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
TRIGRAM_THRESHOLD = 2
TRIGRAM_MATCH_RANGE = 30
def main():
import random
sonnet = """WHEN, IN DISGRACE WITH FORTUNE AND MEN'S EYES,
I ALL ALONE BEWEEP MY OUTCAST STATE,
AND TROUBLE DEAF HEAVEN WITH MY BOOTLESS CRIES,
AND LOOK UPON MYSELF AND CURSE MY FATE,
WISHING ME LIKE TO ONE MORE RICH IN HOPE,
FEATURED LIKE HIM, LIKE HIM WITH FRIENDS POSSESSED,
DESIRING THIS MAN'S ART, AND THAT MAN'S SCOPE,
WITH WHAT I MOST ENJOY CONTENTED LEAST,
YET IN THESE THOUGHTS MYSELF ALMOST DESPISING,
HAPLY I THINK ON THEE, AND THEN MY STATE,
LIKE TO THE LARK AT BREAK OF DAY ARISING
FROM SULLEN EARTH, SINGS HYMNS AT HEAVEN'S GATE
FOR THY SWEET LOVE REMEMBERED SUCH WEALTH BRINGS,
THAT THEN I SCORN TO CHANGE MY STATE WITH KINGS."""
loremIpsum = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam porta varius ante eget tincidunt. Pellentesque placerat, turpis nec elementum consectetur, enim nulla molestie velit, quis semper erat lacus quis metus.'.upper()
print(englishFreqMatch("""Originally conceived to follow the UK Prestel specifications, and developed on contract by IBM Germany, Btx added a number of additional features before launch, including some inspired by the French Minitel service, to create a new display standard of its own, which in 1981 was designated the CEPT1 profile. In 1995 an enhanced backward-compatible standard called Kernel for Intelligent Communication Terminals (KIT) was announced, but this never really gained acceptance. CEPT permits the transmission of graphical pages with a resolution of 480 by 250 pixels, where 32 out of a palette of 4096 colors could be shown at the same time. This corresponds to the technical possibilities of the early 1980s."""))
print('Shakespeare\'s Sonnet #29')
print(sonnet)
print()
print('Letter Frequencies of Sonnet #29:')
print(getLetterCount(sonnet))
print()
print('Frequency score of Sonnet #29:')
print(englishFreqMatch(sonnet))
print()
# Scrambling Sonnet #29
scrambled = list(sonnet)
random.shuffle(scrambled)
scrambled = ''.join(scrambled)
print('Frequency score of Scrambled Sonnet #29:')
print(englishFreqMatch(scrambled))
print()
print('Frequency score of Lorem Ipsum text:')
print(englishFreqMatch(loremIpsum))
print()
print('Frequency score of alphabet:')
print(englishFreqMatch(LETTERS))
print()
print('Frequency score of alphabet x 100:')
print(englishFreqMatch(LETTERS * 100))
print()
print('Frequency score of "AAAAAAAAAAAAAAAH":')
print(englishFreqMatch("AAAAAAAAAAAAAAAH"))
print()
print('Frequency score of "VDIUFRFDSFEWAFDSAFLKHFDSALKFA":')
print(englishFreqMatch("VDIUFRFDSFEWAFDSAFLKHFDSALKFA"))
print()
def getLetterCount(message):
# Returns a dictionary with keys of single letters and values of the
# count of how many times they appear in the message parameter.
letterToCount = {}
for letter in LETTERS:
letterToCount[letter] = 0 # intialize each letter to 0
for letter in message:
if letter in LETTERS:
letterToCount[letter] += 1
return letterToCount
def getLetterFreq(message):
# Returns a dictionary with keys of single letters and values of the
# percentage of their frequency in the message parameter.
counts = getLetterCount(message)
totalCount = 0
for letter in counts:
totalCount += counts[letter]
letterToFreq = {}
for letter in counts:
letterToFreq[letter] = round(counts[letter] * 100 / totalCount, 2)
return letterToFreq
def getFrequencyOrder(message):
# Returns a string of the alphabet letters arranged in order of most
# frequently occurring in the message parameter.
message = message.upper()
# first, get a dictionary of each letter and its frequency count from the message
letterToFreq = getLetterCount(message)
# second, make a dictionary of each frequency count to each letter(s) with that frequency
freqToLetter = {}
for letter in LETTERS:
freqToLetter[letterToFreq[letter]] = [] # intialize to a blank list
for letter in LETTERS:
freqToLetter[letterToFreq[letter]].append(letter)
# third, put each list of letters in reverse "ETAOIN" order, and then convert it to a string
for freq in freqToLetter:
freqToLetter[freq].sort(key=lambda x: ETAOIN.find(x), reverse=True)
freqToLetter[freq] = ''.join(freqToLetter[freq])
# fourth, convert the freqToLetter dictionary to a list of tuple pairs (key, value), then sort them
freqPairs = list(freqToLetter.items())
freqPairs.sort(key=lambda x: x[0], reverse=True)
# fifth, now that the letters are ordered by frequency, extract all the letters for the final string
freqOrder = ''
for freqPair in freqPairs:
freqOrder += freqPair[1]
return freqOrder
def englishFreqMatch(message):
# Return the number of matches that the string in the message parameter
# has when its letter frequency is compared to English letter frequency.
# A "match" is how many of its six most frequent and six least frequent
# letters is among the six most frequent and six least frequent letters
# for English.
freqOrder = getFrequencyOrder(message)
print(freqOrder)
matches = 0
# Find how many matches for the six most common letters there are.
for commonLetter in ETAOIN[:6]:
if commonLetter in freqOrder[:6]:
matches += 1
# Find how many matches for the six least common letters there are.
for uncommonLetter in ETAOIN[-6:]:
if uncommonLetter in freqOrder[-6:]:
matches += 1
return matches
def englishTrigramMatch(message):
# Return True if the string in the message parameter matches the
# trigram frequency of English.
# Remove the non-letter characters from message
message = message.upper()
lettersOnly = []
for character in message:
if character in LETTERS:
lettersOnly.append(character)
message = ''.join(lettersOnly)
# Count the trigrams in message
total = 0
trigrams = {}
for i in range(len(message) - 2):
trigram = message[i:i+3]
if trigram in trigrams:
trigrams[trigram] += 1
else:
trigrams[trigram] = 1
total +=1
# Sort the trigrams by frequency
topFreqs = list(trigrams.items())
topFreqs.sort(key=lambda x: x[1], reverse=True)
topFreqLetters = []
for item in topFreqs:
topFreqLetters.append(item[0])
trigramFreqs = {}
for trigram in trigrams:
trigramFreqs[trigram] = trigrams[trigram] / total * 100
matches = 0
for commonTrig in englishTrigramFreq:
if commonTrig in topFreqLetters[:TRIGRAM_MATCH_RANGE]:
matches += 1
return matches >= TRIGRAM_THRESHOLD
if __name__ == '__main__':
main()