forked from asweigart/codebreaker
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfreqAnalysis.py
83 lines (61 loc) · 3.22 KB
/
freqAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Frequency Finder
# http://inventwithpython.com/hacking (BSD Licensed)
# frequency taken from http://en.wikipedia.org/wiki/Letter_frequency
englishLetterFreq = {'E': 12.70, 'T': 9.06, 'A': 8.17, 'O': 7.51, 'I': 6.97, 'N': 6.75, 'S': 6.33, 'H': 6.09, 'R': 5.99, 'D': 4.25, 'L': 4.03, 'C': 2.78, 'U': 2.76, 'M': 2.41, 'W': 2.36, 'F': 2.23, 'G': 2.02, 'Y': 1.97, 'P': 1.93, 'B': 1.29, 'V': 0.98, 'K': 0.77, 'J': 0.15, 'X': 0.15, 'Q': 0.10, 'Z': 0.07}
ETAOIN = 'ETAOINSHRDLCUMWFGYPBVKJXQZ'
LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
def getLetterCount(message):
# Returns a dictionary with keys of single letters and values of the
# count of how many times they appear in the message parameter.
letterCount = {'A': 0, 'B': 0, 'C': 0, 'D': 0, 'E': 0, 'F': 0, 'G': 0, 'H': 0, 'I': 0, 'J': 0, 'K': 0, 'L': 0, 'M': 0, 'N': 0, 'O': 0, 'P': 0, 'Q': 0, 'R': 0, 'S': 0, 'T': 0, 'U': 0, 'V': 0, 'W': 0, 'X': 0, 'Y': 0, 'Z': 0}
for letter in message.upper():
if letter in LETTERS:
letterCount[letter] += 1
return letterCount
def getItemAtIndexZero(x):
return x[0]
def getFrequencyOrder(message):
# Returns a string of the alphabet letters arranged in order of most
# frequently occurring in the message parameter.
# first, get a dictionary of each letter and its frequency count
letterToFreq = getLetterCount(message)
# second, make a dictionary of each frequency count to each letter(s)
# with that frequency
freqToLetter = {}
for letter in LETTERS:
if letterToFreq[letter] not in freqToLetter:
freqToLetter[letterToFreq[letter]] = [letter]
else:
freqToLetter[letterToFreq[letter]].append(letter)
# third, put each list of letters in reverse "ETAOIN" order, and then
# convert it to a string
for freq in freqToLetter:
freqToLetter[freq].sort(key=ETAOIN.find, reverse=True)
freqToLetter[freq] = ''.join(freqToLetter[freq])
# fourth, convert the freqToLetter dictionary to a list of tuple
# pairs (key, value), then sort them
freqPairs = list(freqToLetter.items())
freqPairs.sort(key=getItemAtIndexZero, reverse=True)
# fifth, now that the letters are ordered by frequency, extract all
# the letters for the final string
freqOrder = []
for freqPair in freqPairs:
freqOrder.append(freqPair[1])
return ''.join(freqOrder)
def englishFreqMatchScore(message):
# Return the number of matches that the string in the message
# parameter has when its letter frequency is compared to English
# letter frequency. A "match" is how many of its six most frequent
# and six least frequent letters is among the six most frequent and
# six least frequent letters for English.
freqOrder = getFrequencyOrder(message)
matchScore = 0
# Find how many matches for the six most common letters there are.
for commonLetter in ETAOIN[:6]:
if commonLetter in freqOrder[:6]:
matchScore += 1
# Find how many matches for the six least common letters there are.
for uncommonLetter in ETAOIN[-6:]:
if uncommonLetter in freqOrder[-6:]:
matchScore += 1
return matchScore