-
Notifications
You must be signed in to change notification settings - Fork 0
/
sample.py
41 lines (36 loc) · 1.26 KB
/
sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import pickle
import time
import numpy as np
# ngram size used both to pick the stats file and as the sampling context
max_ngrams = 4
# number of passwords to generate
num_generate = 10
# Results file, opened (and truncated) for writing as soon as the module
# runs; generated passwords are written to it one per line.
f = open("results.txt", 'w+')
# Generate a single new password using a stats dict
# created during the training phase.
def gen_password(stats, n):
    """Sample one password from the n-gram statistics.

    stats maps an n-gram string to a dict of {next character: probability};
    that is how gen_char indexes and samples it. n is the n-gram size, used
    as the width of the sliding context window.

    Returns the generated string, or None when no newline terminator was
    sampled within 100 characters.
    """
    # Seed the context with n backticks (presumably the start-of-password
    # padding used during training).
    output = '`' * n
    for i in range(100):
        # output has length n + i here, so this slice is its last n chars.
        output += gen_char(output[i:i + n], stats)
        if output[-1] == '\n':
            # Drop the newline, strip every backtick, then drop one more
            # trailing character.
            # NOTE(review): the second [0:-1] removes the last remaining
            # character; presumably the training data ended lines with
            # '\r\n' -- confirm before changing.
            return output[0:-1].replace('`', '')[0:-1]
    # No terminator within the character budget.
    return None


# Sample a character if the ngram appears in the stats dict.
# Otherwise drop the last character of the ngram and retry with the
# shorter gram in hopes of finding a match (e.g. "off" becomes "of").
# This is a deviation from a vanilla markov text generator,
# which uses one n-size. This generator uses all values <= n,
# preferring higher values of n first.
def gen_char(ngram, stats=None):
    """Sample the next character for the context string ngram.

    stats is the n-gram table to sample from. When omitted, the
    module-level ``stats`` is used, which keeps existing one-argument
    callers working.

    Raises KeyError when not even the empty n-gram is present in the
    table (previously this case recursed until the recursion limit).
    """
    if stats is None:
        # Backward compatibility with callers relying on the global table.
        stats = globals()['stats']
    while ngram not in stats:
        print('{} not in stats dict'.format(ngram))
        if not ngram:
            raise KeyError('no statistics available, not even for the empty n-gram')
        ngram = ngram[0:-1]
    distribution = stats[ngram]
    # Materialise the dict views: np.random.choice needs 1-D sequences, and
    # Python 3 dict views are not accepted. keys() and values() iterate in
    # matching order, so characters and probabilities stay aligned.
    return np.random.choice(list(distribution.keys()),
                            p=list(distribution.values()))
# Load the n-gram statistics produced by the training phase.
# NOTE: unpickling can execute arbitrary code; only load a stats file you
# generated yourself, never one from an untrusted source.
# Binary mode is required: pickle data is bytes, so text mode fails on
# Python 3 and can corrupt the stream on Windows.
with open('data/{}-gram.pickle'.format(max_ngrams), 'rb') as stats_file:
    stats = pickle.load(stats_file)

# Generate the passwords and write them one per line to the results file.
try:
    for i in range(num_generate):
        pw = gen_password(stats, max_ngrams)
        # gen_password yields None when no terminator was sampled within
        # its character budget; skip those attempts.
        if pw is not None:
            f.write(pw + "\n")
finally:
    # The results file is opened at module level and was never closed.
    f.close()