-
Notifications
You must be signed in to change notification settings - Fork 2
/
sample.py
65 lines (49 loc) · 2.13 KB
/
sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import argparse
import os
import json
import numpy as np
from model import build_model, load_weights
from keras.models import Sequential, load_model
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding
# Directory that sample() reads the 'char_to_idx.json' vocabulary file from.
DATA_DIR = './data'
# Directory that sample() writes the 'model.<epoch>.h5' snapshot into.
MODEL_DIR = './model'
# Building a Sequential model
def build_sample_model(vocab_size):
    """Assemble the stateful network used to sample one character at a time.

    The stack is: an Embedding (vocab_size -> 512) declared with
    batch_input_shape=(1, 1), three stateful 256-unit LSTM layers each
    followed by Dropout(0.2), and a Dense(vocab_size) + softmax head.

    Args:
        vocab_size: number of distinct symbols; used as both the embedding
            input dimension and the width of the output layer.

    Returns:
        The assembled Sequential model (no weights are loaded here).
    """
    network = Sequential()
    network.add(Embedding(vocab_size, 512, batch_input_shape=(1, 1)))

    # The first two LSTM layers emit full sequences so the next LSTM can
    # consume them; the last one emits only its final output for the head.
    for keep_sequences in (True, True, False):
        network.add(LSTM(256, return_sequences=keep_sequences, stateful=True))
        network.add(Dropout(0.2))

    # Project onto the vocabulary and normalise to a probability vector.
    for head_layer in (Dense(vocab_size), Activation('softmax')):
        network.add(head_layer)
    return network
def sample(epoch, header, num_chars):
    """Generate text from the weights saved at a given epoch.

    Loads the character vocabulary from DATA_DIR, builds the sampling model,
    loads the checkpoint for `epoch` into it, and then draws `num_chars`
    characters one at a time from the model's output distribution.

    Side effects: prints the seed as a list of vocabulary indices and saves
    the assembled model to 'model.<epoch>.h5' inside MODEL_DIR.

    Args:
        epoch: checkpoint identifier passed to load_weights().
        header: seed text; every character must be in the vocabulary. May be
            empty, in which case the first input is a random index.
        num_chars: number of characters to generate after the seed.

    Returns:
        The seed followed by the generated characters, as one string.

    Raises:
        KeyError: if `header` contains a character missing from the
            vocabulary file.
    """
    with open(os.path.join(DATA_DIR, 'char_to_idx.json')) as f:
        char_to_idx = json.load(f)
    idx_to_char = {i: ch for (ch, i) in char_to_idx.items()}
    vocab_size = len(char_to_idx)

    model = build_sample_model(vocab_size)
    load_weights(epoch, model)
    model.save(os.path.join(MODEL_DIR, 'model.{}.h5'.format(epoch)))

    sampled = [char_to_idx[c] for c in header]
    print(sampled)

    batch = np.zeros((1, 1))

    # The network is stateful, so the seed only influences generation if its
    # characters are actually fed through it. Prime the recurrent state with
    # everything except the last seed character; the last one is consumed by
    # the first iteration of the sampling loop below.
    for seed_idx in sampled[:-1]:
        batch[0, 0] = seed_idx
        model.predict_on_batch(batch)

    for _ in range(num_chars):
        if sampled:
            batch[0, 0] = sampled[-1]
        else:
            # No seed at all: start from a uniformly random symbol.
            batch[0, 0] = np.random.randint(vocab_size)

        # Softmax output is typically float32 and may not sum to 1 within the
        # tolerance np.random.choice enforces; renormalise in float64 first.
        probs = np.asarray(model.predict_on_batch(batch), dtype=np.float64).ravel()
        probs /= probs.sum()

        next_idx = int(np.random.choice(vocab_size, p=probs))
        sampled.append(next_idx)

    return ''.join(idx_to_char[c] for c in sampled)
if __name__ == '__main__':
    # Script entry point: read the checkpoint epoch, optional seed text and
    # output length from the command line, then print the generated text.
    cli = argparse.ArgumentParser(
        description='Sample some text from the trained model.')
    cli.add_argument('epoch', type=int, help='epoch checkpoint to sample from')
    cli.add_argument('--seed', default='', help='initial seed for the generated text')
    cli.add_argument('--len', type=int, default=512,
                     help='number of characters to sample (default 512)')
    options = cli.parse_args()

    generated = sample(options.epoch, options.seed, options.len)
    print(generated)