utils.py
import os

import torch
from torch.autograd import Variable


def read_words(data_dir, seq_len, filter_h):
    """Read every file in `data_dir` and return a flat list of tokens.

    Each kept sentence is left-padded with filter_h // 2 '<pad>' tokens
    and wrapped in '<s>' ... '</s>' markers.
    """
    words = []
    for file in os.listdir(data_dir):
        print('Load', file)
        with open(os.path.join(data_dir, file), 'r') as f:
            lines = f.readlines()
        for line in lines:
            tokens = line.split()
            # TODO: currently only sentences of exactly the specified length are kept
            if len(tokens) == seq_len - 2:
                # TODO: the padding scheme is still tentative
                words.extend(['<pad>'] * (filter_h // 2) + ['<s>'] + tokens + ['</s>'])
    return words


def create_batches(data, batch_size, seq_len):
    """Slice the flat token list into (input, target) mini-batches.

    Each input is a window of seq_len tokens; the target is the token
    that immediately follows the window.
    """
    ret_data = []
    X, Y = [], []
    for i in range(0, len(data) - (seq_len + 1), seq_len):
        X.append(data[i:i + seq_len])
        Y.append(data[i + seq_len])
    for i in range(0, len(X) - batch_size, batch_size):
        ret_data.append((X[i:i + batch_size], Y[i:i + batch_size]))
    return ret_data


def to_var(x):
    """Move a tensor to the GPU when available and wrap it in a Variable
    (legacy pre-0.4 PyTorch autograd API)."""
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)
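
A minimal sketch of how these helpers might be chained together. The `data/` directory, the hyperparameter values, and the toy word-to-id vocabulary below are assumptions for illustration, not part of utils.py; the real vocabulary lookup is presumably built elsewhere in the repo.

# Hypothetical usage sketch: paths, hyperparameters, and the vocabulary
# mapping are illustrative assumptions, not part of utils.py itself.
import torch
from utils import read_words, create_batches, to_var

seq_len, batch_size, filter_h = 20, 32, 5

words = read_words('data/', seq_len, filter_h)            # flat list of tokens
vocab = {w: i for i, w in enumerate(sorted(set(words)))}  # toy word-to-id map
ids = [vocab[w] for w in words]

for X, Y in create_batches(ids, batch_size, seq_len):
    inputs = to_var(torch.LongTensor(X))    # shape (batch_size, seq_len)
    targets = to_var(torch.LongTensor(Y))   # shape (batch_size,)
    # ... feed inputs/targets to the model here
    break

Note that create_batches strides over the flat token list in steps of seq_len, so windows do not necessarily align with the sentence boundaries introduced by read_words.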