forked from GraphGrailAi/tolkein_text
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlstm_class.py
73 lines (47 loc) · 2.58 KB
/
lstm_class.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import torch
import torch.nn as nn
#===========================================#
# Description #
#===========================================#
# This is the LSTM Neural Network Class. The architecture of the network is this:
# input -> embedding layer -> LSTM layers -> dropout layer -> fully connected linear layer -> output
# input is a list of n=9 words represented by their ids in the vocabulary, label is the following word
# output is a list of logits over the set of words, where higher means a higher chance that that word follows
# applying softmax to the output turns it into a probability distribution over all the words
#===========================================#
# LSTM Recurrent Neural Network Class #
#===========================================#
class LSTM(nn.Module):
    """Next-word prediction network: embedding -> LSTM -> dropout -> linear.

    The model consumes a batch of token-id sequences and returns, for each
    sequence, one row of unnormalised logits over the vocabulary computed
    from the LSTM output at the last time step.

    Args:
        vocab_size: Number of distinct token ids; sizes both the embedding
            table and the output layer.
        embedding_dim: Width of each token embedding.
        hidden_dim: Width of the LSTM hidden and cell state.
        n_layers: Number of stacked LSTM layers.
        drop_prob: Dropout probability used between stacked LSTM layers
            and on the LSTM output before the linear layer.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, n_layers, drop_prob=0.2):
        super().__init__()

        # network size parameters
        self.n_layers = n_layers
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim

        # nn.LSTM only applies its dropout between stacked layers, so with a
        # single layer a non-zero value does nothing except raise a warning.
        lstm_dropout = drop_prob if n_layers > 1 else 0.0

        # the layers of the network
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        self.lstm = nn.LSTM(
            self.embedding_dim,
            self.hidden_dim,
            self.n_layers,
            dropout=lstm_dropout,
            batch_first=True,
        )
        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(self.hidden_dim, self.vocab_size)

    def forward(self, input, hidden=None):
        """Run a forward pass on a batch of token-id sequences.

        Args:
            input: Integer tensor of token ids, batch dimension first
                (the LSTM is built with ``batch_first=True``).
            hidden: ``(h_0, c_0)`` tuple as produced by ``init_hidden``.
                When ``None``, ``nn.LSTM`` starts from an all-zero state.

        Returns:
            A ``(logits, hidden)`` tuple: ``logits`` has one row per batch
            element and ``vocab_size`` columns; ``hidden`` is the updated
            ``(h_n, c_n)`` state returned by the LSTM.
        """
        # pass through embeddings layer
        embeddings_out = self.embedding(input)

        # pass through LSTM layers
        lstm_out, hidden = self.lstm(embeddings_out, hidden)

        # keep only the output for the last element of the input sequence
        lstm_out = lstm_out[:, -1]

        # pass through dropout layer
        dropout_out = self.dropout(lstm_out)

        # pass through fully connected layer - no Softmax here, the output
        # is raw logits (CrossEntropyLoss applies the normalisation itself)
        fc_out = self.fc(dropout_out)

        # return final output and hidden state
        return fc_out, hidden

    def init_hidden(self, batch_size):
        """Create a zeroed ``(hidden_state, cell_state)`` tuple for the LSTM.

        Both tensors have size ``n_layers x batch_size x hidden_dim`` and are
        allocated on the same device and with the same dtype as the model's
        weights, so the result can be fed to ``forward`` unchanged after the
        model has been moved (e.g. ``.cuda()``) or cast (e.g. ``.double()``).

        Args:
            batch_size: Number of sequences in the batch the state is for.

        Returns:
            Tuple of two separate zero tensors: hidden state and cell state.
        """
        # Use an existing parameter as the reference for device and dtype.
        reference = self.fc.weight
        state_shape = (self.n_layers, batch_size, self.hidden_dim)

        hidden_state = torch.zeros(state_shape, device=reference.device, dtype=reference.dtype)
        cell_state = torch.zeros(state_shape, device=reference.device, dtype=reference.dtype)
        return (hidden_state, cell_state)