###############################################################################
# Author: Md Rizwan Parvez
# Project: Quora Duplicate Question Detection
# Date Created: 3/27/2017
# Much of this code is adapted from Wasi Ahmad's QuestionClassifier.
# File Description: This is the main script from where all experimental
# execution begins.
###############################################################################
import sys
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.autograd import Variable

import util, data  # , helper, train
from embedding_layer import Embedding_Drop_Layer
#### fix this
# from nn_layer import EmbeddingLayer
# from encoder import EncoderRNN

args = util.get_args()
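# Interface this script assumes from embedding_layer.Embedding_Drop_Layer,
# inferred from its use below -- a sketch, not the actual implementation:
#
#   layer = Embedding_Drop_Layer(vocab_size, emsize, dropout)
#   layer.init_embedding_weights(dictionary, embeddings_index, emsize)
#   output = layer(batch_of_token_ids)  # LongTensor -> embedded FloatTensor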
class LanguageModel(nn.Module):
    def __init__(self, dictionary, embeddings_index, args):
        """Constructor of the class."""
        super(LanguageModel, self).__init__()
        self.dictionary = dictionary
        self.embeddings_index = embeddings_index
        self.config = args
        #### fix this
        # self.num_directions = 2 if args.bidirection else 1
        self.embedding = Embedding_Drop_Layer(len(dictionary), self.config.emsize, self.config.dropout)
        # self.forward_encoder = EncoderRNN(args)
        # if self.num_directions == 2:
        #     self.backward_encoder = EncoderRNN(args)
        # self.dropout = nn.Dropout(args.dropout)
        # self.relu = nn.ReLU()
        # self.linear = nn.Linear(args.nhid * 2, 2)
        # self.out = nn.Sequential(
        #     self.relu, self.dropout,
        #     self.relu, self.dropout,
        #     self.relu, self.dropout,
        #     self.linear)

        # Initialize the embedding layer's weights from the pre-trained
        # embeddings (previously hardcoded to 300; now uses config.emsize).
        self.embedding.init_embedding_weights(self.dictionary, self.embeddings_index, self.config.emsize)

    def forward(self, batch_sentence1):
        """Defines the forward computation of the language model."""
        batch_variable = Variable(batch_sentence1)
        if self.config.cuda:
            batch_variable = batch_variable.cuda()
        return self.embedding(batch_variable)
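# NOTE: with the encoder layers commented out above, forward() currently
# returns only the (presumably dropout-regularized) embeddings for the batch,
# not any RNN hidden states.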
# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)
###############################################################################
# Load data
###############################################################################
#### fix this
# corpus = data.Corpus(args.data)
corpus = data.Corpus(args)
print('Train set size = ', len(corpus.train))
print('Development set size = ', len(corpus.dev))
# print('Test set size = ', len(corpus.test))
print('Vocabulary size = ', len(corpus.dictionary))
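# data.Corpus is assumed to expose .train, .dev, and .dictionary (the
# token-to-id vocabulary); the test split is commented out above, so only
# the train and dev splits are used here.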
###############################################################################
# Load pre-trained embeddings
###############################################################################
#### fix this
file_name = 'train_corpus_3' + 'embeddings_index.p'
embeddings_index = util.get_initial_embeddings(
    file_name, '/if1/kc2wc/data/glove/', 'glove.6B.300d_w_header.txt', corpus.dictionary)
print('Number of OOV words = ', len(corpus.dictionary) - len(embeddings_index))
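# util.get_initial_embeddings presumably loads GloVe vectors for the corpus
# vocabulary and caches them to file_name as a pickle (hence the '.p' suffix).
# Vocabulary words with no GloVe vector make up the OOV count printed above.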
###############################################################################
# batchify
###############################################################################
#### fix this
train_batches = util.batchify(corpus.train, args.batch_size)
dev_batches = util.batchify(corpus.dev, args.batch_size)
# print(util.batchify([2, 3, 4, 3, 4, 355, 4, 342, 90], 2))
print('num_batches: ', len(train_batches))
print(len(train_batches[0]), train_batches[0][0].sentence1)
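# util.batchify is assumed to split the example list into consecutive chunks
# of batch_size (cf. the commented-out sanity check above, which splits a
# nine-element list into pairs).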
###############################################################################
# Build the model
###############################################################################
model = LanguageModel(corpus.dictionary, embeddings_index, args)
if args.cuda:
    model.cuda()

# Quick sanity check on a hand-built batch of word ids
# ('list' renamed to avoid shadowing the built-in).
sample_ids = [[4, 14], [14, 4]]
l_t = torch.LongTensor(sample_ids)
print(model(l_t)[0][1])
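# The smoke test embeds a 2x2 batch of word ids; model(l_t) should have shape
# (batch, seq_len, emsize), so [0][1] prints the embedding vector of the
# second token of the first sentence.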