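-- main.lua: trains a gated CNN language model (gated_cnn) on Penn Treebank with
-- optim.adam, tracking training and validation perplexity and saving the model
-- with the lowest validation perplexity to disk.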
require 'paths'
local model = require 'gated_cnn'
local dl = require 'dataload'
require 'nn'
local optim = require 'optim'
cmd = torch.CmdLine()
cmd:text()
cmd:text('Options:')
-- training
cmd:option('--startlr', 0.001, 'learning rate at t=0')
cmd:option('--weightDecay', 0.00001, 'weight decay')
cmd:option('--minlr', 0.00001, 'minimum learning rate')
cmd:option('--saturate', 50, 'epoch at which linear decayed LR will reach minlr')
cmd:option('--momentum', 0.9, 'momentum')
cmd:option('--maxnormout', -1, 'max l2-norm of each layer\'s output neuron weights')
cmd:option('--cutoff', -1, 'max l2-norm of concatenation of all gradParam tensors')
cmd:option('--device', 1, 'sets the device (GPU) to use')
cmd:option('--maxepoch', 1000, 'maximum number of epochs to run')
cmd:option('--show', 1000, 'print training statistics every this many iterations')
-- gated cnn layer
cmd:option('--inputsize', 256, 'gated cnn size')
cmd:option('--kW', 3, 'convolution kernel size')
cmd:option('--blockNum', 4, 'block number')
cmd:option('--seqlen', 5, 'sequence length: number of consecutive time-steps per training sub-sequence')
cmd:option('--dropout', 0.2, 'dropout rate in model')
-- data
cmd:option('--batchsize', 32, 'number of examples per batch')
cmd:option('--trainsize', -1, 'number of train examples seen between each epoch')
cmd:option('--validsize', -1, 'number of valid examples used for early stopping and cross-validation')
cmd:option('--savepath', paths.concat(dl.SAVE_PATH, 'gated_cnn'), 'path to directory where experiment log (includes model) will be saved')
cmd:option('--id', '', 'id string of this experiment (used to name output file) (defaults to a unique id)')
cmd:text()
local opt = cmd:parse(arg or {})
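-- training runs on the GPU: 'cunn' pulls in cutorch, and opt.device selects which GPU to use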
require 'cunn'
cutorch.setDevice(opt.device)
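-- dl.loadPTB takes per-split batch sizes, so the validation and test sets use a batch size of 1 here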
local trainset, validset, testset = dl.loadPTB({opt.batchsize, 1, 1})
opt.ivocabSize = #trainset.ivocab
opt.id = opt.id == '' and ('ptb' .. ':' .. dl.uniqueid()) or opt.id
local lm = model(opt)
local crit = nn.ClassNLLCriterion()
local preprocess = nn.Transpose({1, 2})
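-- the dataload iterators yield seqlen x batchsize tensors; transposing the first two
-- dimensions gives the gated CNN batch-first (batchsize x seqlen) input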
print(lm)
lm:cuda()
crit:cuda()
preprocess:cuda()
local xplog = {}
xplog.opt = opt -- save all hyper-parameters and such
xplog.dataset = 'PennTreeBank'
xplog.vocab = trainset.vocab
-- the full model (including parameters) is serialized as part of xplog below
xplog.model = lm
xplog.criterion = crit
xplog.preprocess = preprocess
-- keep a log of perplexity for each epoch
xplog.trainppl = {}
xplog.valppl = {}
-- will be used for early-stopping
xplog.minvalppl = 99999999
xplog.epoch = 0
local ntrial = 0
paths.mkdir(opt.savepath)
local epoch = 1
opt.lr = opt.startlr
opt.trainsize = opt.trainsize == -1 and trainset:size() or opt.trainsize
opt.validsize = opt.validsize == -1 and validset:size() or opt.validsize
local params, grad_params = lm:getParameters()
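-- the forward and backward passes are run manually inside the training loop, so
-- feval just hands the already-computed loss and gradients to optim.adam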
local function feval()
   return crit.output, grad_params
end
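-- optim.adam reads learningRate from this table (and weightDecay, if the installed
-- optim version supports it); the commented-out fields are SGD-style options kept for reference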
local optimState = {
   learningRate = opt.lr,
   weightDecay = opt.weightDecay
   -- learningRateDecay = 0.0,
   -- momentum = opt.momentum,
   -- nesterov = true,
   -- dampening = 0.0
}
while opt.maxepoch <= 0 or epoch <= opt.maxepoch do
print("")
print("Epoch #"..epoch.." :")
-- 1. training
local a = torch.Timer()
lm:training()
local h = 0
local h1 = 0
local showErr = 0
local sumErr = 0
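   -- iterate over contiguous sub-sequences of opt.seqlen steps, up to opt.trainsize examples per epoch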
   for i, inputs, targets in trainset:subiter(opt.seqlen, opt.trainsize) do
      targets = preprocess:forward(targets:float():cuda()):clone()
      inputs = preprocess:forward(inputs:float():cuda()):clone()
      targets = targets:view(-1)
      -- forward
      local outputs = lm:forward(inputs)
      local err = crit:forward(outputs, targets)
      sumErr = sumErr + err
      showErr = showErr + err
      h1 = h1 + 1
      h = h + 1
      if i % opt.show == 0 then
         showErr = showErr / h1
         print(' ' .. epoch .. '-' .. i .. ' err: ' .. string.format("%.2f", showErr) .. ' ppl: ' .. string.format("%.2f", torch.exp(showErr)) .. ' avgppl: ' .. string.format("%.2f", torch.exp(sumErr / h)))
         showErr = 0
         h1 = 0
      end
      -- backward
      local gradOutputs = crit:backward(outputs, targets)
      lm:zeroGradParameters()
      lm:backward(inputs, gradOutputs)
      optim.adam(feval, params, optimState)
      if i % 1000 == 0 then
         collectgarbage()
      end
   end
   -- learning rate decay
   -- optimState.learningRate = optimState.learningRate + (opt.minlr - opt.startlr) / opt.saturate
   -- optimState.learningRate = math.max(opt.minlr, optimState.learningRate)
   print("learning rate", optimState.learningRate)
   -- if opt.meanNorm then
   --    print("mean gradParam norm", opt.meanNorm)
   -- end
   if cutorch then cutorch.synchronize() end
   local speed = a:time().real/opt.trainsize
   print(string.format("Speed : %f sec/batch ", speed))
   local ppl = torch.exp(sumErr / h)
   print("Training PPL : "..ppl)
   xplog.trainppl[epoch] = ppl
   -- 2. cross-validation
   lm:evaluate()
   local sumErr = 0
   local h = 0
   for i, inputs, targets in validset:subiter(opt.seqlen, opt.validsize) do
      targets = preprocess:forward(targets:float():cuda()):clone()
      inputs = preprocess:forward(inputs:float():cuda()):clone()
      targets = targets:view(-1)
      -- forward
      local outputs = lm:forward(inputs)
      local err = crit:forward(outputs, targets)
      sumErr = sumErr + err
      h = h + 1
   end
   local ppl = torch.exp(sumErr/h)
   -- Perplexity = exp( sum ( NLL ) / #w)
   print("Validation PPL : "..ppl)
   xplog.valppl[epoch] = ppl
   -- early-stopping
   if ppl < xplog.minvalppl then
      -- save best version of model
      xplog.minvalppl = ppl
      xplog.epoch = epoch
      local filename = paths.concat(opt.savepath, opt.id..'.t7')
      print("Found new minimum. Saving to "..filename)
      torch.save(filename, xplog)
   end
   collectgarbage()
   epoch = epoch + 1
end
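-- Example invocation (assuming a Torch7 install with the cunn, optim, dataload and
-- paths packages, plus the local gated_cnn module):
--   th main.lua --device 1 --batchsize 32 --seqlen 5 --maxepoch 100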