sgd.py
# Now, implement SGD.
import glob
import os.path as op
import cPickle as pickle
import random

import numpy as np

# Save parameters every few SGD iterations as a fail-safe.
SAVE_PARAMS_EVERY = 1000

def load_saved_params():
    """ A helper function that loads previously saved parameters and resets the iteration start """
    st = 0
    # Find the most recent checkpoint by scanning the saved parameter files.
    for f in glob.glob("params/saved_params_*.npy"):
        iter = int(op.splitext(op.basename(f))[0].split("_")[2])
        if iter > st:
            st = iter

    if st > 0:
        # Binary mode keeps the pickle reads portable across platforms.
        with open("params/saved_params_%d.npy" % st, "rb") as f:
            params = pickle.load(f)
            state = pickle.load(f)
        return st, params, state
    else:
        return st, None, None

def save_params(iter, params):
    # Store the parameters followed by the RNG state so a run can resume deterministically.
    with open("params/saved_params_%d.npy" % iter, "wb") as f:
        pickle.dump(params, f)
        pickle.dump(random.getstate(), f)

def sgd(f, x0, step, iterations, postprocessing=None, useSaved=False, PRINT_EVERY=10):
    """ Stochastic Gradient Descent

    Inputs:
    - f: the function to optimize; it takes a single argument and
      returns two outputs, the cost and the gradient with respect
      to that argument
    - x0: the initial point to start SGD from
    - step: the step size (learning rate) for SGD
    - iterations: total number of iterations to run SGD for
    - postprocessing: postprocessing function applied to the parameters
      after every update; for word2vec we normalize the word vectors
      to have unit length
    - useSaved: if True, resume from the most recently saved parameters
    - PRINT_EVERY: how many iterations between progress printouts

    Output:
    - x: the parameter value after SGD finishes
    """
    # Anneal (halve) the learning rate every several iterations.
    ANNEAL_EVERY = 20000

    if useSaved:
        start_iter, oldx, state = load_saved_params()
        if start_iter > 0:
            x0 = oldx
            step *= 0.5 ** (start_iter / ANNEAL_EVERY)
        if state:
            random.setstate(state)
    else:
        start_iter = 0

    x = x0
    if not postprocessing:
        postprocessing = lambda x: x

    expcost = None
    costs = np.zeros((iterations / PRINT_EVERY, 1))

    for iter in xrange(start_iter + 1, iterations + 1):
        ### YOUR CODE HERE
        ### Don't forget to apply the postprocessing after every iteration!
        ### You might want to print the progress every few iterations.
        cost, grad = f(x)
        # Vanilla SGD update, followed by the optional postprocessing step
        # (e.g. renormalizing word vectors for word2vec).
        x = x - step * grad
        x = postprocessing(x)

        if iter % PRINT_EVERY == 0:
            print iter, 'cost = %f' % cost
            costs[iter / PRINT_EVERY - 1] = cost
        ### END YOUR CODE

        if iter % SAVE_PARAMS_EVERY == 0 and useSaved:
            save_params(iter, x)

        if iter % ANNEAL_EVERY == 0:
            step *= 0.5

    np.savetxt('costs.txt', costs, fmt='%f')
    return x
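

if __name__ == "__main__":
    # Illustrative sanity-check sketch (not part of the original file): run sgd
    # on an assumed toy quadratic cost f(x) = sum(x ** 2), whose gradient is
    # 2 * x. The starting point, step size, and iteration count below are
    # arbitrary choices made only for this example.
    quad = lambda x: (np.sum(x ** 2), 2 * x)

    x_init = np.array([1.5, -2.0, 3.0])
    x_final = sgd(quad, x_init, step=0.01, iterations=1000,
                  postprocessing=None, useSaved=False, PRINT_EVERY=100)
    print "final x:", x_final
    # For this convex quadratic, x_final should be very close to the zero vector.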