optimizers.py
import pickle

import numpy as np

import skml_config


class SGD:
    """Vanilla stochastic gradient descent: params[i] -= lr * grads[i]."""

    def __init__(self, lr=0.01):
        self.lr = lr

    def update(self, params, grads):
        for i in range(len(params)):
            params[i] -= self.lr * grads[i]


class Momentum:
    """SGD with momentum: v = momentum * v - lr * grad; params += v."""

    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            # One velocity buffer per parameter array, matching its shape.
            # (A plain list is used here: np.empty_like on a ragged list of
            # arrays does not produce per-parameter buffers.)
            self.v = [np.zeros_like(p, skml_config.config.f_type) for p in params]
        for i in range(len(params)):
            self.v[i] = self.momentum * self.v[i] - self.lr * grads[i]
            params[i] += self.v[i]


class Nesterov:
    """Nesterov accelerated gradient, written in the reformulated form that
    updates the current parameters directly (no separate look-ahead copy)."""

    def __init__(self, lr=0.01, momentum=0.9):
        self.lr = lr
        self.momentum = momentum
        self.v = None

    def update(self, params, grads):
        if self.v is None:
            self.v = [np.zeros_like(p, skml_config.config.f_type) for p in params]
        for i in range(len(params)):
            self.v[i] *= self.momentum
            self.v[i] -= self.lr * grads[i]
            params[i] += self.momentum * self.momentum * self.v[i]
            params[i] -= (1 + self.momentum) * self.lr * grads[i]


class AdaGrad:
    """AdaGrad: per-element step sizes shrink with the accumulated
    sum of squared gradients."""

    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = [np.zeros_like(p, skml_config.config.f_type) for p in params]
        for i in range(len(params)):
            self.h[i] += grads[i] * grads[i]
            # 1e-7 guards against division by zero.
            params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7)


class RMSprop:
    """RMSprop: AdaGrad with an exponential moving average of squared
    gradients instead of an unbounded sum."""

    def __init__(self, lr=0.01, decay_rate=0.99):
        self.lr = lr
        self.decay_rate = decay_rate
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = [np.zeros_like(p, skml_config.config.f_type) for p in params]
        for i in range(len(params)):
            self.h[i] *= self.decay_rate
            self.h[i] += (1 - self.decay_rate) * grads[i] * grads[i]
            params[i] -= self.lr * grads[i] / (np.sqrt(self.h[i]) + 1e-7)


class Adam:
    """Adam: bias-corrected first and second moment estimates of the
    gradient; the bias correction is folded into the step size lr_t."""

    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None
        self.v = None

    def update(self, params, grads):
        if self.m is None:
            self.m, self.v = [], []
            for param in params:
                self.m.append(np.zeros_like(param, skml_config.config.f_type))
                self.v.append(np.zeros_like(param, skml_config.config.f_type))
        self.iter += 1
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)
        for i in range(len(params)):
            # Equivalent to m = beta1 * m + (1 - beta1) * grad,
            # and likewise for v with beta2 and grad**2.
            self.m[i] += (1 - self.beta1) * (grads[i] - self.m[i])
            self.v[i] += (1 - self.beta2) * (grads[i]**2 - self.v[i])
            params[i] -= lr_t * self.m[i] / (np.sqrt(self.v[i]) + 1e-7)

    def save(self, path):
        """Pickle the optimizer, including its state (moments and step count)."""
        with open(path, "wb") as f:
            pickle.dump(self, f)
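

# --- Usage sketch (illustration only, not part of the original module) ---
# A minimal demonstration of the shared update(params, grads) interface,
# assuming skml_config.config.f_type is a NumPy float dtype such as
# np.float32. The quadratic loss f(w) = ||w||^2 / 2 has gradient w, so any
# of the optimizers above should drive the parameter toward zero.
if __name__ == "__main__":
    params = [np.array([1.0, -2.0], dtype=np.float32)]
    optimizer = Adam(lr=0.1)
    for _ in range(100):
        grads = [params[0].copy()]  # gradient of ||w||^2 / 2 is w itself
        optimizer.update(params, grads)
    print(params[0])  # expected to be close to [0., 0.]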