-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlin_ucb.py
111 lines (86 loc) · 3.49 KB
/
lin_ucb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import linear_data_loader as ldl
import numpy as np
import util
from util import plot_error_rate
from util import plot_regret
from tqdm import tqdm
# Default feature-vector length used by Lin_UCB (its ``d`` argument).
# Read from the data loader; presumably the number of feature columns it
# produces -- confirm in linear_data_loader.
FEATURE_DIM = ldl.NUM_COLS
# Default number of actions (arms) used by Lin_UCB (its ``K`` argument).
NUM_ACTIONS = 3
class Lin_UCB():
    """LinUCB contextual bandit with one independent linear model per action.

    For every action ``k`` the learner keeps:

    * ``A[k]``     -- ``d x d`` matrix: identity plus the outer products of the
      feature vectors seen on the rounds where ``k`` was played;
    * ``b[k]``     -- length-``d`` vector: reward-weighted sum of those vectors;
    * ``A_inv[k]`` -- cached ``inv(A[k])``;
    * ``theta[k]`` -- cached ``A_inv[k] @ b[k]``.

    An action is scored as ``theta[k] . x + alpha * sqrt(x^T A_inv[k] x)`` and
    the action with the highest score is played.
    """

    def __init__(self, alpha, K=NUM_ACTIONS, d=FEATURE_DIM):
        """Create an untrained learner.

        Args:
            alpha: Multiplier on the confidence-width term of each score.
            K: Number of actions.
            d: Length of a feature vector.
        """
        self.alpha = alpha
        self.K = K
        self.d = d

        # Per-action statistics accumulated by update().
        self.A = [np.identity(self.d) for _ in range(K)]
        self.b = [np.zeros(self.d) for _ in range(K)]

        # Cached per-action inverse and coefficients, so that scoring a
        # sample never has to invert a matrix.
        self.A_inv = None
        self.theta = None
        self._refresh_all()

        # Running statistics, one entry appended per update() call.
        self.regret = []
        self.error_rate = []
        self.cumu_regret = 0
        self.sample_counter = 0

    def __str__(self):
        return "LinUCB_dis"

    def _refresh_all(self):
        """Rebuild ``A_inv`` and ``theta`` for every action from ``A`` and ``b``."""
        self.A_inv = [np.linalg.inv(a) for a in self.A]
        self.theta = [a_inv.dot(b) for a_inv, b in zip(self.A_inv, self.b)]

    def train(self, data, labels):
        """Run one online pass, calling ``update`` on each row in order.

        Args:
            data: 2-D array indexed as ``data[i, :]``; row ``i`` is the
                feature vector of sample ``i``.
            labels: Sequence giving the correct action for each row; its
                length determines how many rows are consumed.
        """
        for i in tqdm(range(len(labels))):
            self.update(data[i, :], labels[i])

    def update(self, features, l):
        """Play one round on ``features`` and learn from the outcome.

        The learner picks an action with its current model, receives reward
        0 if that action equals ``l`` and -1 otherwise, and updates only the
        model of the action it played.

        Args:
            features: NumPy feature vector of length ``d``.
            l: The correct action for this sample.
        """
        self.sample_counter += 1
        choose_action = self._evaluate_datum(features)

        # Reward is 0 for a correct choice and -1 for a wrong one; every
        # wrong choice adds one unit of regret.
        if l == choose_action:
            r = 0
        else:
            r = -1
            self.cumu_regret += 1

        # Fold the sample into the played action's statistics, then refresh
        # that action's cached inverse and coefficient vector.
        self.A[choose_action] += np.outer(features, features)
        self.b[choose_action] += features * r
        self.A_inv[choose_action] = np.linalg.inv(self.A[choose_action])
        self.theta[choose_action] = self.A_inv[choose_action].dot(self.b[choose_action])

        # regret[t]: wrong choices made in the first t+1 updates;
        # error_rate[t]: that count divided by the number of updates so far.
        self.regret.append(self.cumu_regret)
        self.error_rate.append(self.cumu_regret / self.sample_counter)

    def get_regret(self):
        """Return the list of cumulative regret values, one per update."""
        return self.regret

    def get_error_rate(self):
        """Return the list of running error rates, one per update."""
        return self.error_rate

    def evaluate(self, data):
        """Predict an action for every row of ``data`` without learning.

        Args:
            data: Sequence of feature vectors, indexed as ``data[i]``.

        Returns:
            1-D float NumPy array of length ``len(data)`` holding the chosen
            action index for each row.
        """
        # Re-derive the caches first so predictions reflect the current
        # A / b even if they were modified outside update().
        self._refresh_all()
        return np.array(
            [self._evaluate_datum(data[i]) for i in range(len(data))],
            dtype=float,
        )

    def _evaluate_datum(self, features):
        """Return the index of the highest-scoring action for one sample."""
        p = np.zeros(self.K)
        for i in range(len(p)):
            width_sq = features.T.dot(self.A_inv[i]).dot(features)
            # The quadratic form is non-negative in exact arithmetic; clamp
            # so that a tiny negative rounding error cannot turn the score
            # into NaN and corrupt the argmax.
            width = np.sqrt(max(width_sq, 0.0))
            p[i] = self.theta[i].dot(features) + self.alpha * width
        choose_action = np.argmax(p)
        return choose_action
def test_lin_ucb_full(data, true_buckets, alpha=0.1):
    """Train a fresh Lin_UCB on ``data`` and score its predictions on the same rows.

    Args:
        data: Feature rows, passed to ``Lin_UCB.train`` and ``Lin_UCB.evaluate``.
        true_buckets: Correct action for each row of ``data``.
        alpha: Value passed as ``alpha`` to ``Lin_UCB``.

    Returns:
        The ``(acc, precision, recall)`` triple produced by
        ``util.evaluate_performance(pred_buckets, true_buckets)``.
    """
    lin_ucb = Lin_UCB(alpha=alpha)
    lin_ucb.train(data, true_buckets)
    pred_buckets = lin_ucb.evaluate(data)
    acc, precision, recall = util.evaluate_performance(pred_buckets, true_buckets)
    # Hand the metrics back to the caller instead of discarding them.
    return acc, precision, recall
if __name__ == '__main__':
    # Exploration weight for this run; also forwarded to the plot helpers.
    ALPHA = 0.1

    # Load features and raw labels, then pass every label through util.bucket.
    data, true_labels = ldl.get_data_linear()
    true_buckets = list(map(util.bucket, true_labels))

    # One online training pass, followed by re-predicting every row.
    lin_ucb = Lin_UCB(alpha=ALPHA)
    lin_ucb.train(data, true_buckets)
    pred_buckets = lin_ucb.evaluate(data)

    # Metrics are computed here but not printed.
    acc, precision, recall = util.evaluate_performance(pred_buckets, true_buckets)

    # Plot the per-step curves recorded during training.
    plot_regret(lin_ucb.get_regret(), ALPHA)
    plot_error_rate(lin_ucb.get_error_rate(), ALPHA)