-
Notifications
You must be signed in to change notification settings - Fork 11
/
baselines.py
62 lines (46 loc) · 1.49 KB
/
baselines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import argparse
import os
import pickle
import numpy as np
from sklearn.linear_model import SGDClassifier
from utils import pickle_it
"""Arguments"""
# Command-line interface: two required positional directory paths.
parser = argparse.ArgumentParser(
    description='Fit SGDClassifier baselines and record their losses.')
parser.add_argument(
    'data', type=str,
    help='directory containing the pickled datasets')
parser.add_argument(
    'output', type=str,
    help='directory the results are written to (created if missing)')

# File names of the pickled datasets, resolved relative to the data directory.
EPSILON_NAME = "epsilon.pickle"
RCV1_NAME = "rcv1.pickle"

args = parser.parse_args()

if not os.path.exists(args.output):
    print('create {}'.format(args.output))
    # exist_ok=True closes the gap between the existence check above and the
    # creation: a directory created concurrently no longer raises.
    os.makedirs(args.output, exist_ok=True)

# Maps a dataset label to the baseline loss computed for it.
baselines = {}
def loss(clf, X, y, reg):
    """Return the L2-regularised logistic loss of a fitted linear classifier.

    Computes ``mean(log(1 + exp(-y * (X @ w)))) + reg / 2 * ||w||^2`` where
    ``w`` is ``clf.coef_``.

    Only ``clf.coef_`` is used; any fitted intercept is ignored, so the value
    matches the classifier's own objective only when it was trained with
    ``fit_intercept=False``.

    Args:
        clf: object exposing ``coef_``; the original code transposes it, so
            it is expected to be 2-D with one row (binary problem).
        X: sample matrix with one row per sample; must support ``@`` and
            ``.shape``.
        y: per-sample labels. The formula treats them as signed labels
            (presumably -1/+1 -- confirm against the pickled data).
        reg: L2 regularisation strength.

    Returns:
        The scalar regularised loss.
    """
    scores = np.asarray(X @ clf.coef_.T).ravel()
    margins = y * scores
    # logaddexp(0, -m) == log(1 + exp(-m)) but does not overflow to inf when
    # a sample is badly misclassified (large negative margin).
    data_term = np.sum(np.logaddexp(0, -margins)) / X.shape[0]
    penalty = reg / 2 * np.sum(np.square(clf.coef_))
    return data_term + penalty
""" RCV1 test"""
print('RCV1-test')

# Load the pickled (X, y) pair for RCV1 from the data directory.
rcv1_path = os.path.join(args.data, RCV1_NAME)
with open(rcv1_path, 'rb') as rcv1_file:
    X, y = pickle.load(rcv1_file)

# Regularisation strength is the reciprocal of the number of rows of X.
n_samples = X.shape[0]
reg = 1 / n_samples

# Logistic regression without an intercept, trained by SGD.
clf = SGDClassifier(
    loss='log',
    penalty='l2',
    alpha=reg,
    fit_intercept=False,
    tol=1e-4,
)
clf.fit(X, y)

# Report the regularised training loss and accuracy, then record the loss.
rcv1_loss = loss(clf, X, y, reg)
print("loss: {}".format(rcv1_loss))
rcv1_accuracy = clf.score(X, y)
print("train accuracy: {}".format(rcv1_accuracy))
baselines['RCV1-test'] = rcv1_loss
""" EPSILON """
print('epsilon')

# Load the pickled (X, y) pair for epsilon from the data directory.
# NOTE: pickle.load can execute arbitrary code; only point this script at
# dataset files you produced or trust.
with open(os.path.join(args.data, EPSILON_NAME), 'rb') as f:
    X, y = pickle.load(f)

# Regularisation strength is the reciprocal of the number of rows of X.
reg = 1 / X.shape[0]

# fit_intercept=False: loss() evaluates clf.coef_ only, so a fitted intercept
# would make the recorded baseline differ from the model's real objective.
# This also matches the RCV1 section's configuration.
# NOTE(review): loss='log' was renamed to 'log_loss' in scikit-learn 1.1 and
# removed in 1.3 -- update when the pinned scikit-learn version allows.
clf = SGDClassifier(tol=1e-4, loss='log', penalty='l2', alpha=reg,
                    fit_intercept=False)
clf.fit(X, y)

# Report the regularised training loss and accuracy, then record the loss.
epsilon_loss = loss(clf, X, y, reg)
print("loss: {}".format(epsilon_loss))
print("train accuracy: {}".format(clf.score(X, y)))
baselines['epsilon'] = epsilon_loss
""" Pickle """
# Echo the collected losses, then hand them to the project's pickle_it
# helper together with the same label and the output directory.
results_label = 'baselines'
print(results_label, baselines)
pickle_it(baselines, results_label, args.output)