lr.py
#!/usr/bin/env python
# Train an L2-regularized Logistic Regression classifier
# (C) John T. Halloran, 2017
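# Objective: minimize over w
#   C * sum_i log(1 + exp(-y_i * x_i^T w)) + 0.5 * ||w||_2^2
# formulated with CVXPY on data loaded through LIBLINEAR's Python bindings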
from __future__ import division
import sys
sys.path.append('/home/jthalloran/soft/liblinear-2.11/python')
from liblinearutil import svm_read_problem
from cvxpy import *
import numpy as np
import timeit
largeTestSet = True  # note: set but not used below
dset = '/home/jthalloran/classificationData/ijcnn1.tr'
# dset = '/home/jthalloran/classificationData/rcv1_train.binary'
# dset = '/home/jthalloran/classificationData/news20.binary'
start_time = timeit.default_timer()
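# svm_read_problem parses LIBSVM-format data into labels Y and a list of
# sparse feature dicts X0 (1-based feature index -> value)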
Y, X0 = svm_read_problem(dset)
n = len(X0)
d = max([max(x) for x in X0 if x])
m = n
print "%d instances, %d features" % (n, d)
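# Densify: copy the sparse feature dicts into an n x d numpy array,
# shifting the 1-based LIBSVM feature indices to 0-based columns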
X = np.zeros(shape=(n,d))
for i, x in enumerate(X0):
    for j in x:
        X[i, j - 1] = x[j]
del X0[:]  # free the sparse copy now that X is dense
elapsed = timeit.default_timer() - start_time
print "%f seconds to load data" % (elapsed)
start_time = timeit.default_timer()
# Form L2-regularized logistic regression
w = Variable(d)
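# Each term log_sum_exp([0, -y_i * x_i^T w]) equals the logistic loss
# log(1 + exp(-y_i * x_i^T w)), so summing over the m rows gives the
# unregularized logistic regression objective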
L = [log_sum_exp(vstack(0, -Y[i] * X[i, :] * w)) for i in range(m)]
loss = sum(L)
reg = 0.5 * sum_squares(w)
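# C trades off data fit against the L2 penalty; larger C weights the
# logistic loss more heavily relative to 0.5 * ||w||^2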
C = 4.0
prob = Problem(Minimize(C * loss + reg))
elapsed = timeit.default_timer() - start_time
print "%f seconds spent formulating problem" % (elapsed)
# Solve problem and time it
start_time = timeit.default_timer()
# prob.solve()
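# SCS is a first-order conic solver; eps=1e-2 is a relatively loose
# convergence tolerance (presumably traded for speed). The commented-out
# call above would fall back to CVXPY's default solver and tolerances.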
prob.solve(solver=SCS, verbose=False, eps=1e-2)
elapsed = timeit.default_timer() - start_time
print "%f seconds to train Logistic Regression with L2 regularization" % (elapsed)
# Training predictions are sign(X w); report accuracy on the training set
h = np.asarray(np.sign(X.dot(w.value))).reshape(-1)
train_error = float(sum(h != np.sign(Y))) / float(n)
print "%f train accuracy" % (1.0 - train_error)
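# To run: point dset above at a local LIBSVM-format file and invoke the
# script directly; it takes no command-line arguments.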