-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsvm.py
164 lines (149 loc) · 5.21 KB
/
svm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/usr/bin/python
## Logistic regression for 6.867 hw2
from math import log, exp
import numpy
import scipy.optimize
import cvxopt, cvxopt.solvers
from plotBoundary import *
from utils import *
numpy.set_printoptions(threshold=numpy.nan)
## primal SVM
def primal(phi,y,C, debug=False):
## primal quad prog.
## let n be the number of data points, and let m be the number of features
n,m = phi.shape
Q = numpy.zeros((m+n+1,m+n+1))
for i in range(m):
Q[i,i] = 1
c = numpy.vstack([numpy.zeros((m+1,1)), C*numpy.ones((n,1))])
A = numpy.zeros((2*n, m+1+n))
A[:n,0:m] = y*phi
A[:n,m] = y.T
A[:n,m+1:] = numpy.eye(n)
A[n:,m+1:] = numpy.eye(n)
A = -A
g = numpy.zeros((2*n,1))
g[:n] = -1
## E and d are not used in the primal form
## convert to array
## have to convert everything to cxvopt matrices
Q = cvxopt.matrix(Q,Q.shape,'d')
c = cvxopt.matrix(c,c.shape,'d')
A = cvxopt.matrix(A,A.shape,'d')
g = cvxopt.matrix(g,g.shape,'d')
## set up cvxopt
## z (the vector being minimized for) in this case is [w, b, eps].T
sol = cvxopt.solvers.qp(Q, c, A, g)
return sol
## the dual takes a kernel function as well
def dual(phi,y,C,K, debug=False):
## primal quad prog.
## let n be the number of data points, and let m be the number of features
n,m = phi.shape
## the kernel function takes two 1xm x-vectors and returns a scalar
## first, compute the nxn matrix K(x_i, x_j)
KM = numpy.zeros((n,n))
for i in range(n):
for j in range(n):
KM[i,j] = float(K(phi[i,:], phi[j,:]))
## multiply in y_i*y_j
yy = y.dot(y.T)
assert yy.shape == (n,n)
Q = y.dot(y.T) * KM * .5
c = -numpy.ones((n,1))
A = numpy.zeros((2*n, n))
A[:n,:] = numpy.eye(n)
A[n:,:] = -numpy.eye(n)
g = numpy.zeros((2*n,1))
g[:n] = C
E = y.T
d = numpy.array([[0]])
## convert to array
## have to convert everything to cxvopt matrices
Q = cvxopt.matrix(Q,Q.shape,'d')
c = cvxopt.matrix(c,c.shape,'d')
A = cvxopt.matrix(A,A.shape,'d')
g = cvxopt.matrix(g,g.shape,'d')
E = cvxopt.matrix(E,E.shape,'d')
d = cvxopt.matrix(d,d.shape,'d')
## set up cvxopt
## z (the vector being minimized for) in this case is [w, b, eps].T
sol = cvxopt.solvers.qp(Q, c, A, g, E, d)
return sol
## given the alpha Lagrange coefficients from dual, calculate the weights and offset
def dualWeights(x, y, K, alpha, C):
assert len(x.shape) > 1
n,m = x.shape
assert y.shape == (n,1)
assert alpha.shape == (n,1)
## calculate weights
w = numpy.sum([alpha[i] * y[i] * x[i,:] for i in range(n)], axis=0)
## calculate offset b
## get the indices of the support vectors
sIndices = [i for i in range(n) if alpha[i] > 0]
S = len(sIndices)
## first count the number of data points for which 0 < alpha < C
mIndices = [i for i in range(n) if 0 < alpha[i] < C]
M = len(mIndices)
b = sum([y[j] - sum([alpha[i] * y[i] * K(x[i,:],x[j,:]) for i in sIndices]) for j in mIndices])/float(M)
return w, b, S, M
## given an array with shape (r) or (r,1), will return one with shape (r,1)
def fixSingleton(x):
return x.reshape((x.shape[0],1)) if len(x.shape) == 1 else x
def fixSingletons(*args):
return [fixSingleton(z) for z in args]
## nifty kallable Kernel wrapper klass
class Kernel:
def __init__(self, K):
self.K = K
def __call__(self, a, b):
## make sure they're 2d
a,b = fixSingletons(a,b)
for x,name in [(a,"a"),(b,"b")]:
assert x.shape[1] == 1, "Vector %s has shape %s" %(name, str(x.shape))
return self.K(a,b)
## define a linear kernel function, where inputs a and b are each 1xm input vectors
lk = lambda a,b: sum(a.T.dot(b))
linearKernel = Kernel(lk)
squaredKernel = Kernel(lambda a,b: (1 + a.T.dot(b))**2)
#squaredKernel = Kernel(lambda a,b: (1 + (a.T.dot(b))**2))
beta = 0.5 ## variance of 1
beta = 10
print "beta = " + str(beta)
#gaussianKernel = Kernel(lambda a,b: exp(-beta*((a-b).T.dot(a-b))))
def makeGaussian(beta):
def g(a,b):
#assert a.shape == b.shape, "a.shape: " + str(a.shape) + ", b.shape: " + str(b.shape)
#if a.shape[0] == 1:
# a = a.reshape((a.shape[1],1))
# b = b.reshape((b.shape[1],1))
return exp(-beta*((a-b).T.dot(a-b)))
return g
gaussianKernel = Kernel(makeGaussian(beta))
if __name__=='__main__':
## a very simple training test
numpy.set_printoptions(threshold=numpy.nan)
M = 2
xDummy = numpy.array([[1,0],[-1,0]])
assert xDummy.shape == (2,2)
phiDummy = makePhi(xDummy,M)
assert phiDummy.shape == (2,6)
print phiDummy
n,m = phiDummy.shape
#n,m = xDummy.shape
yDummy = numpy.array([[1], [-1]])
assert yDummy.shape == (2,1)
## Carry out training, primal and/or dual
C = 0.25
## primal
p = primal(phiDummy,yDummy,C, debug=True)
wP = numpy.array(p['x'][:m])
bP = p['x'][m]
xValidate = numpy.array([[100,0],[-100,231232],[0,123123123]])
yP = makePhi(xValidate,M).dot(wP) + bP
print yP>0
## dual
d = dual(phiDummy,yDummy,C, linearKernel, debug=True)
alphaD = numpy.array(d['x'])
print alphaD
w,b,S,M = dualWeights(phiDummy, yDummy, linearKernel, alphaD, C)