-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathnpPCA.py
37 lines (32 loc) · 1022 Bytes
/
npPCA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import numpy as np
import matplotlib.pyplot as plt
'''
Performs Principal Component Analysis (PCA) of the matrix X.
The matrix must be n * m dimensions,
where n is the number of features
and m is the number of examples.
'''
def PCA(X, varRetained = 0.95, show = False):
    """Principal Component Analysis of the data matrix X.

    Parameters
    ----------
    X : ndarray of shape (n, m)
        Data matrix with n features (rows) and m examples (columns).
        NOTE(review): no mean-centering is performed here -- X is
        assumed to be centered already; confirm with callers.
    varRetained : float, optional
        Fraction of total variance to retain (default 0.95).
    show : bool, optional
        If True, plot the cumulative retained-variance curve.

    Returns
    -------
    Z : ndarray of shape (k, m)
        Projection of X onto the first k principal components.
    U_reduced : ndarray of shape (n, k)
        The first k left singular vectors (principal directions).
    """
    (n, m) = X.shape

    # Covariance matrix Sigma. np.dot works for plain ndarrays; the
    # original `X * np.transpose(X)` was elementwise for ndarrays and
    # only did a matrix product when X happened to be an np.matrix.
    Sigma = np.dot(X, X.T) / float(m)

    # Eigen-decomposition via SVD; for a symmetric PSD Sigma, s holds
    # the eigenvalues in descending order.
    U, s, V = np.linalg.svd(Sigma, full_matrices = True)

    # Cumulative percentage of variance explained by the first i+1
    # principal components.
    sTot = np.sum(s)
    var_i = np.array([np.sum(s[:i + 1]) / sTot * 100.0
                      for i in range(n)])

    # Smallest k such that the first k components retain at least
    # varRetained of the total variance. (Fixes an off-by-one in the
    # original, which projected onto one component too few while
    # reporting the variance of k+1 components.)
    k = int(np.searchsorted(var_i, varRetained * 100.0)) + 1
    k = min(k, n)  # guard against varRetained > achievable variance
    print('%.2f %% variance retained in %d dimensions' % (var_i[k - 1], k))

    # Optional diagnostic plot of the retained-variance curve.
    if show:
        plt.plot(var_i)
        plt.xlabel('Number of Features')
        plt.ylabel(' Percentage Variance retained')
        plt.show()

    # Project the data onto the first k principal directions.
    U_reduced = U[:, :k]
    Z = np.dot(U_reduced.T, X)

    return Z, U_reduced