-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpca.py
40 lines (30 loc) · 1.14 KB
/
pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import time
import numpy
import scipy, scipy.linalg
def pca(src, dim=2, verbose=True, debug=False):
    """Project ``src`` onto its leading principal components.

    The covariance matrix of the mean-centred data is eigendecomposed, the
    eigenvectors are ordered by decreasing (real part of the) eigenvalue,
    and the z-scored data is projected onto the first ``dim`` of them.

    Args:
        src: 2-D numpy array with one observation per row and one feature
            per column (it must expose ``.shape`` and ``.mean``).
        dim: Number of leading components to keep. If it exceeds the number
            of features, all components are returned.
        verbose: When true, print the problem size and the elapsed time.
        debug: When true, also return a dictionary of intermediate results.

    Returns:
        The projected data with one row per observation and at most ``dim``
        columns. When ``debug`` is true, a tuple ``(projected, debuginfo)``
        where ``debuginfo`` has the keys ``"covariance_matrix"``,
        ``"z_scores"``, ``"eigval"`` and ``"eigvec"``.
    """
    start = time.time()
    if verbose:
        print("PCA running on %d points in %d dimensions" % src.shape)

    # Calculate the deviations from the mean
    centered = src - src.mean(0)

    # Find the covariance matrix (population form: divide by N, not N - 1)
    covm = numpy.dot(centered.T, centered) / centered.shape[0]

    # Find the eigenvectors and eigenvalues of the covariance matrix
    val, vec = scipy.linalg.eig(covm)

    # Order the components by decreasing eigenvalue. ``eig`` returns a
    # complex-typed result, so only the real part is used for ranking.
    index = numpy.real(val).argsort()[::-1]

    # Convert the source data to z-scores. A constant feature has zero
    # standard deviation; dividing by it would give 0/0 = NaN and the NaN
    # would then spread to every projected value through the dot product.
    # Such a feature carries no information, so scale it by 1 instead,
    # which leaves its (all-zero) centred column at zero.
    sd = numpy.sqrt(numpy.diag(covm))
    sd = numpy.where(sd == 0, 1.0, sd)
    z_scores = centered / sd

    end = time.time()
    if verbose:
        print("Elapsed time... %f" % (end - start))

    projected = numpy.dot(z_scores, numpy.real(vec)[:, index[:dim]])
    if debug:
        debuginfo = {
            "covariance_matrix": covm,
            "z_scores": z_scores,
            "eigval": val,
            "eigvec": vec,
        }
        return (projected, debuginfo)
    return projected