-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathpca~
21 lines (18 loc) · 865 Bytes
/
pca~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from numpy import *
# coding: utf-8
def pca(dataMat, topNfeat=5):
    """Project data onto its leading principal components.

    The data is centred on the per-column mean and divided by a single
    scalar, the standard deviation taken over *all* entries of ``dataMat``.
    The covariance matrix of the scaled data is eigen-decomposed and the
    eigenvectors with the largest eigenvalues are kept.

    Args:
        dataMat: 2-D array or matrix with one sample per row and one
            feature per column.
        topNfeat: number of principal components to keep. Values larger
            than the number of features keep every component.

    Returns:
        A tuple ``(lowDDataMat, reconMat)`` of ``numpy.matrix`` objects:
        the data expressed in the retained components (one column per
        component, largest eigenvalue first) and the data reconstructed
        from those components in the original coordinates.
    """
    meanVals = mean(dataMat, axis=0)
    meanRemoved = dataMat - meanVals  # subtract the per-feature mean

    # NOTE(review): this is one global standard deviation, not a per-feature
    # one, so it only rescales the data uniformly. Kept as is so existing
    # results do not change - confirm whether std(dataMat, axis=0) was meant.
    scale = std(dataMat)
    if scale == 0:
        # Constant input: dividing by zero would fill the output with NaN.
        scale = 1.0
    stded = asmatrix(meanRemoved / scale)  # normalise by the standard deviation

    covMat = cov(stded, rowvar=0)  # feature-by-feature covariance matrix

    # A covariance matrix is symmetric, so use eigh: it always returns real
    # eigenvalues and eigenvectors, whereas eig can yield complex results
    # from round-off when the matrix is rank deficient.
    eigVals, eigVects = linalg.eigh(asmatrix(covMat))

    # Indices of the eigenvalues, largest first, truncated to topNfeat.
    eigValInd = argsort(eigVals)[::-1][:topNfeat]
    redEigVects = asmatrix(eigVects[:, eigValInd])  # drop unwanted eigenvectors

    lowDDataMat = stded * redEigVects  # data in the reduced space
    # Undo the projection, the scaling and the centring.
    reconMat = (lowDDataMat * redEigVects.T) * scale + meanVals
    return lowDDataMat, reconMat
# Demo: run pca() on a random 10x8 sample and print the input followed by
# both results. `random` is whatever `from numpy import *` exposes under
# that name (numpy.random).
randArray = random.random(size=(10, 8))
# A parenthesised single argument prints the same text whether `print` is
# the Python 2 statement or the Python 3 function.
print(randArray)
a, b = pca(randArray)
# "%s %s" reproduces the space-separated output of the old `print a,b`
# statement on both Python 2 and Python 3.
print("%s %s" % (a, b))