-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01.3.feature_selection_C.py
72 lines (63 loc) · 2.07 KB
/
01.3.feature_selection_C.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from GlobalUtils import *
import ref_4_mifs as mifs
import ref_4_mi as mi
import sendemail as EMAIL
from sklearn.externals import joblib
import sklearn
import numpy
from MachineSpecificSettings import Settings
from DataSetLoaderLib import DataSetLoader
import time
@timing
def SelectSubSetmRMR(vectors, classes, useMethod, features):
    """Select a feature subset with the mutual-information selector from mifs.

    Args:
        vectors: Sample matrix passed to the selector's ``fit`` and
            ``transform``.
        classes: Target values passed to the selector's ``fit``.
        useMethod: Forwarded to the selector as ``method``.
        features: Forwarded to the selector as ``n_features``.

    Returns:
        A two-element list ``[filtered, ranking]`` where ``filtered`` is the
        result of ``transform(vectors)`` and ``ranking`` is the fitted
        selector's ``ranking_`` attribute.
    """
    selector = mifs.MutualInformationFeatureSelector(
        method=useMethod,
        n_features=features,
    )
    selector.fit(vectors, classes)

    # Diagnostic output: the support mask, the ranking and its length.
    print(selector.support_)
    ranking = selector.ranking_
    print(ranking)
    print(len(ranking))

    # Reduce the input to the columns the selector kept.
    return [selector.transform(vectors), ranking]
@timing
def loadDataset(identifier):
    """Load the samples and the integer class labels of one data set.

    Args:
        identifier: Data set name handed to ``DataSetLoader.LoadDataSet`` and
            ``DataSetLoader.LoadDataSetClasses``.

    Returns:
        A tuple ``(x, y)``. ``x`` is the object returned by ``LoadDataSet``;
        ``y`` is a plain list holding every label converted with ``int``.

    Raises:
        ValueError or TypeError: If a label cannot be converted by ``int``.
    """
    loader = DataSetLoader()
    x = loader.LoadDataSet(identifier)
    print('X %s' % (x.shape,))
    y = loader.LoadDataSetClasses(identifier)
    print('Y %s' % (y.shape,))
    #y=numpy.transpose(y.astype(numpy.int64))
    # column_or_1d ravels a column vector into a 1-D array; warn=True asks
    # scikit-learn to emit a warning when that reshaping was necessary.
    y = sklearn.utils.validation.column_or_1d(y, warn=True)
    print('Y %s' % (y.shape,))
    y = list(y)
    print('y before manual transform = %s' % (y,))
    # Apply the manual transform to the labels that are actually returned:
    # every label becomes a built-in int.
    y = [int(label) for label in y]
    print(len(y))
    print(y)
    return x, y
@timing
def mainloop():
    """Run every selection method at every subset size and store the results.

    For each data set, each method and each subset size, the selected feature
    indices returned by ``SelectSubSetmRMR`` are dumped with joblib to
    ``selected_features/dataset<dataset><size>-<method>.joblib.pkl`` and a
    notification email naming the finished artifact is sent.
    """
    import os  # local import: only needed here to prepare the output folder

    datasets = ['C_train']
    sizes = ['10', '50', '100', '150', '200', '250']
    methods = ['MRMR', 'JMI', 'JMIM']
    output_dir = 'selected_features'

    # Create the output folder up front so a missing directory cannot make
    # joblib.dump fail after a selection run has already been computed.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for dataset in datasets:
        x, y = loadDataset(dataset)
        for method in methods:
            for size in sizes:
                print(size)
                print(method)
                # Only the selected indices are persisted; the filtered
                # sample matrix returned alongside them is not used.
                _, selected_indices = SelectSubSetmRMR(x, y, method, int(size))
                # Build the artifact name once; it is used for both the
                # dump path and the notification email.
                file_name = str(dataset) + str(size) + '-' + method + '.joblib.pkl'
                joblib.dump(
                    selected_indices,
                    output_dir + '/dataset' + file_name,
                    compress=9,
                )
                print("Saved new selected indices")
                EMAIL.SendEmail(' DONE', file_name)
# Start the full feature-selection sweep only when this file is executed as a
# script, so that loading the module does not trigger the long-running job.
if __name__ == "__main__":
    mainloop()