-
Notifications
You must be signed in to change notification settings - Fork 0
/
Composition_of_algorithms.py
67 lines (47 loc) · 2.01 KB
/
Composition_of_algorithms.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
'''
Created on 18 June 2016
@author: miroslvgoncarenko
'''
import numpy as np
import pandas
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import log_loss
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
def convert_to_prob(X):
    """Map raw decision scores to probabilities with the logistic sigmoid.

    Evaluates ``1 / (1 + exp(-X))`` element-wise in a form that never
    exponentiates a positive number, so large-magnitude scores saturate
    cleanly to 0.0 or 1.0 instead of triggering floating-point overflow.

    Parameters
    ----------
    X : scalar or array-like of real numbers
        Raw scores. Lists, tuples and integer arrays are accepted;
        non-floating input is promoted to float64, floating input keeps
        its dtype.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Values in [0, 1] with the same shape as ``X`` (a NumPy scalar for
        scalar input).
    """
    scores = np.asarray(X)
    if not np.issubdtype(scores.dtype, np.floating):
        scores = scores.astype(np.float64)
    # exp(-|x|) lies in (0, 1], so it cannot overflow whatever the sign of x.
    decay = np.exp(-np.abs(scores))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0: e^x / (1 + e^x), the same
    # value rewritten so that only the non-positive exponent is used.
    probs = np.where(scores >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d result back into a scalar
    # and leaves n-d arrays unchanged.
    return probs[()]
# Load the dataset: the first column is used as the target, every remaining
# column as a feature.
data = pandas.read_csv('gbm-data.csv')
# Positional indexing via .iloc replaces DataFrame.ix, and .values replaces
# DataFrame.as_matrix(); both older spellings were removed in pandas 1.0,
# while .iloc / .values work on old and new pandas alike.
y = data.iloc[:, 0]
X = data.iloc[:, 1:].values
# 80% of the rows are held out for testing; the fixed seed keeps the split
# reproducible.
# NOTE(review): train_test_split is imported from sklearn.cross_validation,
# which newer scikit-learn releases no longer ship (it moved to
# sklearn.model_selection) - update the import when upgrading.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=241)
# Number of boosting stages; only referenced by the commented-out gradient
# boosting experiment below.
n_estimators = 250
#for learning_rate in [1, 0.5, 0.3, 0.2, 0.1]:
# gbc = GradientBoostingClassifier(n_estimators=n_estimators, verbose=True, random_state=241, learning_rate=learning_rate)
# gbc.fit(X_train,y_train)
# score_train = np.zeros((X_train.shape[0],n_estimators,), dtype=np.float64)
# for i, el in enumerate(gbc.staged_decision_function(X_train)):
# score_train[:,i] = np.squeeze(el)
# y_train_prob = convert_to_prob(score_train)
# log_loss_train = np.zeros((n_estimators,), dtype=np.float64)
# for i in range(0, n_estimators):
# log_loss_train[i] = log_loss(y_train, y_train_prob[:,i])
#gbc.predict(X_test)
# score_test = np.zeros((X_test.shape[0],n_estimators,), dtype=np.float64)
# for i, el in enumerate(gbc.staged_decision_function(X_test)):
# score_test[:,i] = np.squeeze(el)
# y_test_prob = convert_to_prob(score_test)
# log_loss_test = np.zeros((n_estimators,), dtype=np.float64)
# for i in range(0, n_estimators):
# log_loss_test[i] = log_loss(y_test, y_test_prob[:,i])
#plt.figure()
#plt.plot(log_loss_test, 'r', linewidth=2)
#plt.plot(log_loss_train, 'g', linewidth=2)
#plt.legend(['test', 'train'])
#plt.show()
# shfl = True
# Train a 37-tree random forest on the training split (fit() returns the
# estimator itself, so construction and fitting are chained).
rfc = RandomForestClassifier(
    random_state=241,
    n_estimators=37,
).fit(X_train, y_train)
# Per-class probability estimates for the held-out rows, then their log-loss
# against the true test labels.
y_forest_prob = rfc.predict_proba(X_test)
log_loss_forest = log_loss(y_test, y_forest_prob)
# Module-level flag; not read anywhere in the visible code.
Shuffle = True