-
Notifications
You must be signed in to change notification settings - Fork 1
/
gradient_boosting_model.py
35 lines (28 loc) · 1.29 KB
/
gradient_boosting_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import cross_val_score
def train_model(cc_features, pt_features, labels, verbose=1):
    """Train a gradient-boosting classifier on two concatenated feature sets.

    Parameters
    ----------
    cc_features, pt_features : 2-D arrays with the same number of rows;
        concatenated column-wise to form the design matrix.
    labels : array of class labels; flattened to 1-D before fitting.
    verbose : int, optional
        Verbosity level; at >= 3, prints a 4-fold CV score and the
        training-set accuracy.

    Returns
    -------
    (scaler, model) : the fitted ``StandardScaler`` and
        ``GradientBoostingClassifier``, in the order ``run_model`` expects.
    """
    X = np.hstack((cc_features, pt_features)).astype(np.float32)  # concatenate along axis 1
    y = labels.reshape(-1)  # flatten to the 1-D target sklearn expects
    # Gradient boosting is tree-based, so scaling is not strictly necessary;
    # the scaler is kept so run_model applies an identical transform.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    model = GradientBoostingClassifier(subsample=0.5,
                                       max_features=0.5,
                                       max_depth=5,
                                       )
    if verbose >= 3:
        # Only computed on demand: cross_val_score refits the model 4 times.
        print(f"Gradboost train 4-cv score: {cross_val_score(model, X, y, cv=4)}")
    model.fit(X, y)
    y_pred = model.predict(X)
    if verbose >= 3:
        # Score against the flattened y, not the possibly-2-D `labels`,
        # so the metric sees targets of the same shape as the predictions.
        print(f"Train accuracy score: {accuracy_score(y,y_pred):.4f}")
    return scaler, model
def run_model(scaler, model, cc_features, pt_features):
    """Apply a trained (scaler, model) pair to new feature arrays.

    Parameters
    ----------
    scaler : fitted transformer with a ``transform`` method (as returned
        by ``train_model``).
    model : fitted classifier with ``predict`` and ``predict_proba``.
    cc_features, pt_features : 2-D arrays with the same number of rows;
        concatenated column-wise, matching the layout used at training time.

    Returns
    -------
    (y_pred, probs) : predicted labels and per-class probabilities.
    """
    # Cast to float32 to match the dtype the scaler/model were fitted on.
    X = np.hstack((cc_features, pt_features)).astype(np.float32)
    X = scaler.transform(X)
    y_pred = model.predict(X)
    probs = model.predict_proba(X)
    return y_pred, probs