"""
Optuna example that optimizes a classifier configuration for cancer dataset
using XGBoost.
In this example, we optimize the validation accuracy of cancer detection
using XGBoost. We optimize both the choice of booster model and its
hyperparameters.
"""
import numpy as np
import optuna
import pandas as pd
import sklearn.metrics
from sklearn.model_selection import train_test_split
import xgboost as xgb


def objective(trial):
    # Load the particle data set; a raw string keeps the Windows path's
    # backslashes from being read as escape sequences.
    filepath = r'E:\GranuBeaker\savedata\gb_particledata'
    df = pd.read_feather(filepath, columns=None, use_threads=True)
    df = df.dropna(axis=0)

    # Extract feature and target arrays.
    X = df.drop(['no_particles', 'packing_fraction'], axis=1)
    y = df[['packing_fraction']]
    train_x, valid_x, train_y, valid_y = train_test_split(np.array(X), np.array(y), test_size=0.25)
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)
    param = {
        "verbosity": 0,
        # The target (packing fraction) is continuous, so use a regression
        # objective rather than binary:logistic.
        "objective": "reg:squarederror",
        # Use exact for small datasets.
        "tree_method": "exact",
        # Defines the booster; gblinear fits linear functions.
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        # L2 regularization weight.
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        # L1 regularization weight.
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        # Sampling ratio of the training data.
        "subsample": trial.suggest_float("subsample", 0.2, 1.0),
        # Column sampling ratio per tree.
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
    }
if param["booster"] in ["gbtree", "dart"]:
# maximum depth of the tree, signifies complexity of the tree.
param["max_depth"] = trial.suggest_int("max_depth", 3, 9, step=2)
# minimum child weight, larger the term more conservative the tree.
param["min_child_weight"] = trial.suggest_int("min_child_weight", 2, 10)
param["eta"] = trial.suggest_float("eta", 1e-8, 1.0, log=True)
# defines how selective algorithm is.
param["gamma"] = trial.suggest_float("gamma", 1e-8, 1.0, log=True)
param["grow_policy"] = trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"])
if param["booster"] == "dart":
param["sample_type"] = trial.suggest_categorical("sample_type", ["uniform", "weighted"])
param["normalize_type"] = trial.suggest_categorical("normalize_type", ["tree", "forest"])
param["rate_drop"] = trial.suggest_float("rate_drop", 1e-8, 1.0, log=True)
param["skip_drop"] = trial.suggest_float("skip_drop", 1e-8, 1.0, log=True)
    bst = xgb.train(param, dtrain)
    preds = bst.predict(dvalid)
    # Score the trial by validation R^2 (a regression metric, not accuracy).
    r2 = sklearn.metrics.r2_score(valid_y, preds)
    return r2
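

# A minimal sketch of per-round pruning, assuming the separate
# optuna-integration package is installed: XGBoostPruningCallback reports the
# validation metric after every boosting round so Optuna can stop unpromising
# trials early. The function name, the reduced search space, and
# num_boost_round=100 are illustrative additions, not part of the original
# script.
def objective_with_pruning(trial):
    df = pd.read_feather(r'E:\GranuBeaker\savedata\gb_particledata').dropna(axis=0)
    X = df.drop(['no_particles', 'packing_fraction'], axis=1)
    y = df[['packing_fraction']]
    train_x, valid_x, train_y, valid_y = train_test_split(np.array(X), np.array(y), test_size=0.25)
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)
    param = {
        "verbosity": 0,
        "objective": "reg:squarederror",
        "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
        "max_depth": trial.suggest_int("max_depth", 3, 9, step=2),
    }
    # "validation-rmse" matches the evals name below and the default metric of
    # reg:squarederror.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-rmse")
    bst = xgb.train(
        param,
        dtrain,
        num_boost_round=100,
        evals=[(dvalid, "validation")],
        callbacks=[pruning_callback],
    )
    preds = bst.predict(dvalid)
    return sklearn.metrics.r2_score(valid_y, preds)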
if __name__ == "__main__":
study = optuna.create_study(direction="maximize", storage="sqlite:///db.sqlite3", study_name="xgboost_study")
study.optimize(objective, n_trials=500, timeout=600)
print("Number of finished trials: ", len(study.trials))
print("Best trial:")
trial = study.best_trial
print(" Value: {}".format(trial.value))
print(" Params: ")
for key, value in trial.params.items():
print(" {}: {}".format(key, value))