-
Notifications
You must be signed in to change notification settings - Fork 0
/
tune_pr.py
executable file
·83 lines (65 loc) · 2.6 KB
/
tune_pr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import sys
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from cts.models._penalised_regression import (lasso_regression,
ridge_regression,
enet_regression)
from cts.utils import ROOT, RAW_DATA, TRAIN_TEST_PARAMS, CV_FOLDS
from cts.utils import (load_dataframe,
process_data,
create_directory,
save_models)
# Build the <ROOT>/figures path and hand it to create_directory
path = os.path.join(ROOT, "figures")
create_directory(path)

# Read the raw data and run it through the project's preprocessing step
df = load_dataframe(RAW_DATA)
data = process_data(df)

# Response is -log10 of the p-value column; every other column is a predictor
target_column = "p_value"
y = -np.log10(data[target_column])
X = data.drop(columns=[target_column])

# Hold out a test set using the project-wide split settings
split = train_test_split(X, y, **TRAIN_TEST_PARAMS)
X_train, X_test, y_train, y_test = split
# =============================================================================
# Train models
# =============================================================================
# Model tuning
# ------------
# Set seed and n_jobs. Print fit times
seed = 1010
show_time = True
# The number of parallel workers is read from the first command-line argument.
# When the script is started without any argument, fall back to a single
# worker instead of failing with an IndexError. A non-integer argument still
# raises ValueError.
n_jobs = int(sys.argv[1]) if len(sys.argv) > 1 else 1

# Penalised Regression (LASSO, Ridge, Elastic-Net)
# ------------------------------------------------
# Elastic-Net searches over both alpha and l1_ratio; LASSO and Ridge only
# search over the same alpha values.
en_params = dict(alpha=np.logspace(-8, 8, 17),
                 l1_ratio=np.linspace(0, 1, 41))
pr_params = {"alpha": en_params["alpha"]}


def _tune_and_report(label, tuner, param_grid):
    """Tune one penalised-regression model and print its test-set R2.

    Uses the module-level ``X_train``/``y_train`` for tuning and
    ``X_test``/``y_test`` for scoring, together with the module-level
    ``CV_FOLDS``, ``n_jobs``, ``seed`` and ``show_time`` settings.

    Parameters
    ----------
    label : str
        Model name used at the start of the printed score line.
    tuner : callable
        One of ``lasso_regression``, ``ridge_regression`` or
        ``enet_regression``.
    param_grid : dict
        Hyper-parameter grid forwarded to ``tuner`` as ``param_grid``.

    Returns
    -------
    tuple
        ``(search, best)``: the object returned by ``tuner`` and its
        ``best_estimator_`` attribute.
    """
    search = tuner(X_train, y_train, param_grid=param_grid,
                   folds=CV_FOLDS, n_jobs=n_jobs, random_state=seed,
                   return_fit_time=show_time)
    print(12*"-", "\n")
    best = search.best_estimator_
    print(best)
    print(12*"-", "\n")
    print(f"{label} test score (R2) :", r2_score(y_test, best.predict(X_test)))
    print(36*"=", "\n")
    return search, best


lasso_cv, lasso = _tune_and_report("Lasso", lasso_regression, pr_params)
ridge_cv, ridge = _tune_and_report("Ridge", ridge_regression, pr_params)
enet_cv, enet = _tune_and_report("ElasticNet", enet_regression, en_params)

# Save model(s)
save_models([lasso, ridge, enet])