bootstrap_cox.py
"""
@author: gbello
"""
import shutil
import pickle
from lifelines.utils import concordance_index
import numpy as np
from pathlib import Path
from argparse import ArgumentParser
from survival4D.cox_reg import train_cox_reg, hypersearch_cox
from survival4D.paths import DATA_DIR
from survival4D.config import CoxExperimentConfig, HypersearchConfig
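# Default configuration file shipped alongside this script; used when no --conf-path is given.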
DEFAULT_CONF_PATH = Path(__file__).parent.joinpath("default_cox.conf")
def parse_args():
    parser = ArgumentParser()
    parser.add_argument(
        "-c", "--conf-path", dest="conf_path", type=str, default=None, help="Conf path."
    )
    # data_dir and file_name are read in main() below; expose them as CLI arguments.
    parser.add_argument("-d", "--data-dir", dest="data_dir", type=str, default=None, help="Input data directory.")
    parser.add_argument("-f", "--file-name", dest="file_name", type=str, default=None, help="Name of the pickled input data file.")
    return parser.parse_args()
def main():
args = parse_args()
if args.conf_path is None:
conf_path = DEFAULT_CONF_PATH
else:
conf_path = Path(args.conf_path)
exp_config = CoxExperimentConfig.from_conf(conf_path)
exp_config.output_dir.mkdir(parents=True, exist_ok=True)
hypersearch_config = HypersearchConfig.from_conf(conf_path)
    shutil.copy(str(conf_path), str(exp_config.output_dir.joinpath("cox.conf")))
    if args.data_dir is None:
        data_dir = DATA_DIR
    else:
        data_dir = Path(args.data_dir)
    # Import input data from the pickled file: x_full = input data for patients (i.e. volumetric
    # measures of RV function), y_full = censoring status and survival times for patients.
    # Only the first two entries of the pickle are used (a list of patient IDs, i_full, may also be present).
    with open(str(data_dir.joinpath(args.file_name)), 'rb') as f:
        c3 = pickle.load(f)
    x_full = c3[0]
    y_full = c3[1]
    del c3
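    # Note: the concordance_index calls below assume y_full[:, 1] holds the survival times
    # and y_full[:, 0] the event indicator (1 = event, 0 = censored).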
    # Initialize lists to store per-bootstrap predictions on the full sample, in-bag indices,
    # and optimism estimates
    preds_bootfull = []
    inds_inbag = []
    Cb_opts = []
# STEP 1
# (1a) find optimal hyperparameters
opars, osummary = hypersearch_cox(
x_data=x_full, y_data=y_full, method='particle swarm', nfolds=exp_config.n_folds, nevals=exp_config.n_evals,
penalty_range=hypersearch_config.penalty_exp
)
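    # hypersearch_cox is expected to return the best hyperparameters (opars) and a search summary
    # (osummary); opars['penalty'] is treated below as a base-10 exponent for the regularisation penalty.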
# (1b) using optimal hyperparameters, train a model on full sample
omod = train_cox_reg(xtr=x_full, ytr=y_full, penalty=10 ** opars['penalty'])
# (1c) Compute Harrell's Concordance index
predfull = omod.predict_partial_hazard(x_full)
C_app = concordance_index(y_full[:, 1], -predfull, y_full[:, 0])
print('\n\n==================================================')
print('Apparent concordance index = {0:.4f}'.format(C_app))
print('==================================================\n\n')
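    # The loop below implements the optimism bootstrap: each bootstrap-trained model is scored on its
    # own (in-bag) sample and on the full sample, and the mean difference (the optimism) is later
    # subtracted from the apparent concordance index C_app.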
# BOOTSTRAP SAMPLING
# define useful variables
    nsmp = len(x_full)
    rowids = list(range(nsmp))
    B = exp_config.n_bootstraps
    for b in range(B):
        print('\n-------------------------------------')
        print('Current bootstrap sample:', b + 1, 'of', B)
        print('-------------------------------------')
# STEP 2: Generate a bootstrap sample by doing n random selections with replacement (where n is the sample size)
b_inds = np.random.choice(rowids, size=nsmp, replace=True)
xboot = x_full[b_inds]
yboot = y_full[b_inds]
# (2a) find optimal hyperparameters
bpars, bsummary = hypersearch_cox(
x_data=xboot, y_data=yboot, method='particle swarm', nfolds=exp_config.n_folds, nevals=exp_config.n_evals,
penalty_range=hypersearch_config.penalty_exp
)
# (2b) using optimal hyperparameters, train a model on bootstrap sample
bmod = train_cox_reg(xtr=xboot, ytr=yboot, penalty=10 ** bpars['penalty'])
# (2c[i]) Using bootstrap-trained model, compute predictions on bootstrap sample.
# Evaluate accuracy of predictions (Harrell's Concordance index)
predboot = bmod.predict_partial_hazard(xboot)
Cb_boot = concordance_index(yboot[:, 1], -predboot, yboot[:, 0])
# (2c[ii]) Using bootstrap-trained model, compute predictions on FULL sample.
# Evaluate accuracy of predictions (Harrell's Concordance index)
predbootfull = bmod.predict_partial_hazard(x_full)
Cb_full = concordance_index(y_full[:, 1], -predbootfull, y_full[:, 0])
# STEP 3: Compute optimism for bth bootstrap sample, as difference between results from 2c[i] and 2c[ii]
Cb_opt = Cb_boot - Cb_full
# store data on current bootstrap sample (predictions, C-indices)
preds_bootfull.append(predbootfull)
inds_inbag.append(b_inds)
Cb_opts.append(Cb_opt)
del bpars, bmod
    # STEP 4: repeat steps 2-3 for each of the B bootstrap samples (the loop above)
    # STEP 5: Compute bootstrap-estimated optimism (mean of optimism estimates across the B bootstrap samples)
    C_opt = np.mean(Cb_opts)
# Adjust apparent C using bootstrap-estimated optimism
C_adj = C_app - C_opt
# compute confidence intervals for optimism-adjusted C
C_opt_95confint = np.percentile([C_app - o for o in Cb_opts], q=[2.5, 97.5])
print('\n\n=========================SUMMARY=========================')
print('Apparent concordance index = {0:.4f}'.format(C_app))
print('Optimism bootstrap estimate = {0:.4f}'.format(C_opt))
print('Optimism-adjusted concordance index = {0:.4f}, and 95% CI = {1}'.format(C_adj, C_opt_95confint))
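
    # Persist the bootstrap outputs collected above so the in-bag indices, predictions and optimism
    # estimates are not discarded (the file name "bootstrap_results.pkl" is an arbitrary choice).
    with open(str(exp_config.output_dir.joinpath("bootstrap_results.pkl")), 'wb') as f:
        pickle.dump(
            {
                "C_app": C_app,
                "C_opt": C_opt,
                "C_adj": C_adj,
                "C_opt_95confint": C_opt_95confint,
                "Cb_opts": Cb_opts,
                "preds_bootfull": preds_bootfull,
                "inds_inbag": inds_inbag,
            },
            f,
        )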
if __name__ == '__main__':
main()