-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_utils.py
54 lines (48 loc) · 2.43 KB
/
test_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from sklearn.utils import shuffle
from tpot.tpot import TPOTClassifier, TPOTRegressor
from os import makedirs
def create_dirs(dir_name):
    """Create the directory ``dir_name`` together with any missing parents.

    A directory that already exists is not an error, so the call is safe to
    repeat. Every other failure (permission denied, a regular file occupying
    the path, an invalid argument, ...) is propagated to the caller instead of
    being silently discarded.

    Args:
        dir_name: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created for a reason other than
            it already existing as a directory.
    """
    # exist_ok=True tolerates only the "already a directory" case; the former
    # bare ``except: pass`` also hid real errors and KeyboardInterrupt.
    makedirs(dir_name, exist_ok=True)
def extract_labels(df, labelname, random_state=None):
    """Split a table into a shuffled feature matrix and label vector.

    The ``labelname`` column is taken out as the labels, the remaining columns
    form the features, and both are shuffled together (so rows stay aligned)
    before being converted to NumPy arrays.

    Args:
        df: Table holding features and the label column. It must support
            ``df[labelname]`` and ``df.drop(labelname, axis=1)``
            (presumably a pandas DataFrame -- confirm against callers).
        labelname: Name of the column that holds the labels.
        random_state: Forwarded to ``sklearn.utils.shuffle``. The default
            ``None`` keeps the previous non-reproducible behaviour; pass an
            int (or RandomState) to get the same row order on every call.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays: the shuffled features and the
        correspondingly shuffled labels.
    """
    labels = df[labelname].copy(deep=True)
    features = df.drop(labelname, axis=1)
    # Shuffle both in one call so feature rows and labels stay paired.
    features, labels = shuffle(features, labels, random_state=random_state)
    return features.to_numpy(), labels.to_numpy()
def get_optimizer(classification,
                  gens=2,
                  pop_size=10,
                  offspr_size=10,
                  mr=0.9,
                  cr=0.1,
                  scoring=None,
                  cv=5,
                  n_jobs=1,
                  maxtmins=10,
                  verbosity=2,
                  track_fitnesses=False,
                  track_generations=False,
                  resource_logging=False,
                  test_x=None,
                  test_y=None):
    """Build a TPOT pipeline optimizer for classification or regression.

    Args:
        classification: Truthy selects ``TPOTClassifier``; falsy selects
            ``TPOTRegressor``.
        gens: Passed as ``generations``.
        pop_size: Passed as ``population_size``.
        offspr_size: Passed as ``offspring_size``.
        mr: Passed as ``mutation_rate``.
        cr: Passed as ``crossover_rate``.
        scoring: Scoring name handed to the optimizer. When ``None`` it
            defaults to ``'accuracy'`` for classification and
            ``'neg_mean_squared_error'`` for regression.
        cv: Passed as ``cv``.
        n_jobs: Passed as ``n_jobs``.
        maxtmins: Passed as ``max_eval_time_mins``.
        verbosity: Passed as ``verbosity``.
        track_fitnesses: Passed through unchanged under the same name.
        track_generations: Passed through unchanged under the same name.
        resource_logging: Passed through unchanged under the same name.
        test_x: Passed through unchanged under the same name.
        test_y: Passed through unchanged under the same name.

    Returns:
        The newly constructed (not yet fitted) optimizer instance.
    """
    # NOTE(review): an earlier comment here described hp_opt_iterations,
    # hp_opt_mutate_prob and hp_improvs_tracker as shared between per-generation
    # and final-population hyperparameter tuning; none of them are parameters
    # of this function -- confirm whether that note is stale.
    if classification:
        optimizer_cls, fallback_scoring = TPOTClassifier, 'accuracy'
    else:
        optimizer_cls, fallback_scoring = TPOTRegressor, 'neg_mean_squared_error'

    if scoring is None:
        scoring = fallback_scoring

    return optimizer_cls(
        generations=gens,
        population_size=pop_size,
        offspring_size=offspr_size,
        mutation_rate=mr,
        crossover_rate=cr,
        scoring=scoring,
        cv=cv,
        n_jobs=n_jobs,
        max_eval_time_mins=maxtmins,
        verbosity=verbosity,
        track_fitnesses=track_fitnesses,
        track_generations=track_generations,
        resource_logging=resource_logging,
        test_x=test_x,
        test_y=test_y,
    )