-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfrom_model.py
72 lines (54 loc) · 2.66 KB
/
from_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import pandas as pd
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import OrdinalEncoder
from sklearn.multioutput import MultiOutputRegressor
import pickle as pcl
import numpy as np
def predict_logistic(model: LogisticRegression, encoder: OrdinalEncoder, df: pd.DataFrame):
    """Predict the Y/N outcome and its class probabilities for each row of *df*.

    Args:
        model: Classifier exposing ``predict_proba`` and ``predict``.
        encoder: Encoder whose ``transform`` is applied to the
            ``state_name`` and ``gender`` columns.
        df: Input rows with ``state_name``, ``gender`` and ``age`` columns.

    Returns:
        A DataFrame with the columns ``"N probability"`` and
        ``"Y probability"`` (numeric) and ``"Y/N"`` (``"Y"`` where the
        predicted class equals 1.0, otherwise ``"N"``).

    Raises:
        ValueError: If ``predict_proba`` does not return exactly two columns.
    """
    encoded = encoder.transform(df[['state_name', 'gender']])
    features = np.column_stack((encoded, df['age'].values))

    probabilities = model.predict_proba(features)
    labels = ["Y" if predicted == 1.0 else "N" for predicted in model.predict(features)]

    # Build the frame from the numeric matrix first and attach the labels
    # afterwards: stacking floats and strings into one NumPy array would
    # coerce the probabilities to text.
    result = pd.DataFrame(data=probabilities, columns=["N probability", "Y probability"])
    result["Y/N"] = labels
    return result
def predict_linear(model: LinearRegression, encoder: OrdinalEncoder, df: pd.DataFrame):
    """Predict a rounded age for each row of *df*.

    Args:
        model: Regressor exposing ``predict``.
        encoder: Encoder whose ``transform`` is applied to the feature columns.
        df: Input rows with ``state_name``, ``gender`` and ``tobacco`` columns.

    Returns:
        A single-column DataFrame named ``"age"`` holding the rounded
        predictions.
    """
    feature_columns = ['state_name', 'gender', 'tobacco']
    encoded = encoder.transform(df[feature_columns])
    predicted_age = model.predict(encoded)
    return pd.DataFrame(data=predicted_age.round(), columns=["age"])
def predict_multilinear(model: MultiOutputRegressor, encoder: OrdinalEncoder, df: pd.DataFrame):
    """Predict a rounded age and a gender label for each row of *df*.

    Args:
        model: Multi-output regressor exposing ``predict``; its two output
            columns are read as age and gender code, in that order.
        encoder: Encoder whose ``transform`` is applied to the feature columns.
        df: Input rows with ``state_name`` and ``tobacco`` columns.

    Returns:
        A DataFrame with an ``"age"`` column (rounded prediction) and a
        ``"gender"`` column that is ``"Male"`` where the rounded code equals
        1 and ``"Female"`` for every other value.
    """
    encoded = encoder.transform(df[['state_name', 'tobacco']])
    rounded = model.predict(encoded).round()

    result = pd.DataFrame(data=rounded, columns=["age", "gender"])
    # Any rounded code other than exactly 1 is labelled "Female".
    result["gender"] = result["gender"].apply(
        lambda code: "Male" if code == 1. else "Female"
    )
    return result
def predict_tree(model: DecisionTreeClassifier, encoder, df: pd.DataFrame):
    """Predict the tobacco category for each row of *df*.

    Args:
        model: Classifier exposing ``predict``; it returns encoded class
            values.
        encoder: A pair ``(feature_encoder, target_encoder)``. The first
            encodes the ``state_name`` and ``gender`` columns; the second
            maps the predicted codes back to labels through
            ``inverse_transform``. Both are expected to be already fitted.
        df: Input rows with ``state_name``, ``gender`` and ``age`` columns.

    Returns:
        A single-column DataFrame named ``"tobacco"`` holding the decoded
        predictions.
    """
    feature_encoder, target_encoder = encoder
    categorical_columns = ['state_name', 'gender']

    # Use transform, not fit_transform: re-fitting on the rows being scored
    # would rebuild the category-to-code mapping from this input alone, so
    # the codes would no longer match the ones the encoder was fitted with.
    encoded = feature_encoder.transform(df[categorical_columns])
    features = np.column_stack((encoded, df['age'].values))

    predicted_codes = model.predict(features)
    # inverse_transform expects a 2-D column; flatten its result back to 1-D.
    decoded = target_encoder.inverse_transform(predicted_codes.round().reshape(-1, 1))
    return pd.DataFrame({'tobacco': decoded.flatten()})
# Dispatch table: the model name stored alongside a saved model selects the
# prediction function that knows how to feed that model.
predictors = {
    'Linear': predict_linear,
    'MultiLinear': predict_multilinear,
    'Logistic': predict_logistic,
    'DecisionTree': predict_tree,
}
# Sample input columns for each model name, in the column layout the matching
# prediction function reads.
inputs = {
    'Linear': {
        'state_name': ['Georgia'],
        'gender': ['Male'],
        'tobacco': ['Smokeless Tobacco'],
    },
    'MultiLinear': {
        'state_name': ['Alaska'],
        'tobacco': ['Pipe'],
    },
    'Logistic': {
        'state_name': ['Alabama', 'Georgia'],
        'gender': ['Female', 'Male'],
        'age': [46, 55],
    },
    'DecisionTree': {
        'state_name': ['Georgia', 'Alabama'],
        'gender': ['Female', 'Male'],
        'age': [21, 55],
    },
}
def from_saved_model(path: str):
    """Load a pickled model bundle and return a ready-to-call predictor.

    The file at *path* must unpickle to a 3-tuple
    ``(model_name, model, feature_encoder)``; ``model_name`` selects the
    prediction function from ``predictors``.

    Args:
        path: Location of the pickled bundle.

    Returns:
        A one-argument callable that takes a DataFrame and returns the
        selected predictor's result for the loaded model and encoder.

    Raises:
        KeyError: If the stored model name has no entry in ``predictors``.
    """
    with open(path, 'rb') as handle:
        # NOTE: unpickling executes arbitrary code from the file; only load
        # bundles that come from a trusted source.
        model_name, model, feature_encoder = pcl.load(handle)

    # Resolve the predictor eagerly so an unknown name fails at load time.
    predictor = predictors[model_name]

    def predict(df):
        return predictor(model, feature_encoder, df)

    return predict
def test_saved_model():
    """Smoke-test the saved logistic model by printing its sample predictions.

    Loads ``models/logistic_model`` relative to the working directory and
    runs it on the ``'Logistic'`` sample from ``inputs``.
    """
    file_name = "logistic_model"
    predict = from_saved_model(f"models/{file_name}")
    sample = pd.DataFrame(inputs['Logistic'])
    print(predict(sample))
# Module-level call: the smoke test runs every time this file is executed or
# imported, so the saved model file must be present at that point.
test_saved_model()