# predict.py
# Forked from Marmingen/SML-lead-analysis.
# 85 lines (57 loc), 2.18 KB.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
##########################################################
## IMPORTS
import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as Q
from sklearn.model_selection import KFold
from sklearn import preprocessing
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import GridSearchCV
import sys
import os
##########################################################
## FIXING PATH
# Extend the module search path so the project-local import below can resolve.
# sys.path[0] is the directory of the invoked script; the slice drops its last
# 14 characters -- presumably a trailing sub-folder name, so that the parent
# directory ends up on the path. TODO confirm what the 14 characters are.
sys.path.append(str(sys.path[0][:-14]))
# NOTE(review): this entry is built from the current working directory, so it
# only helps when the script is launched from the directory that contains
# "general_classes".
dirname = os.getcwd()
sys.path.insert(1, os.path.join(dirname, "general_classes"))
##########################################################
## LOCAL PACKAGES
from general_classes import DataPreparation
##########################################################
## GLOBALS
# Line of asterisks, presumably a separator for console output; it is not
# referenced anywhere in the visible part of this file.
bar = "************************************************************"
############################################################
## FUNCTIONS
def training():
    """Fit the QDA classifier on the complete, class-balanced training data.

    Loads ``./data/train.csv`` through ``DataPreparation``, merges the
    train/test split returned by ``get_sets()`` back into a single set so
    that every labelled sample is used for fitting, oversamples with SMOTE,
    standardizes the features and fits a Quadratic Discriminant Analysis
    model.

    Returns:
        The fitted ``QuadraticDiscriminantAnalysis`` estimator. The scaler
        fitted here is local to this function and is not returned.
    """
    # setting up dataprep instance
    dp = DataPreparation("./data/train.csv", clean=True)

    # Merge the split again: the final model is trained on all samples.
    X_train, X_test, Y_train, Y_test = dp.get_sets()
    X_all = np.concatenate((X_train, X_test))
    Y_all = np.concatenate((Y_train, Y_test))

    # SMOTE.fit_resample returns the ORIGINAL samples plus the synthetic
    # ones. Concatenating that result onto the originals again would count
    # every real sample twice and partly undo the class balancing, so the
    # resampled set is used as-is.
    sm = SMOTE(k_neighbors=5)
    X_balanced, Y_balanced = sm.fit_resample(X_all, Y_all)

    # Normalize the data
    scaler = preprocessing.StandardScaler().fit(X_balanced)
    X_scaled = scaler.transform(X_balanced)

    # Train the model; fit() returns the fitted estimator itself.
    qda = Q(reg_param=0.0)
    return qda.fit(X_scaled, Y_balanced)
def predicting():
    """Print comma-separated predictions for ``./data/test.csv`` to stdout.

    Loads the test features through ``DataPreparation``, standardizes them,
    trains a fresh model via ``training()`` and prints one value per sample
    on a single line, separated by commas (no trailing comma).
    """
    dp = DataPreparation("./data/test.csv", test=True)

    # NOTE(review): the test features are standardized with statistics
    # fitted on the test set itself, while the model was fitted on data
    # standardized with training-set statistics. Reusing the training scaler
    # would require training() to expose it -- confirm the intended behavior.
    scaler = preprocessing.StandardScaler().fit(dp.X_true)
    dp.X_true = scaler.transform(dp.X_true)

    model = training()
    preds = model.predict(dp.X_true)

    # Remap the model's labels for output: -1 is printed as 1, anything else
    # as 0 (presumably the required submission encoding -- verify against the
    # labels produced by DataPreparation).
    # str.join builds the line in one pass and yields "" for no predictions,
    # matching the previous concatenate-then-strip-last-comma result.
    print(",".join("1" if pred == -1 else "0" for pred in preds))
if __name__ == "__main__":
    # Script entry point. predicting() trains the model itself before it
    # prints the predictions, so training() is not invoked separately here.
    predicting()