-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSGDClass_Pipe.py
35 lines (31 loc) · 1.35 KB
/
SGDClass_Pipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import pandas as pd
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
import os
# Switch to the folder holding the case data so the CSV can be opened by
# its bare file name.  The path is a raw string: it contains backslashes
# followed by letters (\T, \A, \S, \C, \W) which are invalid escape
# sequences in a normal literal and trigger a SyntaxWarning on recent
# Python versions.  The resulting path value is unchanged.
os.chdir(r"C:\Training\Academy\Statistics (Python)\Cases\Wisconsin")
df = pd.read_csv("BreastCancer.csv")

# Dummy-encode the non-numeric columns; drop_first=True keeps k-1
# indicator columns for a column with k categories.
dum_df = pd.get_dummies(df, drop_first=True)

# Features are every column except the first and the last; the last
# column is the target.
# NOTE(review): the first column is presumably a record identifier and the
# last the dummy-encoded class label -- confirm against the CSV.
X = dum_df.iloc[:, 1:-1]
y = dum_df.iloc[:, -1]

# 70/30 hold-out split, stratified on the target, with a fixed seed so the
# split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=2022,
    test_size=0.3,
    stratify=y,
)
# Scale the features with MinMaxScaler, then fit a linear classifier by
# stochastic gradient descent.  Wrapping both steps in a Pipeline means the
# scaler is fitted only on the data passed to pipe.fit().
scaler = MinMaxScaler()

# Logistic-regression loss, which is what makes predict_proba available.
# The old spelling loss='log' was deprecated in scikit-learn 1.1 and
# removed in 1.3; 'log_loss' is the current name for the same loss.
model = SGDClassifier(loss='log_loss', random_state=2022)

# The step name 'SGD' is the prefix used to address the classifier's
# parameters (e.g. 'SGD__eta0') when tuning the pipeline.
pipe = Pipeline([('scaler', scaler), ('SGD', model)])
pipe.fit(X_train, y_train)

# Score the hold-out set using the second column of predict_proba and
# report the area under the ROC curve.
y_pred_prob = pipe.predict_proba(X_test)[:, 1]
print(roc_auc_score(y_test, y_pred_prob))
# ---------------------------------------------------------------------
# Grid search with cross-validation
# ---------------------------------------------------------------------
# Five shuffled, stratified folds with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2022)

# Candidate settings for the pipeline's 'SGD' step: seven evenly spaced
# eta0 values from 0.0001 to 0.8, crossed with four learning-rate
# schedules.
params = dict(
    SGD__eta0=np.linspace(0.0001, 0.8, 7),
    SGD__learning_rate=['constant', 'optimal', 'invscaling', 'adaptive'],
)

# Every combination is scored by ROC AUC on the folds above.  The search
# is run on the full X / y rather than on the training split.
gcv = GridSearchCV(
    pipe,
    param_grid=params,
    scoring='roc_auc',
    cv=kfold,
)
gcv.fit(X, y)

# Report the best parameter combination and its mean cross-validated score.
print(gcv.best_params_)
print(gcv.best_score_)