-
Notifications
You must be signed in to change notification settings - Fork 0
/
ridgeRegression
91 lines (62 loc) · 3.09 KB
/
ridgeRegression
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from algorithmClass import Algorithm
import sklearn
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import scale
import numpy as np
from sklearn.metrics import mean_squared_error, make_scorer, explained_variance_score, mean_absolute_error
import mysql.connector
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import normalize
import pickle
class RidgeRegressor(Algorithm):
    """Score rows with a pre-trained ridge regression model loaded from disk.

    The rows to score are read from ``self.data`` (presumably stored there by
    the ``Algorithm`` base class constructor -- confirm against
    algorithmClass). Each row is expected to hold the response variable at
    index ``_TARGET_INDEX``.
    """

    # Position of the response variable (fanduel points) within a raw row.
    _TARGET_INDEX = 14
    # Number of leading columns discarded once the target has been removed.
    _NUM_LEADING_DROPPED = 12

    def split_data(self):
        """No-op: this class only predicts, so no train/test split is made."""
        return

    def split_features(self):
        """Separate the target column from the feature columns and scale them.

        Sets ``self.target`` to the list of target values and
        ``self.features`` to the standardized feature matrix produced by
        ``sklearn.preprocessing.scale``.

        The rows in ``self.data`` are left untouched, so this method (and
        therefore ``predict``) can be called repeatedly with the same result.

        Raises:
            IndexError: if a row has no element at ``_TARGET_INDEX``.
        """
        target_index = self._TARGET_INDEX
        dropped = self._NUM_LEADING_DROPPED

        targets = []
        feature_rows = []
        for row in self.data:
            row = list(row)
            # extract the fanduel points (response variable)
            targets.append(row[target_index])
            # Remove the target first, then drop the leading columns; this
            # matches popping index 14 followed by twelve pops from the front.
            without_target = row[:target_index] + row[target_index + 1:]
            feature_rows.append(without_target[dropped:])

        self.target = targets
        self.features = scale(feature_rows)

    def run_pca(self):
        """Reduce ``self.features`` with the PCA object stored in PCA_model.sav.

        The result replaces ``self.features`` in place (nothing is returned)
        and the resulting number of columns is printed. The number of
        components is whatever the pickled object was configured with.
        """
        filename = 'PCA_model.sav'
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(filename, 'rb') as model_file:
            pca = pickle.load(model_file)
        # NOTE(review): this refits the loaded PCA on the current features,
        # discarding whatever fit was stored in the file -- confirm intended.
        pca.fit(self.features)
        self.features = pca.transform(self.features)
        print(len(self.features[0]))

    def predict(self):
        """Prepare the features, load Ridge100_model.sav and predict.

        Returns:
            The model's predictions for every row, also kept in
            ``self.predictions``. The loaded model is kept in ``self.model``.
        """
        self.split_features()
        # PCA projection is currently disabled:
        # self.run_pca()
        filename = 'Ridge100_model.sav'
        # NOTE: unpickling executes arbitrary code; only load trusted files.
        with open(filename, 'rb') as model_file:
            self.model = pickle.load(model_file)
        self.predictions = self.model.predict(self.features)
        return self.predictions

    def mse(self):
        """Return the mean squared error of the predictions vs. the targets.

        ``predict`` must have been called first so that ``self.target`` and
        ``self.predictions`` exist.
        """
        return mean_squared_error(self.target, self.predictions)

    def compare(self):
        """Print each prediction next to its actual target, one pair per line."""
        for predicted, actual in zip(self.predictions, self.target):
            # Single formatted argument prints identically on Python 2 and 3.
            print("%s %s" % (predicted, actual))
def main():
    """Fetch player performance rows from MySQL and score them with ridge.

    Prints the first row, the row count and the column count of the result
    set, then each prediction next to its target, the prediction array and
    the mean squared error.
    """
    # NOTE(review): database credentials are hard-coded in source; they should
    # be moved to configuration/environment and rotated.
    cnx = mysql.connector.connect(user="wsa",
                                  host="34.68.250.121",
                                  database="NCAAWomens",
                                  password="LeBron>MJ!")
    getStuff = "SELECT * from performancePlayer left join teamVsPosition on ((performancePlayer.positionID = teamVsPosition.positionID) and (performancePlayer.opponent = teamVsPosition.team) and (performancePlayer.seasonID = teamVsPosition.seasonID)) left join playerBasicAverages on ((performancePlayer.playerID = playerBasicAverages.playerID) and (performancePlayer.seasonID = playerBasicAverages.seasonID)) left join playerAdvancedAverages on ((performancePlayer.playerID = playerAdvancedAverages.playerID) and (performancePlayer.seasonID = playerAdvancedAverages.seasonID)) where performancePlayer.opponentTeamID is not null and performancePlayer.opponentTeamID < 15 and performancePlayer.minutes > 5 and playerAdvancedAverages.gamesPlayed > 5 and playerBasicAverages.minutes > 5"

    # Release the cursor and the connection even if the query fails.
    try:
        cursor = cnx.cursor(buffered=True)
        try:
            cursor.execute(getStuff)
            # Rows come back as tuples; the regressor works on lists.
            dataSet = [list(feature) for feature in cursor.fetchall()]
        finally:
            cursor.close()
    finally:
        cnx.close()

    # Single-argument print calls behave the same on Python 2 and 3.
    print(dataSet[0])
    print(len(dataSet))
    print(len(dataSet[0]))

    ridge = RidgeRegressor(dataSet)
    predictions = ridge.predict()
    ridge.compare()
    print(predictions)
    print(ridge.mse())
# Run the prediction pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()