-
Notifications
You must be signed in to change notification settings - Fork 0
/
LinearRegression.py
154 lines (131 loc) · 4.61 KB
/
LinearRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
from cProfile import label
from cmath import sqrt
from turtle import color
from pyparsing import lineEnd
from data import create_regression_data
import math
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
class LinearRegression:
    '''
    Simple (single-feature) linear regression fitted with the closed-form
    least-squares estimates:

        b1 = sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x) ** 2)
        b0 = mean_y - b1 * mean_x

    Typical use is ``LinearRegression().main()``, which asks for a data
    set size, fits the line on the training split, prints the RMSE on
    the test split and plots the result.
    '''

    def __init__(self):
        # Number of points requested; get_data_points() overwrites this
        # when the user types a value.
        self.data_point_count = 1000
        # Train/test splits, populated by main().
        self.x_train, self.x_test, self.y_train, self.y_test = None, None, None, None

    def compute_mean(self, values):
        '''
        Compute mean of a list of values
        args:
            values: sequence of numbers
        return:
            mean: float
        raises:
            ZeroDivisionError: if values is empty
        '''
        return sum(values) / len(values)

    def compute_variance(self, values, mean):
        '''
        Compute the sum of squared deviations of the values from
        their mean.

        NOTE: the result is NOT divided by the number of values, so it
        is the unnormalised variance. get_coefficients() relies on this:
        compute_covariance() is unnormalised in the same way, so the
        missing factors cancel in the slope ratio.
        args:
            values: sequence of numbers
            mean: float
        return:
            sum of squared deviations: float
        '''
        return sum((value - mean) ** 2 for value in values)

    def compute_covariance(self, x, mean_x, y, mean_y):
        '''
        Compute the sum of the products of the deviations of x and y
        from their respective means.

        NOTE: like compute_variance(), the result is not divided by the
        number of values (unnormalised covariance).
        args:
            x: First sequence of values
            mean_x: Mean of x (float)
            y: Second sequence of values, at least as long as x
            mean_y: Mean of y (float)
        return:
            sum of deviation products: float
        raises:
            IndexError: if y is shorter than x
        '''
        covariance = 0.0
        # Index into y (rather than zip) so a too-short y is reported
        # instead of being silently truncated.
        for i, x_value in enumerate(x):
            covariance += (x_value - mean_x) * (y[i] - mean_y)
        return covariance

    def get_coefficients(self):
        '''
        Compute coefficients to fit the regression line to the
        training data held in self.x_train / self.y_train
        args:
            None
        return:
            b0: intercept
            b1: x_coefficient
        '''
        x_mean = self.compute_mean(self.x_train)
        y_mean = self.compute_mean(self.y_train)
        # Both sums are unnormalised, so their ratio is the usual
        # least-squares slope cov(x, y) / var(x).
        covariance = self.compute_covariance(self.x_train, x_mean, self.y_train, y_mean)
        variance = self.compute_variance(self.x_train, x_mean)
        b1 = covariance / variance
        b0 = y_mean - b1 * x_mean
        return b0, b1

    def make_predictions(self, b0, b1):
        '''
        Return predictions for the test set (self.x_test)
        args:
            b0: Intercept
            b1: x-coefficient
        return:
            predictions: list of predictions for points in the test set
        '''
        return [x * b1 + b0 for x in self.x_test]

    def rmse_metric(self, predictions):
        '''
        Computes RMSE (Root Mean Square Error) between actual
        (self.y_test) and predicted values
        args:
            predictions: A list of predictions (list)
        return:
            rmse: The RMSE value (float)
        raises:
            ZeroDivisionError: if predictions is empty
        '''
        total_squared_error = 0.0
        for idx, prediction in enumerate(predictions):
            total_squared_error += (prediction - self.y_test[idx]) ** 2
        mean_error = total_squared_error / len(predictions)
        # math.sqrt keeps the result a real float; cmath.sqrt would
        # return a complex number such as (1.5+0j).
        return math.sqrt(mean_error)

    def get_data_points(self) -> tuple:
        '''
        Creates the train and test_set to fit the regression line to.

        Prompts for the number of points; an empty answer keeps the
        current self.data_point_count. 1.25 times that many points are
        requested from the data generator and split 70/30 into train
        and test sets with a fixed random_state of 42.
        args:
            None
        return:
            x_train, x_test, y_train, y_test
        raises:
            SystemExit: if the answer is not a valid integer
        '''
        c = input("Enter number of points to fit regression line to. Press enter to use default (1000)")
        if not c:
            print("Using default values...")
        else:
            try:
                self.data_point_count = int(c)
            except ValueError:
                # Stop the program with a readable message instead of
                # a traceback.
                raise SystemExit("Accepted values are integers only") from None
        requested_points = math.floor(self.data_point_count * 1.25)
        x, y = create_regression_data.LinearRegressionData.create_data_points(requested_points)
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
        return x_train, x_test, y_train, y_test

    def plot_output(self, predictions):
        '''
        Plot the train, test and predicted values
        args:
            predictions: A list of predictions for the test set
        return:
            None
        '''
        plt.scatter(self.x_train, self.y_train, color='red', label="Train", marker="4")
        plt.scatter(self.x_test, self.y_test, color='green', label="Test", marker="*")
        plt.plot(self.x_test, predictions, label="Predictions", color="black")
        plt.legend()
        plt.show()

    def main(self):
        '''
        Run the whole pipeline: build the data set, fit the line,
        report the RMSE on the test set and plot the result.
        '''
        self.x_train, self.x_test, self.y_train, self.y_test = self.get_data_points()
        b_0, b_1 = self.get_coefficients()
        predictions = self.make_predictions(b_0, b_1)
        rmse = self.rmse_metric(predictions)
        print("RMSE: {}".format(rmse))
        self.plot_output(predictions)
if __name__ == "__main__":
    # Script entry point: run the interactive fit / evaluate / plot pipeline.
    LinearRegression().main()