"""Binary logistic regression fitted by batch gradient descent.

Provenance: this module was reconstructed from a three-commit
``git format-patch`` series by Vorpalwolf33 (2019-10-20) whose line breaks
and indentation had been collapsed:

* [PATCH 1/3] 209dc94  Create linear-regression_vorpal.py
* [PATCH 2/3] 03762bd  Create logistic-regression_vorpal.py
* [PATCH 3/3] 1c5ddec  Delete linear-regression_vorpal.py

Patch 3/3 removes the file that patch 1/3 adds, so the tree produced by the
series contains only ``logistic-regression_vorpal.py``, i.e. the code below.

All helpers accept plain ``numpy.ndarray`` inputs (``numpy.matrix`` inputs are
converted) and return plain arrays.  Coefficients are carried as a row vector
of shape ``(1, n_features)``, as in the original code.
"""

import csv

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; the numeric helpers do not need it
    plt = None


def loadCSV(filename):
    """Load a purely numeric CSV file into a 2-D float array.

    Args:
        filename: Path of the CSV file; every cell must parse as a float.

    Returns:
        ``numpy.ndarray`` with one row per non-empty CSV record.

    Raises:
        ValueError: If a cell cannot be converted to ``float``.
    """
    # The csv module asks for newline="" so that the reader, not the text
    # layer, decides how line endings are interpreted.
    with open(filename, "r", newline="") as csvfile:
        # Blank records (e.g. a trailing empty line) come back as [] and
        # would make the rows ragged, so they are skipped.
        rows = [[float(cell) for cell in record]
                for record in csv.reader(csvfile) if record]
    return np.array(rows)


def normalize(X):
    """Min-max scale every column of ``X`` into ``[0, 1]``.

    Args:
        X: Feature matrix, one observation per row.

    Returns:
        Array of the same shape with each column mapped so that its minimum
        becomes 0 and its maximum becomes 1.  A constant column carries no
        information and is mapped to all zeros.
    """
    X = np.asarray(X, dtype=float)
    mins = np.min(X, axis=0)
    rng = np.max(X, axis=0) - mins
    # A constant column has zero range; substitute 1 so that the division
    # yields 0 instead of NaN.
    safe_rng = np.where(rng == 0, 1.0, rng)
    return (X - mins) / safe_rng


def logistic_func(beta, X):
    """Evaluate the logistic (sigmoid) function of ``X . beta^T``.

    Args:
        beta: Coefficients, shape ``(1, n_features)``.
        X: Feature matrix, shape ``(n_samples, n_features)``.

    Returns:
        Probabilities with the shape of ``X . beta^T`` -- ``(n_samples, 1)``
        for a row-vector ``beta``.
    """
    z = np.dot(np.asarray(X, dtype=float), np.asarray(beta, dtype=float).T)
    # exp(-z) overflows to inf for very negative z; 1 / (1 + inf) is the
    # correct limit 0.0, so only the warning is silenced.
    with np.errstate(over="ignore"):
        return 1.0 / (1.0 + np.exp(-z))


def log_gradient(beta, X, y):
    """Gradient of the summed log-loss with respect to ``beta``.

    Args:
        beta: Coefficients, shape ``(1, n_features)``.
        X: Feature matrix, shape ``(n_samples, n_features)``.
        y: 0/1 labels, ``n_samples`` values.

    Returns:
        Array of shape ``(1, n_features)`` equal to ``(p - y)^T . X``.
    """
    X = np.asarray(X, dtype=float)
    # Force both operands to column vectors so the subtraction can never
    # broadcast into an n x n matrix.
    predicted = logistic_func(beta, X).reshape(-1, 1)
    actual = np.asarray(y, dtype=float).reshape(-1, 1)
    return np.dot((predicted - actual).T, X)


def cost_func(beta, X, y):
    """Cost function J: the summed negative log-likelihood.

    The value is the *sum* over observations, not the mean.  That is what the
    original code produced for the ``numpy.matrix`` inputs it was run with,
    and it matches the un-averaged gradient returned by ``log_gradient``.

    Args:
        beta: Coefficients, shape ``(1, n_features)``.
        X: Feature matrix, shape ``(n_samples, n_features)``.
        y: 0/1 labels, ``n_samples`` values.

    Returns:
        The cost as a ``float``.
    """
    z = np.dot(np.asarray(X, dtype=float),
               np.asarray(beta, dtype=float).T).ravel()
    y = np.asarray(y, dtype=float).ravel()
    # -[y*log(s) + (1-y)*log(1-s)] with s = sigmoid(z) simplifies to
    # log(1 + e^z) - y*z, which stays finite when the sigmoid saturates.
    return float(np.sum(np.logaddexp(0.0, z) - y * z))


def grad_desc(X, y, beta, lr=.01, converge_change=.001, max_iter=None):
    """Fit ``beta`` by batch gradient descent.

    Iterates until one step lowers the cost by no more than
    ``converge_change`` (or ``max_iter`` steps have been taken).

    Args:
        X: Feature matrix, shape ``(n_samples, n_features)``.
        y: 0/1 labels, ``n_samples`` values.
        beta: Initial coefficients, shape ``(1, n_features)``; not modified.
        lr: Learning rate.
        converge_change: Stop once the per-step cost decrease is at or below
            this value.
        max_iter: Optional cap on the number of update steps; ``None`` (the
            default) means no cap.

    Returns:
        Tuple ``(beta, num_iter)``: the fitted ``(1, n_features)``
        coefficients and the number of update steps actually performed.
    """
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    beta = np.atleast_2d(np.asarray(beta, dtype=float))

    cost = cost_func(beta, X, y)
    change_cost = 1  # sentinel so the first step runs for thresholds below 1
    num_iter = 0     # counts completed updates (the original started at 1)

    while change_cost > converge_change:
        if max_iter is not None and num_iter >= max_iter:
            break
        old_cost = cost
        beta = beta - lr * log_gradient(beta, X, y)
        cost = cost_func(beta, X, y)
        change_cost = old_cost - cost
        num_iter += 1

    return beta, num_iter


def pred_values(beta, X):
    """Predict 0/1 labels by thresholding the fitted probability at 0.5.

    Args:
        beta: Coefficients, shape ``(1, n_features)``.
        X: Feature matrix, shape ``(n_samples, n_features)``.

    Returns:
        Integer array of predicted labels with singleton axes squeezed out.
    """
    pred_prob = logistic_func(beta, X)
    return np.squeeze(np.where(pred_prob >= .5, 1, 0))


def plot_reg(X, y, beta):
    """Scatter the two classes and draw the fitted decision boundary.

    Args:
        X: Design matrix whose column 0 is the intercept column and whose
            columns 1 and 2 are the two (normalized) features to plot.
        y: 0/1 labels, one per row of ``X``.
        beta: Coefficients ``(b0, b1, b2)``, e.g. shape ``(1, 3)``.

    Raises:
        ImportError: If matplotlib is not installed.
    """
    if plt is None:
        raise ImportError("matplotlib is required for plot_reg")

    X = np.asarray(X, dtype=float)
    y = np.asarray(y).ravel()
    coef = np.asarray(beta, dtype=float).ravel()
    b0, b1, b2 = coef[0], coef[1], coef[2]

    # labelled observations
    x_0 = X[y == 0.0]
    x_1 = X[y == 1.0]

    # plotting points with diff color for diff label
    plt.scatter(x_0[:, 1], x_0[:, 2], c='b', label='y = 0')
    plt.scatter(x_1[:, 1], x_1[:, 2], c='r', label='y = 1')

    # Decision boundary b0 + b1*x1 + b2*x2 = 0, drawn across the whole
    # normalized feature range [0, 1].
    if b2 != 0:
        x1 = np.linspace(0.0, 1.0, 11)
        x2 = -(b0 + b1 * x1) / b2
        plt.plot(x1, x2, c='k', label='reg line')
    elif b1 != 0:
        # b2 == 0 makes the boundary vertical; solving for x2 would divide
        # by zero, so draw it as a vertical line instead.
        plt.axvline(x=-b0 / b1, color='k', label='reg line')

    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()


def main(filename='dataset1.csv'):
    """Fit and plot a logistic regression for the CSV data set.

    Args:
        filename: Numeric CSV whose last column is the 0/1 label and whose
            other columns are the features.
    """
    # load the dataset
    dataset = loadCSV(filename)

    # normalizing feature matrix
    X = normalize(dataset[:, :-1])

    # stacking a column of all ones (the intercept) in front of the features
    X = np.hstack((np.ones((X.shape[0], 1)), X))

    # response vector
    y = dataset[:, -1]

    # initial beta values
    beta = np.zeros((1, X.shape[1]))

    # beta values after running gradient descent
    beta, num_iter = grad_desc(X, y, beta)

    # estimated beta values and number of iterations
    print("Estimated regression coefficients:", beta)
    print("No. of iterations:", num_iter)

    # predicted labels
    y_pred = pred_values(beta, X)

    # number of correctly predicted labels
    print("Correctly predicted labels:", np.sum(y == y_pred))

    # plotting regression line
    plot_reg(X, y, beta)


if __name__ == "__main__":
    main()