From a3bd83bb85bcc8069ce67b8baf05ef4bcc0369d3 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 00:43:52 +0100 Subject: [PATCH 01/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index fa192115..a8c7e446 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -49,23 +49,6 @@ "df['Y'] = sample_data.target" ] }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(442, 10)\n" - ] - } - ], - "source": [ - "print(df.shape)" - ] - }, { "cell_type": "code", "execution_count": 11, From c3b6e0a3b8043f8947d00a5be08ab4dee77dae0d Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:24:35 +0100 Subject: [PATCH 02/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 286 +----------------- 1 file changed, 3 insertions(+), 283 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index a8c7e446..49cd4061 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -1,19 +1,3 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Train a Ridge Regression Model on the Diabetes Dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This notebook loads the Diabetes dataset from sklearn, splits the data into training and validation sets, trains a Ridge regression model, validates the model on the validation set, and saves the model." - ] - }, { "cell_type": "code", "execution_count": 1, @@ -28,13 +12,6 @@ "import pandas as pd" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Data" - ] - }, { "cell_type": "code", "execution_count": 6, @@ -49,211 +26,6 @@ "df['Y'] = sample_data.target" ] }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agesexbmibps1s2s3s4s5s6Y
count4.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+02442.000000
mean-3.634285e-161.308343e-16-8.045349e-161.281655e-16-8.835316e-171.327024e-16-4.574646e-163.777301e-16-3.830854e-16-3.412882e-16152.133484
std4.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-0277.093005
min-1.072256e-01-4.464164e-02-9.027530e-02-1.123996e-01-1.267807e-01-1.156131e-01-1.023071e-01-7.639450e-02-1.260974e-01-1.377672e-0125.000000
25%-3.729927e-02-4.464164e-02-3.422907e-02-3.665645e-02-3.424784e-02-3.035840e-02-3.511716e-02-3.949338e-02-3.324879e-02-3.317903e-0287.000000
50%5.383060e-03-4.464164e-02-7.283766e-03-5.670611e-03-4.320866e-03-3.819065e-03-6.584468e-03-2.592262e-03-1.947634e-03-1.077698e-03140.500000
75%3.807591e-025.068012e-023.124802e-023.564384e-022.835801e-022.984439e-022.931150e-023.430886e-023.243323e-022.791705e-02211.500000
max1.107267e-015.068012e-021.705552e-011.320442e-011.539137e-011.987880e-011.811791e-011.852344e-011.335990e-011.356118e-01346.000000
\n", - "
" - ], - "text/plain": [ - " age sex bmi bp s1 \\\n", - "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", - "mean -3.634285e-16 1.308343e-16 -8.045349e-16 1.281655e-16 -8.835316e-17 \n", - "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", - "min -1.072256e-01 -4.464164e-02 -9.027530e-02 -1.123996e-01 -1.267807e-01 \n", - "25% -3.729927e-02 -4.464164e-02 -3.422907e-02 -3.665645e-02 -3.424784e-02 \n", - "50% 5.383060e-03 -4.464164e-02 -7.283766e-03 -5.670611e-03 -4.320866e-03 \n", - "75% 3.807591e-02 5.068012e-02 3.124802e-02 3.564384e-02 2.835801e-02 \n", - "max 1.107267e-01 5.068012e-02 1.705552e-01 1.320442e-01 1.539137e-01 \n", - "\n", - " s2 s3 s4 s5 s6 \\\n", - "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", - "mean 1.327024e-16 -4.574646e-16 3.777301e-16 -3.830854e-16 -3.412882e-16 \n", - "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", - "min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n", - "25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n", - "50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n", - "75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n", - "max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 \n", - "\n", - " Y \n", - "count 442.000000 \n", - "mean 152.133484 \n", - "std 77.093005 \n", - "min 25.000000 \n", - "25% 87.000000 \n", - "50% 140.500000 \n", - "75% 211.500000 \n", - "max 346.000000 " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# All data in a single dataframe\n", - "df.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Split Data into Training and Validation Sets" - ] - }, { "cell_type": "code", "execution_count": 12, @@ -269,30 +41,11 @@ " \"test\": {\"X\": X_test, \"y\": y_test}}" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train Model on Training Set" - ] - }, { "cell_type": "code", "execution_count": 16, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,\n", - " normalize=False, random_state=None, solver='auto', tol=0.001)" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# experiment parameters\n", "args = {\n", @@ -303,26 +56,11 @@ "reg_model.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Validate Model on Validation Set" - ] - }, { "cell_type": "code", "execution_count": 18, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'mse': 3298.9096058070622}\n" - ] - } - ], + "outputs": [], "source": [ "preds = reg_model.predict(data[\"test\"][\"X\"])\n", "mse = mean_squared_error(preds, y_test)\n", @@ -330,29 +68,11 @@ "print(metrics)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Save Model" - ] - }, { "cell_type": "code", "execution_count": 7, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['sklearn_regression_model.pkl']" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_name = \"sklearn_regression_model.pkl\"\n", "\n", From f2ff0b6baa98b5d4d1733f164fc5e1e0801eb303 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:29:18 +0100 Subject: [PATCH 03/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 35 +------------------ 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 49cd4061..25bbfb7e 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -68,37 +68,4 @@ "print(metrics)" ] }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "model_name = \"sklearn_regression_model.pkl\"\n", - "\n", - "joblib.dump(value=reg, filename=model_name)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + From 465393cf3fe70206609a75cd4acd78415ae4fe67 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:29:52 +0100 Subject: [PATCH 04/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 25bbfb7e..cf986d2a 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -67,5 +67,5 @@ "metrics = {\"mse\": mse}\n", "print(metrics)" ] - }, + } From 304587e6350b8f368602958e7ce6d81196c8c91d Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:38:48 +0100 Subject: [PATCH 05/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index cf986d2a..6f7891ca 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -1,4 +1,20 @@ +{ + "cells": [ { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Train a Ridge Regression Model on the Diabetes Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook loads the Diabetes dataset from sklearn, splits the data into training and validation sets, trains a Ridge regression model, validates the model on the validation set, and saves the model." + ] +}, +{ "cell_type": "code", "execution_count": 1, "metadata": {}, From 7e07ff57e1cf16a0391b64ded07956944589bfc4 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:42:44 +0100 Subject: [PATCH 06/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 324 +++++++++++++++++- 1 file changed, 319 insertions(+), 5 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 6f7891ca..fa192115 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -13,8 +13,8 @@ "source": [ "This notebook loads the Diabetes dataset from sklearn, splits the data into training and validation sets, trains a Ridge regression model, validates the model on the validation set, and saves the model." ] -}, -{ + }, + { "cell_type": "code", "execution_count": 1, "metadata": {}, @@ -28,6 +28,13 @@ "import pandas as pd" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Data" + ] + }, { "cell_type": "code", "execution_count": 6, @@ -42,6 +49,228 @@ "df['Y'] = sample_data.target" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(442, 10)\n" + ] + } + ], + "source": [ + "print(df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexbmibps1s2s3s4s5s6Y
count4.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+02442.000000
mean-3.634285e-161.308343e-16-8.045349e-161.281655e-16-8.835316e-171.327024e-16-4.574646e-163.777301e-16-3.830854e-16-3.412882e-16152.133484
std4.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-0277.093005
min-1.072256e-01-4.464164e-02-9.027530e-02-1.123996e-01-1.267807e-01-1.156131e-01-1.023071e-01-7.639450e-02-1.260974e-01-1.377672e-0125.000000
25%-3.729927e-02-4.464164e-02-3.422907e-02-3.665645e-02-3.424784e-02-3.035840e-02-3.511716e-02-3.949338e-02-3.324879e-02-3.317903e-0287.000000
50%5.383060e-03-4.464164e-02-7.283766e-03-5.670611e-03-4.320866e-03-3.819065e-03-6.584468e-03-2.592262e-03-1.947634e-03-1.077698e-03140.500000
75%3.807591e-025.068012e-023.124802e-023.564384e-022.835801e-022.984439e-022.931150e-023.430886e-023.243323e-022.791705e-02211.500000
max1.107267e-015.068012e-021.705552e-011.320442e-011.539137e-011.987880e-011.811791e-011.852344e-011.335990e-011.356118e-01346.000000
\n", + "
" + ], + "text/plain": [ + " age sex bmi bp s1 \\\n", + "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", + "mean -3.634285e-16 1.308343e-16 -8.045349e-16 1.281655e-16 -8.835316e-17 \n", + "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", + "min -1.072256e-01 -4.464164e-02 -9.027530e-02 -1.123996e-01 -1.267807e-01 \n", + "25% -3.729927e-02 -4.464164e-02 -3.422907e-02 -3.665645e-02 -3.424784e-02 \n", + "50% 5.383060e-03 -4.464164e-02 -7.283766e-03 -5.670611e-03 -4.320866e-03 \n", + "75% 3.807591e-02 5.068012e-02 3.124802e-02 3.564384e-02 2.835801e-02 \n", + "max 1.107267e-01 5.068012e-02 1.705552e-01 1.320442e-01 1.539137e-01 \n", + "\n", + " s2 s3 s4 s5 s6 \\\n", + "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", + "mean 1.327024e-16 -4.574646e-16 3.777301e-16 -3.830854e-16 -3.412882e-16 \n", + "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", + "min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n", + "25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n", + "50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n", + "75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n", + "max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 \n", + "\n", + " Y \n", + "count 442.000000 \n", + "mean 152.133484 \n", + "std 77.093005 \n", + "min 25.000000 \n", + "25% 87.000000 \n", + "50% 140.500000 \n", + "75% 211.500000 \n", + "max 346.000000 " + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# All data in a single dataframe\n", + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Split Data into Training and Validation Sets" + ] + }, { "cell_type": "code", "execution_count": 12, @@ -57,11 +286,30 @@ " \"test\": {\"X\": X_test, \"y\": y_test}}" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train Model on Training Set" + ] + }, { "cell_type": "code", "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,\n", + " normalize=False, random_state=None, solver='auto', tol=0.001)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# experiment parameters\n", "args = {\n", @@ -72,16 +320,82 @@ "reg_model.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validate Model on Validation Set" + ] + }, { "cell_type": "code", "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'mse': 3298.9096058070622}\n" + ] + } + ], "source": [ "preds = reg_model.predict(data[\"test\"][\"X\"])\n", "mse = mean_squared_error(preds, y_test)\n", "metrics = {\"mse\": mse}\n", "print(metrics)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['sklearn_regression_model.pkl']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_name = \"sklearn_regression_model.pkl\"\n", + "\n", + "joblib.dump(value=reg, filename=model_name)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" } - + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 380d272db475f57cfc5358b668a1034a12097c7d Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:46:38 +0100 Subject: [PATCH 07/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index fa192115..a8c7e446 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -49,23 +49,6 @@ "df['Y'] = sample_data.target" ] }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(442, 10)\n" - ] - } - ], - "source": [ - "print(df.shape)" - ] - }, { "cell_type": "code", "execution_count": 11, From f3aec3c0f3be65cdf796448e432ad08e7eb2ebe1 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 01:51:19 +0100 Subject: [PATCH 08/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 198 ------------------ 1 file changed, 198 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index a8c7e446..b95b2e54 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -49,204 +49,6 @@ "df['Y'] = sample_data.target" ] }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
agesexbmibps1s2s3s4s5s6Y
count4.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+024.420000e+02442.000000
mean-3.634285e-161.308343e-16-8.045349e-161.281655e-16-8.835316e-171.327024e-16-4.574646e-163.777301e-16-3.830854e-16-3.412882e-16152.133484
std4.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-024.761905e-0277.093005
min-1.072256e-01-4.464164e-02-9.027530e-02-1.123996e-01-1.267807e-01-1.156131e-01-1.023071e-01-7.639450e-02-1.260974e-01-1.377672e-0125.000000
25%-3.729927e-02-4.464164e-02-3.422907e-02-3.665645e-02-3.424784e-02-3.035840e-02-3.511716e-02-3.949338e-02-3.324879e-02-3.317903e-0287.000000
50%5.383060e-03-4.464164e-02-7.283766e-03-5.670611e-03-4.320866e-03-3.819065e-03-6.584468e-03-2.592262e-03-1.947634e-03-1.077698e-03140.500000
75%3.807591e-025.068012e-023.124802e-023.564384e-022.835801e-022.984439e-022.931150e-023.430886e-023.243323e-022.791705e-02211.500000
max1.107267e-015.068012e-021.705552e-011.320442e-011.539137e-011.987880e-011.811791e-011.852344e-011.335990e-011.356118e-01346.000000
\n", - "
" - ], - "text/plain": [ - " age sex bmi bp s1 \\\n", - "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", - "mean -3.634285e-16 1.308343e-16 -8.045349e-16 1.281655e-16 -8.835316e-17 \n", - "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", - "min -1.072256e-01 -4.464164e-02 -9.027530e-02 -1.123996e-01 -1.267807e-01 \n", - "25% -3.729927e-02 -4.464164e-02 -3.422907e-02 -3.665645e-02 -3.424784e-02 \n", - "50% 5.383060e-03 -4.464164e-02 -7.283766e-03 -5.670611e-03 -4.320866e-03 \n", - "75% 3.807591e-02 5.068012e-02 3.124802e-02 3.564384e-02 2.835801e-02 \n", - "max 1.107267e-01 5.068012e-02 1.705552e-01 1.320442e-01 1.539137e-01 \n", - "\n", - " s2 s3 s4 s5 s6 \\\n", - "count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n", - "mean 1.327024e-16 -4.574646e-16 3.777301e-16 -3.830854e-16 -3.412882e-16 \n", - "std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n", - "min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n", - "25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n", - "50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n", - "75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n", - "max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 \n", - "\n", - " Y \n", - "count 442.000000 \n", - "mean 152.133484 \n", - "std 77.093005 \n", - "min 25.000000 \n", - "25% 87.000000 \n", - "50% 140.500000 \n", - "75% 211.500000 \n", - "max 346.000000 " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# All data in a single dataframe\n", - "df.describe()" - ] - }, { "cell_type": "markdown", "metadata": {}, From 77c01c2b37c50e290ca02a20b4401e7cf1c284d6 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:05:06 +0100 Subject: [PATCH 09/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index b95b2e54..2caab73f 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -62,6 +62,7 @@ "metadata": {}, "outputs": [], "source": [ + "def split_data(df):" "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", @@ -69,6 +70,7 @@ " X, y, test_size=0.2, random_state=0)\n", "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", " \"test\": {\"X\": X_test, \"y\": y_test}}" + "return data" ] }, { From 82209b807e90876d5ee3cd204187ad29d2387102 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:07:12 +0100 Subject: [PATCH 10/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 2caab73f..db4f12ff 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -70,7 +70,6 @@ " X, y, test_size=0.2, random_state=0)\n", "data = {\"train\": {\"X\": X_train, \"y\": y_train},\n", " \"test\": {\"X\": X_test, \"y\": y_test}}" - "return data" ] }, { From f0f7d8dac6eb8d2439a10e433de7071422453e0b Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:08:08 +0100 Subject: [PATCH 11/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index db4f12ff..4d5eee54 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -62,7 +62,6 @@ "metadata": {}, "outputs": [], "source": [ - "def split_data(df):" "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", @@ -96,7 +95,7 @@ "output_type": "execute_result" } ], - "source": [ + "source": [ "# experiment parameters\n", "args = {\n", " \"alpha\": 0.5\n", From f7c5a8315d021cdc1605f87a56292a84274ab559 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:25:32 +0100 Subject: [PATCH 12/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 4d5eee54..897d69fb 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -56,6 +56,21 @@ "## Split Data into Training and Validation Sets" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Split Data into Training and Validation Sets" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": ["def = split_data(df):" + ] + }, { "cell_type": "code", "execution_count": 12, From 6affa5bc6add28de85d65f516d5f0298f0dad23b Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:29:39 +0100 Subject: [PATCH 13/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 897d69fb..e41dabfc 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -56,13 +56,6 @@ "## Split Data into Training and Validation Sets" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Split Data into Training and Validation Sets" - ] - }, { "cell_type": "code", "execution_count": 10, From 553b6eea153c5c64d1251281dac6a4199fc43d22 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:34:50 +0100 Subject: [PATCH 14/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index e41dabfc..f9c2d20e 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -53,7 +53,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Split Data into Training and Validation Sets" + "## Split the dataframe into test and train data" ] }, { From 9bee5a799b73c94aa65e8cc9654463b829b28421 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:36:45 +0100 Subject: [PATCH 15/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index f9c2d20e..746e1f8e 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ From a793e9bf84e46c5eccff94be6ff0db9e20ec0793 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 02:56:47 +0100 Subject: [PATCH 16/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 746e1f8e..65c880e7 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -60,16 +60,9 @@ "cell_type": "code", "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": ["return data" + ], "source": ["def = split_data(df):" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", From 7ddc60fe99a9b7477a16846c4f8cfe93bc703777 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 03:06:01 +0100 Subject: [PATCH 17/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 65c880e7..4f219500 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -60,8 +60,7 @@ "cell_type": "code", "execution_count": 10, "metadata": {}, - "outputs": ["return data" - ], + "outputs": [], "source": ["def = split_data(df):" "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", From 9945fae7d76b33b4f15c558c7e8bd3c27f203efb Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 03:12:07 +0100 Subject: [PATCH 18/23] Update Diabetes Ridge Regression Training.ipynb --- .../Diabetes Ridge Regression Training.ipynb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 4f219500..b25a3d16 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -62,6 +62,16 @@ "metadata": {}, "outputs": [], "source": ["def = split_data(df):" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": ["def = split_data(df):" + ] + }, "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", From 79ae7f556852656ec862b301fcc30c16d605a40c Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 03:15:56 +0100 Subject: [PATCH 19/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index b25a3d16..6df1a01d 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -70,8 +70,7 @@ "metadata": {}, "outputs": [], "source": ["def = split_data(df):" - ] - }, + ], "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", From 2b98bb33bd25fd6db33a66573338e95ed1f5730e Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 03:22:26 +0100 Subject: [PATCH 20/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 6df1a01d..03a36a48 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -70,7 +70,6 @@ "metadata": {}, "outputs": [], "source": ["def = split_data(df):" - ], "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", From 7b2e7f047ff9f7c8006903b30e2d9700c06e2500 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 03:29:50 +0100 Subject: [PATCH 21/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index 03a36a48..bc258fe4 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -61,7 +61,8 @@ "execution_count": 10, "metadata": {}, "outputs": [], - "source": ["def = split_data(df):" + "source": [ + "def = split_data(df):" ] }, { @@ -69,7 +70,8 @@ "execution_count": 10, "metadata": {}, "outputs": [], - "source": ["def = split_data(df):" + "source": [ + "def = split_data(df):", "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", From c8ce23270763032dda78b9134f6fcbed5bf7cea5 Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 03:33:59 +0100 Subject: [PATCH 22/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index bc258fe4..a4ad3767 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -71,7 +71,7 @@ "metadata": {}, "outputs": [], "source": [ - "def = split_data(df):", + "def = split_data(df):\n", "X = df.drop('Y', axis=1).values\n", "y = df['Y'].values\n", "\n", From b253b151c3ffbc63c8dc1be338ea68a795f1896e Mon Sep 17 00:00:00 2001 From: Ugwu Gabby <107874044+GabbyHills@users.noreply.github.com> Date: Sat, 5 Aug 2023 03:42:48 +0100 Subject: [PATCH 23/23] Update Diabetes Ridge Regression Training.ipynb --- experimentation/Diabetes Ridge Regression Training.ipynb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/experimentation/Diabetes Ridge Regression Training.ipynb b/experimentation/Diabetes Ridge Regression Training.ipynb index a4ad3767..7fbce6ee 100644 --- a/experimentation/Diabetes Ridge Regression Training.ipynb +++ b/experimentation/Diabetes Ridge Regression Training.ipynb @@ -53,7 +53,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Split the dataframe into test and train data" + "## Split Data into Training and Validation Sets" ] }, { @@ -65,6 +65,13 @@ "def = split_data(df):" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Split the dataframe into test and train data" + ] + }, { "cell_type": "code", "execution_count": 10,