diff --git a/your-code/main.ipynb b/your-code/main.ipynb index 406a5e2..8635465 100644 --- a/your-code/main.ipynb +++ b/your-code/main.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "00cf591d-8a5b-499e-8715-1ad140867934", "metadata": {}, "outputs": [], @@ -21,6 +21,7 @@ "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.metrics import accuracy_score\n", "\n", "# Load the dataset (change the path if needed)\n", "df = pd.read_csv('../data/heart.csv')" @@ -28,10 +29,158 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "0bb5ea1c-a4e5-4419-bae8-661fe2d82711", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
\n", + "
" + ], + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 0 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 0 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 2 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 2 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 2 \n", + "\n", + " ca thal target \n", + "0 0 1 1 \n", + "1 0 2 1 \n", + "2 0 2 1 \n", + "3 0 2 1 \n", + "4 0 2 1 " + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.head()" ] @@ -46,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "23ad7e40-87f3-4b93-bef9-a9ddb5881ddc", "metadata": {}, "outputs": [], @@ -75,10 +224,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "d39376f1-b4ca-44c0-8364-d11b9a7605f9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy: 0.8026315789473685\n", + "Train accuracy: 1.0\n" + ] + } + ], "source": [ "#Create and Train a Decision Tree Classifier and print the train and test accuracy\n", "\n", @@ -86,12 +244,18 @@ "from sklearn.metrics import accuracy_score, mean_squared_error\n", "\n", "# Train Decision Tree\n", - "\n", + "dt_model = DecisionTreeClassifier()  # all hyperparameters left at library defaults\n", + "dt_model.fit(X_train_scaled, y_train)\n", "\n", "# Predictions and evaluation\n", + "y_pred_test = dt_model.predict(X_test_scaled)\n", + "y_pred_train = dt_model.predict(X_train_scaled)  # scored on the same rows used for fitting\n", "\n", - "\n", - "# Evaluate performance\n" + "# Evaluate performance\n", + "accuracy_test = accuracy_score(y_test, y_pred_test)\n", + "accuracy_train = accuracy_score(y_train, y_pred_train)  # compare with accuracy_test to see the train/test gap\n", + "print(f\"Test accuracy: {accuracy_test}\")\n", + "print(f\"Train accuracy: {accuracy_train}\")" ] }, { @@ -104,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "9c60160a-b179-4896-a026-4beab803bb4e", "metadata": {}, "outputs": [], @@ 
-135,10 +299,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "8fc76766-a90c-47ed-bd02-66827a1dc115", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy: 0.8026315789473685\n", + "Train accuracy: 0.986784140969163\n" + ] + } + ], "source": [ "# Create and Train a BaggingClassifier. \n", "# Use as base estimator a weak decision tree (max_depth=1) and 100 estimators to really over a lot of different data samples\n", @@ -147,12 +320,18 @@ "from sklearn.ensemble import BaggingClassifier\n", "\n", "# Train BaggingClassifier\n", - "\n", + "bag_model = BaggingClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=100)  # depth-1 trees, 100 of them, as the instructions above require\n", + "bag_model.fit(X_train_scaled, y_train)\n", "\n", "# Predictions and evaluation\n", + "y_pred_test = bag_model.predict(X_test_scaled)\n", + "y_pred_train = bag_model.predict(X_train_scaled)  # scored on the same rows used for fitting\n", "\n", - "\n", - "# Evaluate performance\n" + "# Evaluate performance\n", + "accuracy_test = accuracy_score(y_test, y_pred_test)\n", + "accuracy_train = accuracy_score(y_train, y_pred_train)\n", + "print(f\"Test accuracy: {accuracy_test}\")\n", + "print(f\"Train accuracy: {accuracy_train}\")" ] }, { @@ -165,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "id": "9f892484-618a-46fe-8e56-0a18fa652ed8", "metadata": {}, "outputs": [], @@ -192,10 +371,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "id": "4bba1773-b0b0-44ba-a838-58b8c466ff88", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test accuracy: 0.8421052631578947\n", + "Train accuracy: 0.9383259911894273\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\crvid\\anaconda3\\Lib\\site-packages\\sklearn\\ensemble\\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. 
Use the SAMME algorithm to circumvent this warning.\n", + " warnings.warn(\n" + ] + } + ], "source": [ "# Create and Train a AdaBoostClassifier. \n", "# Use as base estimator a weak decision tree (max_depth=1) and 100 estimators to really target the specific behaviors of this phenomenon\n", @@ -204,12 +400,18 @@ "from sklearn.ensemble import AdaBoostClassifier\n", "\n", "# Train AdaBoost\n", - "\n", + "ada_model = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=100)  # depth-1 trees, 100 boosting rounds, as the instructions above require\n", + "ada_model.fit(X_train_scaled, y_train)\n", "\n", "# Predictions and evaluation\n", + "y_pred_test = ada_model.predict(X_test_scaled)\n", + "y_pred_train = ada_model.predict(X_train_scaled)  # scored on the same rows used for fitting\n", "\n", - "\n", - "# Evaluate performance\n" + "# Evaluate performance\n", + "accuracy_test = accuracy_score(y_test, y_pred_test)\n", + "accuracy_train = accuracy_score(y_train, y_pred_train)\n", + "print(f\"Test accuracy: {accuracy_test}\")\n", + "print(f\"Train accuracy: {accuracy_train}\")" ] }, { @@ -222,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "4b5e21fe-0a8f-45f6-a2d3-74261941f9c1", "metadata": {}, "outputs": [], @@ -235,7 +437,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, @@ -249,7 +451,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.12.4" } }, "nbformat": 4,