diff --git a/your-code/main.ipynb b/your-code/main.ipynb
index 406a5e2..8635465 100644
--- a/your-code/main.ipynb
+++ b/your-code/main.ipynb
@@ -12,7 +12,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"id": "00cf591d-8a5b-499e-8715-1ad140867934",
"metadata": {},
"outputs": [],
@@ -21,6 +21,7 @@
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.metrics import accuracy_score\n",
"\n",
"# Load the dataset (change the path if needed)\n",
"df = pd.read_csv('../data/heart.csv')"
@@ -28,10 +29,158 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"id": "0bb5ea1c-a4e5-4419-bae8-661fe2d82711",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " sex | \n",
+ " cp | \n",
+ " trestbps | \n",
+ " chol | \n",
+ " fbs | \n",
+ " restecg | \n",
+ " thalach | \n",
+ " exang | \n",
+ " oldpeak | \n",
+ " slope | \n",
+ " ca | \n",
+ " thal | \n",
+ " target | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 63 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 145 | \n",
+ " 233 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 150 | \n",
+ " 0 | \n",
+ " 2.3 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 37 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 130 | \n",
+ " 250 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 187 | \n",
+ " 0 | \n",
+ " 3.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 41 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 130 | \n",
+ " 204 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 172 | \n",
+ " 0 | \n",
+ " 1.4 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 56 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 120 | \n",
+ " 236 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 178 | \n",
+ " 0 | \n",
+ " 0.8 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 57 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 120 | \n",
+ " 354 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 163 | \n",
+ " 1 | \n",
+ " 0.6 | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
+ "0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
+ "1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
+ "2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
+ "3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
+ "4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
+ "\n",
+ " ca thal target \n",
+ "0 0 1 1 \n",
+ "1 0 2 1 \n",
+ "2 0 2 1 \n",
+ "3 0 2 1 \n",
+ "4 0 2 1 "
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"df.head()"
]
@@ -46,7 +195,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"id": "23ad7e40-87f3-4b93-bef9-a9ddb5881ddc",
"metadata": {},
"outputs": [],
@@ -75,10 +224,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"id": "d39376f1-b4ca-44c0-8364-d11b9a7605f9",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test accuracy: 0.8026315789473685\n",
+ "Train accuracy: 1.0\n"
+ ]
+ }
+ ],
"source": [
"#Create and Train a Decision Tree Classifier and print the train and test accuracy\n",
"\n",
@@ -86,12 +244,18 @@
"from sklearn.metrics import accuracy_score, mean_squared_error\n",
"\n",
"# Train Decision Tree\n",
- "\n",
+ "dt_model = DecisionTreeClassifier()\n",
+ "dt_model.fit(X_train_scaled, y_train)\n",
"\n",
"# Predictions and evaluation\n",
+ "y_pred_test = dt_model.predict(X_test_scaled)\n",
+ "y_pred_train = dt_model.predict(X_train_scaled)\n",
"\n",
- "\n",
- "# Evaluate performance\n"
+ "# Evaluate performance\n",
+ "accuracy_test = accuracy_score(y_test, y_pred_test)\n",
+ "accuracy_train = accuracy_score(y_train, y_pred_train)\n",
+ "print(f\"Test accuracy: {accuracy_test}\")\n",
+ "print(f\"Train accuracy: {accuracy_train}\")"
]
},
{
@@ -104,7 +268,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"id": "9c60160a-b179-4896-a026-4beab803bb4e",
"metadata": {},
"outputs": [],
@@ -135,10 +299,19 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 32,
"id": "8fc76766-a90c-47ed-bd02-66827a1dc115",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test accuracy: 0.8026315789473685\n",
+ "Train accuracy: 0.986784140969163\n"
+ ]
+ }
+ ],
"source": [
"# Create and Train a BaggingClassifier. \n",
"# Use as base estimator a weak decision tree (max_depth=1) and 100 estimators to really over a lot of different data samples\n",
@@ -147,12 +320,18 @@
"from sklearn.ensemble import BaggingClassifier\n",
"\n",
"# Train BaggingClassifier\n",
- "\n",
+ "bag_model = BaggingClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=100)  # 100 bagged depth-1 stumps, as the exercise asks\n",
+ "bag_model.fit(X_train_scaled, y_train)\n",
"\n",
"# Predictions and evaluation\n",
+ "y_pred_test = bag_model.predict(X_test_scaled)\n",
+ "y_pred_train = bag_model.predict(X_train_scaled)\n",
"\n",
- "\n",
- "# Evaluate performance\n"
+ "# Evaluate performance\n",
+ "accuracy_test = accuracy_score(y_test, y_pred_test)\n",
+ "accuracy_train = accuracy_score(y_train, y_pred_train)\n",
+ "print(f\"Test accuracy: {accuracy_test}\")\n",
+ "print(f\"Train accuracy: {accuracy_train}\")"
]
},
{
@@ -165,7 +344,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 22,
"id": "9f892484-618a-46fe-8e56-0a18fa652ed8",
"metadata": {},
"outputs": [],
@@ -192,10 +371,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 38,
"id": "4bba1773-b0b0-44ba-a838-58b8c466ff88",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Test accuracy: 0.8421052631578947\n",
+ "Train accuracy: 0.9383259911894273\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\crvid\\anaconda3\\Lib\\site-packages\\sklearn\\ensemble\\_weight_boosting.py:519: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ],
"source": [
"# Create and Train a AdaBoostClassifier. \n",
"# Use as base estimator a weak decision tree (max_depth=1) and 100 estimators to really target the specific behaviors of this phenomenon\n",
@@ -204,12 +400,18 @@
"from sklearn.ensemble import AdaBoostClassifier\n",
"\n",
"# Train AdaBoost\n",
- "\n",
+ "ada_model = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=100)  # 100 boosted depth-1 stumps, as the exercise asks\n",
+ "ada_model.fit(X_train_scaled, y_train)\n",
"\n",
"# Predictions and evaluation\n",
+ "y_pred_test = ada_model.predict(X_test_scaled)\n",
+ "y_pred_train = ada_model.predict(X_train_scaled)\n",
"\n",
- "\n",
- "# Evaluate performance\n"
+ "# Evaluate performance\n",
+ "accuracy_test = accuracy_score(y_test, y_pred_test)\n",
+ "accuracy_train = accuracy_score(y_train, y_pred_train)\n",
+ "print(f\"Test accuracy: {accuracy_test}\")\n",
+ "print(f\"Train accuracy: {accuracy_train}\")"
]
},
{
@@ -222,7 +424,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"id": "4b5e21fe-0a8f-45f6-a2d3-74261941f9c1",
"metadata": {},
"outputs": [],
@@ -235,7 +437,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "Python 3 (ipykernel)",
+ "display_name": "base",
"language": "python",
"name": "python3"
},
@@ -249,7 +451,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.12.2"
+ "version": "3.12.4"
}
},
"nbformat": 4,