KamandPrompt · BotKiller04 · Oct 26, 2023
diff --git a/Cipher.ipynb b/Cipher.ipynb
@@ -0,0 +1,142 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 159,
+   "id": "7cda9455-b81f-4d01-bbc2-cebc8b57cc4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import matplotlib.pyplot as plt, pandas as pd,numpy as np\n",
+    "from sklearn.impute import SimpleImputer, KNNImputer\n",
+    "from sklearn.ensemble import AdaBoostClassifier\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "import sklearn.model_selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 160,
+   "id": "626d5b54-0be2-4923-86ca-f3ad5a3b4c17",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the training set and the test feature set.\n",
+    "df = pd.read_csv(\"ML101_train_dataset.csv\")\n",
+    "df_test = pd.read_csv(\"ML101_dataset_test_feature.csv\")\n",
+    "\n",
+    "# Training rows with a missing value in any of these columns are discarded.\n",
+    "required_columns = ['Caloric Intake', 'Height', 'Cholesterol level', 'Average Daily Steps',\n",
+    "                    'Hours of Sleep', 'Gender', 'LifeStyle', 'Systolic BP', 'Diastolic BP']\n",
+    "# Features removed from both frames; per the original author's note these have the\n",
+    "# least covariance with systolic and diastolic BP.\n",
+    "unused_features = ['Gender', 'Average Daily Steps', 'Hours of Sleep']\n",
+    "\n",
+    "df = (\n",
+    "    df.dropna(subset=required_columns)\n",
+    "    .reset_index(drop=True)\n",
+    "    .drop(columns=unused_features)\n",
+    ")\n",
+    "df_test = df_test.drop(columns=unused_features)\n",
+    "\n",
+    "# Column index of the cleaned training frame; later cells slice it by position.\n",
+    "attributes = df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 161,
+   "id": "1caa087e-bc68-4941-b85f-6764b574d9cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Replace remaining NaNs in the numeric feature columns with the per-column mean.\n",
+    "# The means are learned from the training frame only and then reused for the test\n",
+    "# frame, so test rows with missing values do not propagate NaN into the predictions.\n",
+    "columns_to_impute = ['Caloric Intake','Age','Weight','Height','Cholesterol level','Blood Sugar level']\n",
+    "imputer = SimpleImputer(strategy='mean')\n",
+    "df[columns_to_impute] = imputer.fit_transform(df[columns_to_impute])\n",
+    "# assumes df_test carries the same feature column names as df -- TODO confirm\n",
+    "df_test[columns_to_impute] = imputer.transform(df_test[columns_to_impute])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 162,
+   "id": "4c31cb17-dec0-4cb5-b6be-3767535d106b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the design matrices: each row is [1, feature_1, ..., feature_k], where the\n",
+    "# leading constant 1 multiplies the intercept weight.\n",
+    "y1 = df['Systolic BP']\n",
+    "y2 = df['Diastolic BP']\n",
+    "y3 = df['LifeStyle']\n",
+    "\n",
+    "# Every column except the last three is used as a feature (assumes the three target\n",
+    "# columns above are the last three columns of df -- TODO confirm).\n",
+    "x = np.array(df[attributes[:-3]])\n",
+    "x_test = np.array(df_test)\n",
+    "n_features = len(x[0])\n",
+    "\n",
+    "# Start from all-ones matrices, then overwrite columns 1..k with the feature values\n",
+    "# in one slice assignment instead of copying element by element in Python loops.\n",
+    "x_final = np.array([[1] * (n_features + 1)] * len(x), dtype=float)\n",
+    "x_final[:, 1:] = x\n",
+    "x_test1 = np.array([[1] * (n_features + 1)] * len(x_test), dtype=float)\n",
+    "# Only the first k test columns are used, matched to the training features by position.\n",
+    "x_test1[:, 1:] = x_test[:, :n_features]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 163,
+   "id": "8b0b33cf-37f8-46e4-b493-82b6962e4b57",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Ordinary least squares for both targets: w = pinv(X) @ y, where pinv(X) is the\n",
+    "# Moore-Penrose pseudo-inverse ('dagger') of the design matrix. It is computed once\n",
+    "# and shared by both targets, and it avoids explicitly inverting X^T X, which raises\n",
+    "# when that matrix is singular.\n",
+    "x_dagger = np.linalg.pinv(x_final)\n",
+    "w_sys = np.matmul(x_dagger, np.array(y1))\n",
+    "w_dia = np.matmul(x_dagger, np.array(y2))\n",
+    "\n",
+    "# Predict all test rows at once: one dot product between each row and the weights.\n",
+    "systolic_bp = np.matmul(x_test1, w_sys)\n",
+    "diastolic_bp = np.matmul(x_test1, w_dia)\n",
+    "\n",
+    "# Write the regression predictions; the positional row index is written as the ID column.\n",
+    "op_df = pd.DataFrame({\"Systolic BP\": pd.Series(systolic_bp), \"Diastolic BP\": pd.Series(diastolic_bp)})\n",
+    "op_df = op_df.rename_axis(\"ID\")\n",
+    "op_df.to_csv(\"regression_cipher.csv\")\n",
+    "\n",
+    "# Add the predicted pressures to the test frame so they can be used as classifier\n",
+    "# inputs. The classifier's training matrix is every training column except the last\n",
+    "# (assumes the last column is the LifeStyle label -- TODO confirm).\n",
+    "df_test['Systolic BP'] = systolic_bp\n",
+    "df_test['Diastolic BP'] = diastolic_bp\n",
+    "x_test2 = np.array(df_test)\n",
+    "x_train2 = np.array(df[attributes[:-1]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 164,
+   "id": "e597c37f-b378-4530-8ff7-d40e3fbacb2d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fit AdaBoost (50 estimators) over decision-tree base learners on the training\n",
+    "# matrix, predict LifeStyle for the test rows and write the predictions to CSV.\n",
+    "# A fixed seed makes the fitted ensemble, and therefore the CSV, reproducible on\n",
+    "# Restart & Run All; AdaBoost uses it to seed each tree it creates.\n",
+    "RANDOM_STATE = 42\n",
+    "base_classifier = DecisionTreeClassifier()\n",
+    "ensemble_classifier = AdaBoostClassifier(base_classifier, n_estimators=50, random_state=RANDOM_STATE)\n",
+    "ensemble_classifier.fit(x_train2, y3)\n",
+    "y_pred = ensemble_classifier.predict(x_test2)\n",
+    "df_final = pd.DataFrame({\"LifeStyle\": y_pred})\n",
+    "# NOTE(review): the index column is written without a name here, whereas the\n",
+    "# regression CSV names it ID -- confirm which header the consumer expects.\n",
+    "df_final.to_csv(\"classification_cipher.csv\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}