diff --git a/your-code/pandas_1.ipynb b/your-code/pandas_1.ipynb index a6c6455..907fe2e 100644 --- a/your-code/pandas_1.ipynb +++ b/your-code/pandas_1.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -44,11 +44,30 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "series = pd.Series(lst) # Create a Pandas series\n", + "print(series)" ] }, { @@ -62,11 +81,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The third value is 74.4\n" + ] + } + ], "source": [ - "# your code here" + "third_value = series[2] # access the third value in the series\n", + "print(f\"The third value is {third_value}\")" ] }, { @@ -78,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -96,11 +124,30 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 17, "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7\n" + ] + } + ], + "source": [ + "df = pd.DataFrame(b) # 
create the DataFrame\n", + "print(df)" ] }, { @@ -130,11 +177,32 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7\n" + ] + } + ], + "source": [ + "column_names = ['Score_1','Score_2','Score_3','Score_4','Score_5']\n", + "\n", + "df.columns = column_names # renaming the columns\n", + "print(df)" ] }, { @@ -146,11 +214,32 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7\n" + ] + } + ], + "source": [ + "# subset_df = df.iloc[:, [0,2,4]] # select columns by index\n", + "subset_df = df[['Score_1','Score_3','Score_5']] # create a subset by column names\n", + "\n", + "print(subset_df)" ] }, { @@ -162,11 +251,21 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 35, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The average Score_3 value is: 56.95000000000001\n" + ] + } + ], "source": [ - "# your code here" + "# average_score_3 = df[2].mean()\n", +
"average_score_3 = df['Score_3'].mean()\n", + "print(f\"The average Score_3 value is: {average_score_3}\")" ] }, { @@ -178,11 +277,21 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 37, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The maximum Score_4 value is: 88.8\n" + ] + } + ], "source": [ - "# your code here" + "# max_score_4 = df[3].max()\n", + "max_score_4 = df['Score_4'].max()\n", + "print(f\"The maximum Score_4 value is: {max_score_4}\")" ] }, { @@ -194,11 +303,21 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 39, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The median Score_2 value is: 40.75\n" + ] + } + ], "source": [ - "# your code here" + "# meadian_score_2 = df[1].median()\n", + "meadian_score_2 = df['Score_2'].median()\n", + "print(f\"The median Score_2 value is: {meadian_score_2}\")" ] }, { @@ -210,7 +329,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -231,11 +350,32 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Orders DataFrame:\n", + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00\n" + ] + } + ], + "source": 
[ + "orders_df = pd.DataFrame(orders)\n", + "print(\"Orders DataFrame:\")\n", + "print(orders_df)" ] }, { @@ -247,11 +387,24 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The total quantity ordered is: 2978\n", + "The total revenue generated is: 637.0\n" + ] + } + ], + "source": [ + "total_quantity = orders_df['Quantity'].sum()\n", + "total_revenue = orders_df['Revenue'].sum()\n", + "\n", + "print(f\"The total quantity ordered is: {total_quantity}\")\n", + "print(f\"The total revenue generated is: {total_revenue}\")" ] }, { @@ -263,11 +416,24 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The difference between most 11.95 and least 0.18 expensive prices is 11.77\n" + ] + } + ], + "source": [ + "most_expensive = orders_df['UnitPrice'].max()\n", + "least_expensive = orders_df['UnitPrice'].min()\n", + "\n", + "difference = most_expensive - least_expensive\n", + "\n", + "print(f\"The difference between most {most_expensive} and least {least_expensive} expensive prices is {difference}\")" ] }, { @@ -279,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 133, "metadata": {}, "outputs": [], "source": [ @@ -296,11 +462,31 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Serial No. 
GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 \n" + ] + } + ], + "source": [ + "print(admissions.head())" ] }, { @@ -312,11 +498,55 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Missing Values in Each Column:\n", + " Serial No. 0\n", + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64\n", + "\n", + "Number of Columns with Missing Values: 0\n", + "All Columns Have Missing Values: False\n", + "\n", + "Total Missing Values in the DataFrame: 0\n" + ] + } + ], + "source": [ + "admissions_df = pd.DataFrame(admissions)\n", + "\n", + "# Check for missing values in the DataFrame\n", + "missing_values = pd.isnull(admissions_df)\n", + "\n", + "# Count missing values in each column\n", + "missing_counts = missing_values.sum()\n", + "\n", + "# Count columns with missing values\n", + "columns_with_missing = missing_counts[missing_counts > 0].count()\n", + "\n", + "# Check if all columns have missing values\n", + "all_columns_missing = missing_counts.all()\n", + "\n", + "# Calculate the total number of missing values\n", + "total_missing_values = missing_counts.sum()\n", + "\n", + "# Display the results\n", + "print(\"Missing Values in Each Column:\\n\", missing_counts)\n", + "print(\"\\nNumber of Columns with Missing Values:\", columns_with_missing)\n", + "print(\"All Columns Have Missing Values:\", all_columns_missing)\n", + "print(\"\\nTotal Missing Values in 
the DataFrame:\", total_missing_values)" ] }, { @@ -328,11 +558,34 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR \\\n", + "Serial No. \n", + "1 1 337 118 4 4.5 4.5 \n", + "2 2 316 104 3 3.0 3.5 \n", + "3 3 322 110 3 3.5 2.5 \n", + "4 4 314 103 2 2.0 3.0 \n", + "5 5 330 115 5 4.5 3.0 \n", + "\n", + " CGPA Research Chance of Admit \n", + "Serial No. \n", + "1 9.65 1 0.92 \n", + "2 8.00 1 0.72 \n", + "3 8.67 1 0.80 \n", + "4 8.21 0 0.65 \n", + "5 9.34 1 0.90 \n" + ] + } + ], + "source": [ + "admissions_df = admissions_df.set_index('Serial No.', drop=False) # Set 'Serial No.' as the index \n", + "print(admissions_df.head())" ] }, { @@ -351,13 +604,28 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 108, "metadata": { "scrolled": true }, - "outputs": [], - "source": [ - "# your code here" + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The number of rows where CGPA > 9 and the student has performed research is 101\n" + ] + } + ], + "source": [ + "# Filter rows where CGPA > 9 and Research = 1\n", + "filtered_rows_df = admissions_df[(admissions_df['CGPA'] > 9) & (admissions_df['Research'] == 1)]\n", + "\n", + "# Count the number of rows\n", + "number_of_rows = filtered_rows_df.shape[0]\n", + "\n", + "# Display the result\n", + "print(f\"The number of rows where CGPA > 9 and the student has performed research is {number_of_rows}\")" ] }, { @@ -369,11 +637,42 @@ }, { "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['Serial No.', 'GRE Score', 'TOEFL Score', 
'University Rating', 'SOP',\n", + " 'LOR ', 'CGPA', 'Research', 'Chance of Admit '],\n", + " dtype='object')\n", + "Index(['Serial No.', 'GRE Score', 'TOEFL Score', 'University Rating', 'SOP',\n", + " 'LOR', 'CGPA', 'Research', 'Chance of Admit'],\n", + " dtype='object')\n", + "\n", + "Rows in selection: 5\n", + "The mean chance of admit for students where CGPA > 9 and SOP < 3.5 is 0.80\n" + ] + } + ], + "source": [ + "# Filter rows where CGPA > 9 and SOP < 3.5\n", + "filtered_rows_df = admissions_df[(admissions_df['CGPA'] > 9) & (admissions_df['SOP'] < 3.5)]\n", + "\n", + "print(filtered_rows_df.columns) # Check the real name of the column 'Chance of Admit '\n", + "filtered_rows_df.columns = admissions_df.columns.str.strip() # Remove spaces at the end\n", + "print(filtered_rows_df.columns) # Confirm the trailing spaces were removed\n", + "print()\n", + "\n", + "# Count the number of rows\n", + "number_of_rows = filtered_rows_df.shape[0]\n", + "# mean chance of admit for applications \n", + "mean_chance_of_admit = filtered_rows_df['Chance of Admit'].mean()\n", + "\n", + "# Display the result\n", + "print(f'Rows in selection: {number_of_rows}')\n", + "print(f\"The mean chance of admit for students where CGPA > 9 and SOP < 3.5 is {mean_chance_of_admit:.2f}\")" ] } ], @@ -393,7 +692,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.12.7" }, "toc": { "base_numbering": "", @@ -410,5 +709,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }