diff --git a/Lab6_pandas_1.ipynb b/Lab6_pandas_1.ipynb new file mode 100644 index 0000000..89fc55c --- /dev/null +++ b/Lab6_pandas_1.ipynb @@ -0,0 +1,3765 @@ +{ + "cells": [ + { + "cell_type": "code", + "source": [ + "\n", + "!git clone https://github.com/Ingtec/lab-pandas-en.git\n", + "\n", + "\n", + "%cd lab-pandas-en" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zBGn_uFPdGdz", + "outputId": "9ec06d87-97cb-4852-bfe5-49be77b322e5" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'lab-pandas-en'...\n", + "remote: Enumerating objects: 14, done.\u001b[K\n", + "remote: Counting objects: 100% (2/2), done.\u001b[K\n", + "remote: Compressing objects: 100% (2/2), done.\u001b[K\n", + "remote: Total 14 (delta 0), reused 0 (delta 0), pack-reused 12 (from 1)\u001b[K\n", + "Receiving objects: 100% (14/14), 343.01 KiB | 6.60 MiB/s, done.\n", + "/content/lab-pandas-en\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import sys\n", + "\n", + "\n", + "sys.path.append('/content/lab-pandas-en/your-code')\n", + "\n", + "import pandas as pd\n", + "admissions = pd.read_csv('Admission_Predict.csv')\n", + "\n", + "\n", + "print(admissions.head())\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4orBTy4OgRTv", + "outputId": "243e5eb2-9bbe-4906-d48e-9dbf04515e98" + }, + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Serial No. 
GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 \n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ueBLRJC-dE-O" + }, + "source": [ + "# Introduction to Pandas Lab\n", + "\n", + "Complete the following set of exercises to solidify your knowledge of Pandas fundamentals." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u3jMT8DUdE-P" + }, + "source": [ + "### 1. Import Numpy and Pandas and alias them to `np` and `pd` respectively." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "1zjhQ_p7dE-P" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WHobkuZydE-Q" + }, + "source": [ + "### 2. Create a Pandas Series containing the elements of the list below." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "j3lX7L5YdE-Q" + }, + "outputs": [], + "source": [ + "lst = [5.7, 75.2, 74.4, 84.0, 66.5, 66.3, 55.8, 75.7, 29.1, 43.7]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 398 + }, + "id": "rJcdG9exdE-Q", + "outputId": "bb5a4e51-9f45-46a6-fb1f-b12ad249d80f" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
05.7
175.2
274.4
384.0
466.5
566.3
655.8
775.7
829.1
943.7
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "lst = [5.7, 75.2, 74.4, 84.0, 66.5, 66.3, 55.8, 75.7, 29.1, 43.7]\n", + "series = pd.Series(lst)\n", + "series" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IMpSP0JFdE-Q" + }, + "source": [ + "### 3. Use indexing to return the third value in the Series above.\n", + "\n", + "*Hint: Remember that indexing begins at 0.*" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0gKY-j5fdE-Q", + "outputId": "b472c16b-e2b0-42c2-89b0-44748bb7f29b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "74.4" + ] + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "third_value = series[2] # Remember indexing starts at 0\n", + "third_value\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xYgfKBq1dE-Q" + }, + "source": [ + "### 4. Create a Pandas DataFrame from the list of lists below. Each sublist should be represented as a row." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "ioRHZf5hdE-R" + }, + "outputs": [], + "source": [ + "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", + " [61.3, 40.8, 30.8, 37.8, 87.6],\n", + " [20.6, 73.2, 44.2, 14.6, 91.8],\n", + " [57.4, 0.1, 96.1, 4.2, 69.5],\n", + " [83.6, 20.5, 85.4, 22.8, 35.9],\n", + " [49.0, 69.0, 0.1, 31.8, 89.1],\n", + " [23.3, 40.7, 95.0, 83.8, 26.9],\n", + " [27.6, 26.4, 53.8, 88.8, 68.5],\n", + " [96.6, 96.4, 53.4, 72.4, 50.1],\n", + " [73.7, 39.0, 43.2, 81.6, 34.7]]" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "j6mMWjPzdE-R", + "outputId": "b81f9c00-fba5-457f-841b-910087cd7189" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " 0 1 2 3 4\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": 0,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25.64898958373734,\n \"min\": 20.6,\n \"max\": 96.6,\n \"num_unique_values\": 10,\n \"samples\": [\n 96.6,\n 61.3,\n 49.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 1,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 32.12203985497254,\n \"min\": 0.1,\n \"max\": 96.4,\n \"num_unique_values\": 10,\n \"samples\": [\n 96.4,\n 40.8,\n 69.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 2,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30.168278041678146,\n \"min\": 0.1,\n \"max\": 96.1,\n \"num_unique_values\": 10,\n \"samples\": [\n 53.4,\n 30.8,\n 0.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 3,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 31.39344588356691,\n \"min\": 4.2,\n \"max\": 88.8,\n \"num_unique_values\": 10,\n \"samples\": [\n 72.4,\n 37.8,\n 31.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 4,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24.562313047069125,\n \"min\": 26.9,\n \"max\": 91.8,\n \"num_unique_values\": 10,\n \"samples\": [\n 50.1,\n 87.6,\n 89.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 11 + } + ], + "source": [ + "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", + " [61.3, 40.8, 30.8, 37.8, 87.6],\n", + " [20.6, 73.2, 44.2, 14.6, 91.8],\n", + " [57.4, 0.1, 96.1, 4.2, 69.5],\n", + " [83.6, 20.5, 85.4, 22.8, 35.9],\n", + " [49.0, 69.0, 0.1, 31.8, 89.1],\n", + " [23.3, 40.7, 95.0, 83.8, 26.9],\n", + " [27.6, 26.4, 53.8, 88.8, 68.5],\n", + " [96.6, 96.4, 53.4, 72.4, 50.1],\n", + " [73.7, 39.0, 43.2, 81.6, 34.7]]\n", + "\n", + "df = 
pd.DataFrame(b)\n", + "df\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VcoEFR27dE-R" + }, + "source": [ + "### 5. Rename the data frame columns based on the names in the list below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AGn-Jxm2dE-R" + }, + "outputs": [], + "source": [ + "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", + " [61.3, 40.8, 30.8, 37.8, 87.6],\n", + " [20.6, 73.2, 44.2, 14.6, 91.8],\n", + " [57.4, 0.1, 96.1, 4.2, 69.5],\n", + " [83.6, 20.5, 85.4, 22.8, 35.9],\n", + " [49.0, 69.0, 0.1, 31.8, 89.1],\n", + " [23.3, 40.7, 95.0, 83.8, 26.9],\n", + " [27.6, 26.4, 53.8, 88.8, 68.5],\n", + " [96.6, 96.4, 53.4, 72.4, 50.1],\n", + " [73.7, 39.0, 43.2, 81.6, 34.7]]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "RPYCs7aEdE-R", + "outputId": "38981318-e35b-420d-e520-13d204a6fc29" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Score_1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25.64898958373734,\n \"min\": 20.6,\n \"max\": 96.6,\n \"num_unique_values\": 10,\n \"samples\": [\n 96.6,\n 61.3,\n 49.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Score_2\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 32.12203985497254,\n \"min\": 0.1,\n \"max\": 96.4,\n \"num_unique_values\": 10,\n \"samples\": [\n 96.4,\n 40.8,\n 69.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Score_3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30.168278041678146,\n \"min\": 0.1,\n \"max\": 96.1,\n \"num_unique_values\": 10,\n \"samples\": [\n 53.4,\n 30.8,\n 0.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Score_4\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 31.39344588356691,\n \"min\": 4.2,\n \"max\": 88.8,\n \"num_unique_values\": 10,\n \"samples\": [\n 72.4,\n 37.8,\n 31.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Score_5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24.562313047069125,\n \"min\": 26.9,\n \"max\": 91.8,\n \"num_unique_values\": 10,\n \"samples\": [\n 50.1,\n 87.6,\n 89.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 12 + } + ], + "source": [ + "columns = [\"Score_1\", \"Score_2\", \"Score_3\", \"Score_4\", \"Score_5\"]\n", + "df.columns = columns\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RkYH4EeidE-R" + }, + "source": [ + "### 6. Create a subset of this data frame that contains only the Score 1, 3, and 5 columns." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "LQg2KvgydE-R", + "outputId": "c331b850-695c-4652-8cb1-0c90adc6ba15" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_3Score_5
053.167.578.4
161.330.887.6
220.644.291.8
357.496.169.5
483.685.435.9
549.00.189.1
623.395.026.9
727.653.868.5
896.653.450.1
973.743.234.7
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "subset", + "summary": "{\n \"name\": \"subset\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Score_1\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 25.64898958373734,\n \"min\": 20.6,\n \"max\": 96.6,\n \"num_unique_values\": 10,\n \"samples\": [\n 96.6,\n 61.3,\n 49.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Score_3\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30.168278041678146,\n \"min\": 0.1,\n \"max\": 96.1,\n \"num_unique_values\": 10,\n \"samples\": [\n 53.4,\n 30.8,\n 0.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Score_5\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24.562313047069125,\n \"min\": 26.9,\n \"max\": 91.8,\n \"num_unique_values\": 10,\n \"samples\": [\n 50.1,\n 87.6,\n 89.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "subset = df[[\"Score_1\", \"Score_3\", \"Score_5\"]]\n", + "subset\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L9UiE-_ndE-R" + }, + "source": [ + "### 7. From the original data frame, calculate the average Score_3 value." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9HqMgx3jdE-R", + "outputId": "3931f0af-7417-4d3a-a5f3-a1d51c89ab1b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "56.95000000000001" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ], + "source": [ + "average_score_3 = df[\"Score_3\"].mean()\n", + "average_score_3\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b7QvVALtdE-S" + }, + "source": [ + "### 8. From the original data frame, calculate the maximum Score_4 value." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "SkWf2hq-dE-S", + "outputId": "f258c34f-e266-4298-fca3-36e37e7093dd" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "88.8" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "max_score_4 = df[\"Score_4\"].max()\n", + "max_score_4\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ULzzZ3kgdE-S" + }, + "source": [ + "### 9. From the original data frame, calculate the median Score_2 value." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "56tSli7HdE-S", + "outputId": "9b290d2b-b0ad-4297-b215-b889b955c462" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "40.75" + ] + }, + "metadata": {}, + "execution_count": 16 + } + ], + "source": [ + "median_score_2 = df[\"Score_2\"].median()\n", + "median_score_2\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9QhAZaQodE-S" + }, + "source": [ + "### 10. Create a Pandas DataFrame from the dictionary of product orders below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j2mfKrBGdE-S" + }, + "outputs": [], + "source": [ + "orders = {'Description': ['LUNCH BAG APPLE DESIGN',\n", + " 'SET OF 60 VINTAGE LEAF CAKE CASES ',\n", + " 'RIBBON REEL STRIPES DESIGN ',\n", + " 'WORLD WAR 2 GLIDERS ASSTD DESIGNS',\n", + " 'PLAYING CARDS JUBILEE UNION JACK',\n", + " 'POPCORN HOLDER',\n", + " 'BOX OF VINTAGE ALPHABET BLOCKS',\n", + " 'PARTY BUNTING',\n", + " 'JAZZ HEARTS ADDRESS BOOK',\n", + " 'SET OF 4 SANTA PLACE SETTINGS'],\n", + " 'Quantity': [1, 24, 1, 2880, 2, 7, 1, 4, 10, 48],\n", + " 'UnitPrice': [1.65, 0.55, 1.65, 0.18, 1.25, 0.85, 11.95, 4.95, 0.19, 1.25],\n", + " 'Revenue': [1.65, 13.2, 1.65, 518.4, 2.5, 5.95, 11.95, 19.8, 1.9, 60.0]}" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "x1mjGAkwdE-S", + "outputId": "e5167f91-6c26-4f81-808d-c77d9a1a0032" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
0LUNCH BAG APPLE DESIGN11.651.65
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
2RIBBON REEL STRIPES DESIGN11.651.65
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
4PLAYING CARDS JUBILEE UNION JACK21.252.50
5POPCORN HOLDER70.855.95
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
7PARTY BUNTING44.9519.80
8JAZZ HEARTS ADDRESS BOOK100.191.90
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "orders_df", + "summary": "{\n \"name\": \"orders_df\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"Description\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"JAZZ HEARTS ADDRESS BOOK\",\n \"SET OF 60 VINTAGE LEAF CAKE CASES \",\n \"POPCORN HOLDER\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Quantity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 907,\n \"min\": 1,\n \"max\": 2880,\n \"num_unique_values\": 8,\n \"samples\": [\n 24,\n 4,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"UnitPrice\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.606944074359285,\n \"min\": 0.18,\n \"max\": 11.95,\n \"num_unique_values\": 8,\n \"samples\": [\n 0.55,\n 11.95,\n 1.65\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Revenue\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 160.73492671683607,\n \"min\": 1.65,\n \"max\": 518.4,\n \"num_unique_values\": 9,\n \"samples\": [\n 1.9,\n 13.2,\n 11.95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "orders = {\n", + " 'Description': [\n", + " 'LUNCH BAG APPLE DESIGN',\n", + " 'SET OF 60 VINTAGE LEAF CAKE CASES ',\n", + " 'RIBBON REEL STRIPES DESIGN ',\n", + " 'WORLD WAR 2 GLIDERS ASSTD DESIGNS',\n", + " 'PLAYING CARDS JUBILEE UNION JACK',\n", + " 'POPCORN HOLDER',\n", + " 'BOX OF VINTAGE ALPHABET BLOCKS',\n", + " 'PARTY BUNTING',\n", + " 'JAZZ HEARTS ADDRESS BOOK',\n", + " 'SET OF 4 SANTA PLACE SETTINGS'\n", + " ],\n", + " 'Quantity': [1, 24, 1, 2880, 2, 7, 1, 4, 10, 48],\n", + " 'UnitPrice': [1.65, 0.55, 1.65, 0.18, 1.25, 0.85, 11.95, 4.95, 0.19, 1.25],\n", + " 'Revenue': [1.65, 13.2, 1.65, 518.4, 2.5, 5.95, 11.95, 
19.8, 1.9, 60.0]\n", + "}\n", + "\n", + "orders_df = pd.DataFrame(orders)\n", + "orders_df\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y_h9H5t3dE-S" + }, + "source": [ + "### 11. Calculate the total quantity ordered and revenue generated from these orders." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IUGC3z9CdE-S", + "outputId": "5242d661-b5dd-4777-8e7a-9869b0390306" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(2978, 637.0)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "total_quantity = orders_df[\"Quantity\"].sum()\n", + "total_revenue = orders_df[\"Revenue\"].sum()\n", + "\n", + "total_quantity, total_revenue\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y1wlVlRVdE-S" + }, + "source": [ + "### 12. Obtain the prices of the most expensive and least expensive items ordered and print the difference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fQdnb4LkdE-S", + "outputId": "b656d680-42ba-4a3e-ee65-f5e0c2a2980b" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(11.95, 0.18, 11.77)" + ] + }, + "metadata": {}, + "execution_count": 19 + } + ], + "source": [ + "most_expensive = orders_df[\"UnitPrice\"].max()\n", + "least_expensive = orders_df[\"UnitPrice\"].min()\n", + "price_difference = most_expensive - least_expensive\n", + "\n", + "most_expensive, least_expensive, price_difference\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8I_kviOydE-S" + }, + "source": [ + "## Let's load another dataset for more exercises" + ] + }, + { + "cell_type": "code", + "source": [ + "import sys\n", + "\n", + "\n", + "sys.path.append('/content/lab-pandas-en/your-code')\n", + "\n", + "import pandas as pd\n", + "admissions = pd.read_csv('Admission_Predict.csv')\n", + "\n", + "\n", + "print(admissions.head())\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "936b9ca0-c6f1-42fb-d0b8-1242d05f28c3", + "id": "u2GfZb8jiq3I" + }, + "execution_count": 32, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Serial No. GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "0 1 337 118 4 4.5 4.5 9.65 \n", + "1 2 316 104 3 3.0 3.5 8.00 \n", + "2 3 322 110 3 3.5 2.5 8.67 \n", + "3 4 314 103 2 2.0 3.0 8.21 \n", + "4 5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "0 1 0.92 \n", + "1 1 0.72 \n", + "2 1 0.80 \n", + "3 0 0.65 \n", + "4 1 0.90 \n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J9lHPbFLdE-S" + }, + "source": [ + "Let's evaluate the dataset by looking at the `head` function." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "admissions.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "id": "tCvgfavUnBQ_", + "outputId": "c2402bb0-c60f-4520-9b5f-5808b00a13fc" + }, + "execution_count": 48, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "2 316 104 3 3.0 3.5 8.00 \n", + "3 322 110 3 3.5 2.5 8.67 \n", + "4 314 103 2 2.0 3.0 8.21 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "\n", + " Research Chance of Admit \n", + "Serial No. \n", + "1 1 0.92 \n", + "2 1 0.72 \n", + "3 1 0.80 \n", + "4 0 0.65 \n", + "5 1 0.90 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
Serial No.
133711844.54.59.6510.92
231610433.03.58.0010.72
332211033.52.58.6710.80
431410322.03.08.2100.65
533011554.53.09.3410.90
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "admissions", + "summary": "{\n \"name\": \"admissions\",\n \"rows\": 385,\n \"fields\": [\n {\n \"column\": \"Serial No.\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 111,\n \"min\": 1,\n \"max\": 385,\n \"num_unique_values\": 385,\n \"samples\": [\n 269,\n 251,\n 356\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GRE Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11,\n \"min\": 290,\n \"max\": 340,\n \"num_unique_values\": 49,\n \"samples\": [\n 317,\n 335,\n 297\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TOEFL Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 92,\n \"max\": 120,\n \"num_unique_values\": 29,\n \"samples\": [\n 94,\n 119,\n 107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"University Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 1,\n \"max\": 5,\n \"num_unique_values\": 5,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"SOP\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.0076395819221657,\n \"min\": 1.0,\n \"max\": 5.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 1.0,\n 3.0,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LOR \",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.8969362987559213,\n \"min\": 1.0,\n \"max\": 5.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 5.0,\n 3.5,\n 1.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"CGPA\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6023853232371412,\n \"min\": 6.8,\n \"max\": 9.92,\n \"num_unique_values\": 168,\n \"samples\": [\n 9.68,\n 7.6,\n 7.65\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"Research\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Chance of Admit \",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1440267052633097,\n \"min\": 0.34,\n \"max\": 0.97,\n \"num_unique_values\": 60,\n \"samples\": [\n 0.92,\n 0.75\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 48 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B-ydnn7KdE-T" + }, + "source": [ + "### 1 - Before beginning to work with this dataset and evaluating graduate admissions data, we will verify that there is no missing data in the dataset. Do this in the cell below." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 335 + }, + "id": "YeZ-EVH4dE-T", + "outputId": "56611b59-df62-4e30-9668-fd21fa18db1c" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "GRE Score 0\n", + "TOEFL Score 0\n", + "University Rating 0\n", + "SOP 0\n", + "LOR 0\n", + "CGPA 0\n", + "Research 0\n", + "Chance of Admit 0\n", + "dtype: int64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
GRE Score0
TOEFL Score0
University Rating0
SOP0
LOR0
CGPA0
Research0
Chance of Admit0
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 44 + } + ], + "source": [ + "# Check for missing values in the DataFrame\n", + "# This will sum the null values in each column\n", + "admissions.isnull().sum()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EMnv4zBBdE-T" + }, + "source": [ + "### 2 - Interestingly, there is a column that uniquely identifies the applicants. This column is the serial number column. Instead of having our own index, we should make this column our index. Do this in the cell below. Keep the column in the dataframe in addition to making it an index." + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 238 + }, + "id": "mAAjiCKSdE-T", + "outputId": "5f7c7575-08e0-4554-8532-135b73c50622" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " level_0 index GRE Score TOEFL Score University Rating SOP \\\n", + "Serial No. \n", + "1 0 0 337 118 4 4.5 \n", + "2 1 1 316 104 3 3.0 \n", + "3 2 2 322 110 3 3.5 \n", + "4 3 3 314 103 2 2.0 \n", + "5 4 4 330 115 5 4.5 \n", + "\n", + " LOR CGPA Research Chance of Admit \n", + "Serial No. \n", + "1 4.5 9.65 1 0.92 \n", + "2 3.5 8.00 1 0.72 \n", + "3 2.5 8.67 1 0.80 \n", + "4 3.0 8.21 0 0.65 \n", + "5 3.0 9.34 1 0.90 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
level_0indexGRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
Serial No.
10033711844.54.59.6510.92
21131610433.03.58.0010.72
32232211033.52.58.6710.80
43331410322.03.08.2100.65
54433011554.53.09.3410.90
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "admissions", + "summary": "{\n \"name\": \"admissions\",\n \"rows\": 385,\n \"fields\": [\n {\n \"column\": \"Serial No.\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 111,\n \"min\": 1,\n \"max\": 385,\n \"num_unique_values\": 385,\n \"samples\": [\n 269,\n 251,\n 356\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"level_0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 111,\n \"min\": 0,\n \"max\": 384,\n \"num_unique_values\": 385,\n \"samples\": [\n 268,\n 250,\n 355\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"index\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 111,\n \"min\": 0,\n \"max\": 384,\n \"num_unique_values\": 385,\n \"samples\": [\n 268,\n 250,\n 355\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GRE Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11,\n \"min\": 290,\n \"max\": 340,\n \"num_unique_values\": 49,\n \"samples\": [\n 317,\n 335,\n 297\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TOEFL Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 92,\n \"max\": 120,\n \"num_unique_values\": 29,\n \"samples\": [\n 94,\n 119,\n 107\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"University Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 1,\n \"max\": 5,\n \"num_unique_values\": 5,\n \"samples\": [\n 3,\n 1,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"SOP\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.0076395819221657,\n \"min\": 1.0,\n \"max\": 5.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 1.0,\n 3.0,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"LOR \",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.8969362987559213,\n \"min\": 1.0,\n \"max\": 5.0,\n \"num_unique_values\": 9,\n \"samples\": [\n 5.0,\n 3.5,\n 1.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"CGPA\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6023853232371412,\n \"min\": 6.8,\n \"max\": 9.92,\n \"num_unique_values\": 168,\n \"samples\": [\n 9.68,\n 7.6,\n 7.65\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Research\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Chance of Admit \",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1440267052633097,\n \"min\": 0.34,\n \"max\": 0.97,\n \"num_unique_values\": 60,\n \"samples\": [\n 0.92,\n 0.75\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 70 + } + ], + "source": [ + "admissions['Serial No.'] = range(1, len(admissions) + 1)\n", + "admissions.set_index('Serial No.', inplace=True)\n", + "admissions.head()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rj_TaONsdE-T" + }, + "source": [ + "Turns out that `GRE Score` and `CGPA` also uniquely identify the data. Show this in the cell below." 
+ ] + }, + { + "cell_type": "code", + "source": [ + "unique_rows = admissions[[\"GRE Score\", \"CGPA\"]].duplicated().sum() == 0\n", + "unique_rows" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LnRTeqhCqDjk", + "outputId": "3a6bb522-23bf-4073-be18-a68fea8bfaf3" + }, + "execution_count": 58, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 58 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4BBg6cqqdE-T" + }, + "source": [ + "### 3 - In this part of the lab, we would like to test complex conditions on the entire data set at once. Let's start by finding the number of rows where the CGPA is greater than 9 and the student has performed an investigation." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "scrolled": true, + "colab": { + "base_uri": "https://localhost:8080/", + "height": 455 + }, + "id": "yXCrb4OvdE-T", + "outputId": "e852686d-bb92-4b59-81ba-0425b36f9a98" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " GRE Score TOEFL Score University Rating SOP LOR CGPA \\\n", + "Serial No. \n", + "1 337 118 4 4.5 4.5 9.65 \n", + "5 330 115 5 4.5 3.0 9.34 \n", + "11 328 112 4 4.0 4.5 9.10 \n", + "20 328 116 5 5.0 5.0 9.50 \n", + "21 334 119 5 5.0 4.5 9.70 \n", + "... ... ... ... ... ... ... \n", + "380 329 111 4 4.5 4.0 9.23 \n", + "381 324 110 3 3.5 3.5 9.04 \n", + "382 325 107 3 3.0 3.5 9.11 \n", + "383 330 116 4 5.0 4.5 9.45 \n", + "385 333 117 4 5.0 4.0 9.66 \n", + "\n", + " Research Chance of Admit \n", + "Serial No. \n", + "1 1 0.92 \n", + "5 1 0.90 \n", + "11 1 0.78 \n", + "20 1 0.94 \n", + "21 1 0.95 \n", + "... ... ... \n", + "380 1 0.89 \n", + "381 1 0.82 \n", + "382 1 0.84 \n", + "383 1 0.91 \n", + "385 1 0.95 \n", + "\n", + "[101 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GRE ScoreTOEFL ScoreUniversity RatingSOPLORCGPAResearchChance of Admit
Serial No.
133711844.54.59.6510.92
533011554.53.09.3410.90
1132811244.04.59.1010.78
2032811655.05.09.5010.94
2133411955.04.59.7010.95
...........................
38032911144.54.09.2310.89
38132411033.53.59.0410.82
38232510733.03.59.1110.84
38333011645.04.59.4510.91
38533311745.04.09.6610.95
\n", + "

101 rows × 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "cgpa_research", + "summary": "{\n \"name\": \"cgpa_research\",\n \"rows\": 101,\n \"fields\": [\n {\n \"column\": \"Serial No.\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 108,\n \"min\": 1,\n \"max\": 385,\n \"num_unique_values\": 101,\n \"samples\": [\n 299,\n 185,\n 218\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GRE Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 310,\n \"max\": 340,\n \"num_unique_values\": 25,\n \"samples\": [\n 320,\n 310,\n 337\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TOEFL Score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 106,\n \"max\": 120,\n \"num_unique_values\": 15,\n \"samples\": [\n 111,\n 120,\n 118\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"University Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2,\n \"max\": 5,\n \"num_unique_values\": 4,\n \"samples\": [\n 5,\n 2,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"SOP\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5425918729524609,\n \"min\": 3.0,\n \"max\": 5.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 4.0,\n 3.5,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"LOR \",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6556306011379539,\n \"min\": 2.5,\n \"max\": 5.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 4.5,\n 3.0,\n 2.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"CGPA\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2455127590401061,\n \"min\": 9.01,\n \"max\": 9.92,\n \"num_unique_values\": 57,\n \"samples\": [\n 9.65,\n 9.8,\n 9.92\n ],\n \"semantic_type\": \"\",\n 
\"description\": \"\"\n }\n },\n {\n \"column\": \"Research\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 1,\n \"num_unique_values\": 1,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Chance of Admit \",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.06240826931751849,\n \"min\": 0.66,\n \"max\": 0.97,\n \"num_unique_values\": 24,\n \"samples\": [\n 0.86\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 59 + } + ], + "source": [ + "cgpa_research = admissions[(admissions[\"CGPA\"] > 9) & (admissions[\"Research\"] == 1)]\n", + "cgpa_research\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a7gJj7OedE-T" + }, + "source": [ + "### 4 - Now return all the rows where the CGPA is greater than 9 and the SOP score is less than 3.5. Find the mean chance of admit for these applicants." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Xib95b7TdE-T", + "outputId": "f6711fca-74e7-4272-b08c-d3fb6bb006b4" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "0.8019999999999999" + ] + }, + "metadata": {}, + "execution_count": 60 + } + ], + "source": [ + "filtered_admissions = admissions[(admissions[\"CGPA\"] > 9) & (admissions[\"SOP\"] < 3.5)]\n", + "mean_chance = filtered_admissions[\"Chance of Admit \"].mean()\n", + "mean_chance\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "toc": { + 
"base_numbering": "", + "nav_menu": {}, + "number_sections": false, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": true + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file