From 3c8e9a3cecfe7a77165675137ef66317b3013bb2 Mon Sep 17 00:00:00 2001
From: Ananya Gupta <145869907+ananyag309@users.noreply.github.com>
Date: Sat, 26 Oct 2024 18:46:40 +0530
Subject: [PATCH] Add files via upload
---
.../stress_level_detect/model.py | 26 +
.../notebooks/stress_level_detection.ipynb | 3437 +++++++++++++++++
.../stress_level_detect/predict.py | 17 +
.../saved_models/random_forest_model.joblib | Bin 0 -> 1575297 bytes
4 files changed, 3480 insertions(+)
create mode 100644 Prediction Models/stress_level_detect/model.py
create mode 100644 Prediction Models/stress_level_detect/notebooks/stress_level_detection.ipynb
create mode 100644 Prediction Models/stress_level_detect/predict.py
create mode 100644 Prediction Models/stress_level_detect/saved_models/random_forest_model.joblib
diff --git a/Prediction Models/stress_level_detect/model.py b/Prediction Models/stress_level_detect/model.py
new file mode 100644
index 00000000..83c628ec
--- /dev/null
+++ b/Prediction Models/stress_level_detect/model.py
@@ -0,0 +1,26 @@
+from joblib import load
+
+# Load the trained Random Forest model once, at import time. The path is relative, so it resolves against the process's working directory. NOTE(review): this patch adds the file under "Prediction Models/...", while the path below starts with "models/..." - confirm the runtime layout.
+model = load('models/stress_level_detect/saved_models/random_forest_model.joblib')
+
+def stress_level_prediction(age, freq_no_purpose, freq_distracted, restless, worry_level, difficulty_concentrating, compare_to_successful_people, feelings_about_comparisons, freq_seeking_validation, freq_feeling_depressed, interest_fluctuation, sleep_issues):
+ # Feature extraction
+ features = [
+ float(age),
+ int(freq_no_purpose),
+ int(freq_distracted),
+ int(restless),
+ int(worry_level),
+ int(difficulty_concentrating),
+ int(compare_to_successful_people),
+ int(feelings_about_comparisons),
+ int(freq_seeking_validation),
+ int(freq_feeling_depressed),
+ int(interest_fluctuation),
+ int(sleep_issues)
+ ]
+
+ prediction = model.predict([features])[0]
+
+ return prediction
+
diff --git a/Prediction Models/stress_level_detect/notebooks/stress_level_detection.ipynb b/Prediction Models/stress_level_detect/notebooks/stress_level_detection.ipynb
new file mode 100644
index 00000000..e46d72ed
--- /dev/null
+++ b/Prediction Models/stress_level_detect/notebooks/stress_level_detection.ipynb
@@ -0,0 +1,3437 @@
+{
+ "metadata": {
+ "kernelspec": {
+ "language": "python",
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "pygments_lexer": "ipython3",
+ "nbconvert_exporter": "python",
+ "version": "3.6.4",
+ "file_extension": ".py",
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "name": "python",
+ "mimetype": "text/x-python"
+ },
+ "kaggle": {
+ "accelerator": "none",
+ "dataSources": [
+ {
+ "sourceId": 6154312,
+ "sourceType": "datasetVersion",
+ "datasetId": 2752689
+ }
+ ],
+ "dockerImageVersionId": 30558,
+ "isInternetEnabled": true,
+ "language": "python",
+ "sourceType": "notebook",
+ "isGpuEnabled": false
+ },
+ "colab": {
+ "provenance": []
+ }
+ },
+ "nbformat_minor": 0,
+ "nbformat": 4,
+ "cells": [
+ {
+ "source": [
+ "\n",
+ "# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES\n",
+ "# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,\n",
+ "# THEN FEEL FREE TO DELETE THIS CELL.\n",
+ "# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON\n",
+ "# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR\n",
+ "# NOTEBOOK.\n",
+ "\n",
+ "import os\n",
+ "import sys\n",
+ "from tempfile import NamedTemporaryFile\n",
+ "from urllib.request import urlopen\n",
+ "from urllib.parse import unquote, urlparse\n",
+ "from urllib.error import HTTPError\n",
+ "from zipfile import ZipFile\n",
+ "import tarfile\n",
+ "import shutil\n",
+ "\n",
+ "CHUNK_SIZE = 40960\n",
+ "DATA_SOURCE_MAPPING = 'social-media-and-mental-health:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F2752689%2F6154312%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240514%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240514T145335Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D3952f3fc68ff55e2f23468dd7579354bd8172fa054ce709e3f4c6cbbd15e643aaddf70b22b3289d5f7e2da5172aa7725403007f59e458cc88a7c166763e9991b224c97d359c663545b6aa3ba59e029042da5d13ef86f6474d222842f06fb12e7947e42e5c5bb4b4fb1fdb889744b2891f150af945db0334b54bda917a7ae6cb8c07d8ce890baf087920d69bb056374c4fd4567a0ff2b831da64bacea3cc5fdf94ac78d0e16b050b226e2cff1d54f90d14180e1ab46627db027b301044348fcce908d0ec67b70d8009abce1bc043742de6b335b0a784a4de5768cecd21f67f405ae3f2122935f333189cfe7d5444f7bf248f5aee2196e56af79a5ffe8bc429a43'\n",
+ "\n",
+ "KAGGLE_INPUT_PATH='/kaggle/input'\n",
+ "KAGGLE_WORKING_PATH='/kaggle/working'\n",
+ "KAGGLE_SYMLINK='kaggle'\n",
+ "\n",
+ "!umount /kaggle/input/ 2> /dev/null\n",
+ "shutil.rmtree('/kaggle/input', ignore_errors=True)\n",
+ "os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)\n",
+ "os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)\n",
+ "\n",
+ "try:\n",
+ " os.symlink(KAGGLE_INPUT_PATH, os.path.join(\"..\", 'input'), target_is_directory=True)\n",
+ "except FileExistsError:\n",
+ " pass\n",
+ "try:\n",
+ " os.symlink(KAGGLE_WORKING_PATH, os.path.join(\"..\", 'working'), target_is_directory=True)\n",
+ "except FileExistsError:\n",
+ " pass\n",
+ "\n",
+ "for data_source_mapping in DATA_SOURCE_MAPPING.split(','):\n",
+ " directory, download_url_encoded = data_source_mapping.split(':')\n",
+ " download_url = unquote(download_url_encoded)\n",
+ " filename = urlparse(download_url).path\n",
+ " destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)\n",
+ " try:\n",
+ " with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:\n",
+ " total_length = fileres.headers['content-length']\n",
+ " print(f'Downloading {directory}, {total_length} bytes compressed')\n",
+ " dl = 0\n",
+ " data = fileres.read(CHUNK_SIZE)\n",
+ " while len(data) > 0:\n",
+ " dl += len(data)\n",
+ " tfile.write(data)\n",
+ " done = int(50 * dl / int(total_length))\n",
+ " sys.stdout.write(f\"\\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded\")\n",
+ " sys.stdout.flush()\n",
+ " data = fileres.read(CHUNK_SIZE)\n",
+ " if filename.endswith('.zip'):\n",
+ " with ZipFile(tfile) as zfile:\n",
+ " zfile.extractall(destination_path)\n",
+ " else:\n",
+ " with tarfile.open(tfile.name) as tarfile:\n",
+ " tarfile.extractall(destination_path)\n",
+ " print(f'\\nDownloaded and uncompressed: {directory}')\n",
+ " except HTTPError as e:\n",
+ " print(f'Failed to load (likely expired) {download_url} to path {destination_path}')\n",
+ " continue\n",
+ " except OSError as e:\n",
+ " print(f'Failed to load {download_url} to path {destination_path}')\n",
+ " continue\n",
+ "\n",
+ "print('Data source import complete.')\n"
+ ],
+ "metadata": {
+ "id": "ispMP6Qlwh8i",
+ "outputId": "c5a0fa92-e5cd-4ce0-8b09-277a7f1f42a2",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "cell_type": "code",
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Downloading social-media-and-mental-health, 10944 bytes compressed\n",
+ "\r[==================================================] 10944 bytes downloaded\n",
+ "Downloaded and uncompressed: social-media-and-mental-health\n",
+ "Data source import complete.\n"
+ ]
+ }
+ ],
+ "execution_count": null
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "import os\n",
+ "for dirname, _, filenames in os.walk('/kaggle/input'):\n",
+ " for filename in filenames:\n",
+ " print(os.path.join(dirname, filename))"
+ ],
+ "metadata": {
+ "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5",
+ "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19",
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.792825Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.793195Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.802321Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.793144Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.800491Z"
+ },
+ "trusted": true,
+ "id": "N84q3Tgiwh8p",
+ "outputId": "1cf20bea-b82d-4cee-d339-36fbf4819e57",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "/kaggle/input/social-media-and-mental-health/smmh.csv\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh = pd.read_csv('/kaggle/input/social-media-and-mental-health/smmh.csv')\n",
+ "smmh"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.806694Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.807055Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.847373Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.807026Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.845998Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "KsuMfgU3wh8r",
+ "outputId": "e11d3c7e-f893-4e69-9517-1ebf0461de98"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Timestamp 1. What is your age? 2. Gender \\\n",
+ "0 4/18/2022 19:18:47 21.0 Male \n",
+ "1 4/18/2022 19:19:28 21.0 Female \n",
+ "2 4/18/2022 19:25:59 21.0 Female \n",
+ "3 4/18/2022 19:29:43 21.0 Female \n",
+ "4 4/18/2022 19:33:31 21.0 Female \n",
+ ".. ... ... ... \n",
+ "476 5/21/2022 23:38:28 24.0 Male \n",
+ "477 5/22/2022 0:01:05 26.0 Female \n",
+ "478 5/22/2022 10:29:21 29.0 Female \n",
+ "479 7/14/2022 19:33:47 21.0 Male \n",
+ "480 11/12/2022 13:16:50 53.0 Male \n",
+ "\n",
+ " 3. Relationship Status 4. Occupation Status \\\n",
+ "0 In a relationship University Student \n",
+ "1 Single University Student \n",
+ "2 Single University Student \n",
+ "3 Single University Student \n",
+ "4 Single University Student \n",
+ ".. ... ... \n",
+ "476 Single Salaried Worker \n",
+ "477 Married Salaried Worker \n",
+ "478 Married Salaried Worker \n",
+ "479 Single University Student \n",
+ "480 Married Salaried Worker \n",
+ "\n",
+ " 5. What type of organizations are you affiliated with? \\\n",
+ "0 University \n",
+ "1 University \n",
+ "2 University \n",
+ "3 University \n",
+ "4 University \n",
+ ".. ... \n",
+ "476 University, Private \n",
+ "477 University \n",
+ "478 University \n",
+ "479 University \n",
+ "480 Private \n",
+ "\n",
+ " 6. Do you use social media? \\\n",
+ "0 Yes \n",
+ "1 Yes \n",
+ "2 Yes \n",
+ "3 Yes \n",
+ "4 Yes \n",
+ ".. ... \n",
+ "476 Yes \n",
+ "477 Yes \n",
+ "478 Yes \n",
+ "479 Yes \n",
+ "480 Yes \n",
+ "\n",
+ " 7. What social media platforms do you commonly use? \\\n",
+ "0 Facebook, Twitter, Instagram, YouTube, Discord... \n",
+ "1 Facebook, Twitter, Instagram, YouTube, Discord... \n",
+ "2 Facebook, Instagram, YouTube, Pinterest \n",
+ "3 Facebook, Instagram \n",
+ "4 Facebook, Instagram, YouTube \n",
+ ".. ... \n",
+ "476 Facebook, Instagram, YouTube \n",
+ "477 Facebook, YouTube \n",
+ "478 Facebook, YouTube \n",
+ "479 Facebook, Twitter, Instagram, YouTube, Discord... \n",
+ "480 Facebook, YouTube \n",
+ "\n",
+ " 8. What is the average time you spend on social media every day? \\\n",
+ "0 Between 2 and 3 hours \n",
+ "1 More than 5 hours \n",
+ "2 Between 3 and 4 hours \n",
+ "3 More than 5 hours \n",
+ "4 Between 2 and 3 hours \n",
+ ".. ... \n",
+ "476 Between 2 and 3 hours \n",
+ "477 Between 1 and 2 hours \n",
+ "478 Between 2 and 3 hours \n",
+ "479 Between 2 and 3 hours \n",
+ "480 Less than an Hour \n",
+ "\n",
+ " 9. How often do you find yourself using Social media without a specific purpose? \\\n",
+ "0 5 \n",
+ "1 4 \n",
+ "2 3 \n",
+ "3 4 \n",
+ "4 3 \n",
+ ".. ... \n",
+ "476 3 \n",
+ "477 2 \n",
+ "478 3 \n",
+ "479 2 \n",
+ "480 2 \n",
+ "\n",
+ " ... \\\n",
+ "0 ... \n",
+ "1 ... \n",
+ "2 ... \n",
+ "3 ... \n",
+ "4 ... \n",
+ ".. ... \n",
+ "476 ... \n",
+ "477 ... \n",
+ "478 ... \n",
+ "479 ... \n",
+ "480 ... \n",
+ "\n",
+ " 11. Do you feel restless if you haven't used Social media in a while? \\\n",
+ "0 2 \n",
+ "1 2 \n",
+ "2 1 \n",
+ "3 1 \n",
+ "4 4 \n",
+ ".. ... \n",
+ "476 3 \n",
+ "477 2 \n",
+ "478 4 \n",
+ "479 2 \n",
+ "480 1 \n",
+ "\n",
+ " 12. On a scale of 1 to 5, how easily distracted are you? \\\n",
+ "0 5 \n",
+ "1 4 \n",
+ "2 2 \n",
+ "3 3 \n",
+ "4 4 \n",
+ ".. ... \n",
+ "476 4 \n",
+ "477 3 \n",
+ "478 3 \n",
+ "479 3 \n",
+ "480 3 \n",
+ "\n",
+ " 13. On a scale of 1 to 5, how much are you bothered by worries? \\\n",
+ "0 2 \n",
+ "1 5 \n",
+ "2 5 \n",
+ "3 5 \n",
+ "4 5 \n",
+ ".. ... \n",
+ "476 3 \n",
+ "477 4 \n",
+ "478 2 \n",
+ "479 3 \n",
+ "480 1 \n",
+ "\n",
+ " 14. Do you find it difficult to concentrate on things? \\\n",
+ "0 5 \n",
+ "1 4 \n",
+ "2 4 \n",
+ "3 3 \n",
+ "4 5 \n",
+ ".. ... \n",
+ "476 4 \n",
+ "477 4 \n",
+ "478 3 \n",
+ "479 2 \n",
+ "480 1 \n",
+ "\n",
+ " 15. On a scale of 1-5, how often do you compare yourself to other successful people through the use of social media? \\\n",
+ "0 2 \n",
+ "1 5 \n",
+ "2 3 \n",
+ "3 5 \n",
+ "4 3 \n",
+ ".. ... \n",
+ "476 4 \n",
+ "477 4 \n",
+ "478 3 \n",
+ "479 2 \n",
+ "480 3 \n",
+ "\n",
+ " 16. Following the previous question, how do you feel about these comparisons, generally speaking? \\\n",
+ "0 3 \n",
+ "1 1 \n",
+ "2 3 \n",
+ "3 1 \n",
+ "4 3 \n",
+ ".. ... \n",
+ "476 2 \n",
+ "477 2 \n",
+ "478 3 \n",
+ "479 3 \n",
+ "480 3 \n",
+ "\n",
+ " 17. How often do you look to seek validation from features of social media? \\\n",
+ "0 2 \n",
+ "1 1 \n",
+ "2 1 \n",
+ "3 2 \n",
+ "4 3 \n",
+ ".. ... \n",
+ "476 4 \n",
+ "477 4 \n",
+ "478 4 \n",
+ "479 4 \n",
+ "480 2 \n",
+ "\n",
+ " 18. How often do you feel depressed or down? \\\n",
+ "0 5 \n",
+ "1 5 \n",
+ "2 4 \n",
+ "3 4 \n",
+ "4 4 \n",
+ ".. ... \n",
+ "476 3 \n",
+ "477 4 \n",
+ "478 2 \n",
+ "479 4 \n",
+ "480 2 \n",
+ "\n",
+ " 19. On a scale of 1 to 5, how frequently does your interest in daily activities fluctuate? \\\n",
+ "0 4 \n",
+ "1 4 \n",
+ "2 2 \n",
+ "3 3 \n",
+ "4 4 \n",
+ ".. ... \n",
+ "476 4 \n",
+ "477 4 \n",
+ "478 2 \n",
+ "479 5 \n",
+ "480 2 \n",
+ "\n",
+ " 20. On a scale of 1 to 5, how often do you face issues regarding sleep? \n",
+ "0 5 \n",
+ "1 5 \n",
+ "2 5 \n",
+ "3 2 \n",
+ "4 1 \n",
+ ".. ... \n",
+ "476 4 \n",
+ "477 1 \n",
+ "478 2 \n",
+ "479 4 \n",
+ "480 3 \n",
+ "\n",
+ "[481 rows x 21 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Timestamp | \n",
+ " 1. What is your age? | \n",
+ " 2. Gender | \n",
+ " 3. Relationship Status | \n",
+ " 4. Occupation Status | \n",
+ " 5. What type of organizations are you affiliated with? | \n",
+ " 6. Do you use social media? | \n",
+ " 7. What social media platforms do you commonly use? | \n",
+ " 8. What is the average time you spend on social media every day? | \n",
+ " 9. How often do you find yourself using Social media without a specific purpose? | \n",
+ " ... | \n",
+ " 11. Do you feel restless if you haven't used Social media in a while? | \n",
+ " 12. On a scale of 1 to 5, how easily distracted are you? | \n",
+ " 13. On a scale of 1 to 5, how much are you bothered by worries? | \n",
+ " 14. Do you find it difficult to concentrate on things? | \n",
+ " 15. On a scale of 1-5, how often do you compare yourself to other successful people through the use of social media? | \n",
+ " 16. Following the previous question, how do you feel about these comparisons, generally speaking? | \n",
+ " 17. How often do you look to seek validation from features of social media? | \n",
+ " 18. How often do you feel depressed or down? | \n",
+ " 19. On a scale of 1 to 5, how frequently does your interest in daily activities fluctuate? | \n",
+ " 20. On a scale of 1 to 5, how often do you face issues regarding sleep? | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 4/18/2022 19:18:47 | \n",
+ " 21.0 | \n",
+ " Male | \n",
+ " In a relationship | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Twitter, Instagram, YouTube, Discord... | \n",
+ " Between 2 and 3 hours | \n",
+ " 5 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4/18/2022 19:19:28 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Twitter, Instagram, YouTube, Discord... | \n",
+ " More than 5 hours | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4/18/2022 19:25:59 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Instagram, YouTube, Pinterest | \n",
+ " Between 3 and 4 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4/18/2022 19:29:43 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Instagram | \n",
+ " More than 5 hours | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4/18/2022 19:33:31 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Instagram, YouTube | \n",
+ " Between 2 and 3 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 476 | \n",
+ " 5/21/2022 23:38:28 | \n",
+ " 24.0 | \n",
+ " Male | \n",
+ " Single | \n",
+ " Salaried Worker | \n",
+ " University, Private | \n",
+ " Yes | \n",
+ " Facebook, Instagram, YouTube | \n",
+ " Between 2 and 3 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 477 | \n",
+ " 5/22/2022 0:01:05 | \n",
+ " 26.0 | \n",
+ " Female | \n",
+ " Married | \n",
+ " Salaried Worker | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, YouTube | \n",
+ " Between 1 and 2 hours | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 478 | \n",
+ " 5/22/2022 10:29:21 | \n",
+ " 29.0 | \n",
+ " Female | \n",
+ " Married | \n",
+ " Salaried Worker | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, YouTube | \n",
+ " Between 2 and 3 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 479 | \n",
+ " 7/14/2022 19:33:47 | \n",
+ " 21.0 | \n",
+ " Male | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Twitter, Instagram, YouTube, Discord... | \n",
+ " Between 2 and 3 hours | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 480 | \n",
+ " 11/12/2022 13:16:50 | \n",
+ " 53.0 | \n",
+ " Male | \n",
+ " Married | \n",
+ " Salaried Worker | \n",
+ " Private | \n",
+ " Yes | \n",
+ " Facebook, YouTube | \n",
+ " Less than an Hour | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
481 rows × 21 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "smmh"
+ }
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh.shape"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.850291Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.85077Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.858597Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.850728Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.857036Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "wA_o3A9Gwh8r",
+ "outputId": "18720495-e1f2-43b9-de10-36bb37e057d4"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(481, 21)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh.columns"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.860343Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.860724Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.880767Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.860661Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.878909Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "JzRloe4_wh8s",
+ "outputId": "83570896-a37f-4079-c427-bfda5c3d9266"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['Timestamp', '1. What is your age?', '2. Gender',\n",
+ " '3. Relationship Status', '4. Occupation Status',\n",
+ " '5. What type of organizations are you affiliated with?',\n",
+ " '6. Do you use social media?',\n",
+ " '7. What social media platforms do you commonly use?',\n",
+ " '8. What is the average time you spend on social media every day?',\n",
+ " '9. How often do you find yourself using Social media without a specific purpose?',\n",
+ " '10. How often do you get distracted by Social media when you are busy doing something?',\n",
+ " '11. Do you feel restless if you haven't used Social media in a while?',\n",
+ " '12. On a scale of 1 to 5, how easily distracted are you?',\n",
+ " '13. On a scale of 1 to 5, how much are you bothered by worries?',\n",
+ " '14. Do you find it difficult to concentrate on things?',\n",
+ " '15. On a scale of 1-5, how often do you compare yourself to other successful people through the use of social media?',\n",
+ " '16. Following the previous question, how do you feel about these comparisons, generally speaking?',\n",
+ " '17. How often do you look to seek validation from features of social media?',\n",
+ " '18. How often do you feel depressed or down?',\n",
+ " '19. On a scale of 1 to 5, how frequently does your interest in daily activities fluctuate?',\n",
+ " '20. On a scale of 1 to 5, how often do you face issues regarding sleep?'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Renaming the Columns"
+ ],
+ "metadata": {
+ "id": "dHGe8Wemwh8s"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "new_column_names = {\n",
+ " 'Timestamp': 'timestamp',\n",
+ " '1. What is your age?': 'age',\n",
+ " '2. Gender': 'gender',\n",
+ " '3. Relationship Status': 'relationship_status',\n",
+ " '4. Occupation Status': 'occupation_status',\n",
+ " '5. What type of organizations are you affiliated with?': 'affiliated_organizations',\n",
+ " '6. Do you use social media?': 'use_social_media',\n",
+ " '7. What social media platforms do you commonly use?': 'social_media_platforms',\n",
+ " '8. What is the average time you spend on social media every day?': 'daily_social_media_time',\n",
+ " '9. How often do you find yourself using Social media without a specific purpose?': 'frequency_social_media_no_purpose',\n",
+ " '10. How often do you get distracted by Social media when you are busy doing something?': 'frequency_social_media_distracted',\n",
+ " \"11. Do you feel restless if you haven't used Social media in a while?\": 'restless_without_social_media',\n",
+ " '12. On a scale of 1 to 5, how easily distracted are you?': 'distractibility_scale',\n",
+ " '13. On a scale of 1 to 5, how much are you bothered by worries?': 'worry_level_scale',\n",
+ " '14. Do you find it difficult to concentrate on things?': 'difficulty_concentrating',\n",
+ " '15. On a scale of 1-5, how often do you compare yourself to other successful people through the use of social media?': 'compare_to_successful_people_scale',\n",
+ " '16. Following the previous question, how do you feel about these comparisons, generally speaking?': 'feelings_about_comparisons',\n",
+ " '17. How often do you look to seek validation from features of social media?': 'frequency_seeking_validation',\n",
+ " '18. How often do you feel depressed or down?': 'frequency_feeling_depressed',\n",
+ " '19. On a scale of 1 to 5, how frequently does your interest in daily activities fluctuate?': 'interest_fluctuation_scale',\n",
+ " '20. On a scale of 1 to 5, how often do you face issues regarding sleep?': 'sleep_issues_scale',\n",
+ "}\n",
+ "\n",
+ "smmh = smmh.rename(columns=new_column_names)\n"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.883761Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.884169Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.894544Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.884119Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.893726Z"
+ },
+ "trusted": true,
+ "id": "dLfpKO4fwh8u"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.89555Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.895926Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.934763Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.895891Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.933345Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 669
+ },
+ "id": "FCiHkA4Uwh8v",
+ "outputId": "a02213cb-3277-472d-b79f-a385ef41ab48"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " timestamp age gender relationship_status \\\n",
+ "0 4/18/2022 19:18:47 21.0 Male In a relationship \n",
+ "1 4/18/2022 19:19:28 21.0 Female Single \n",
+ "2 4/18/2022 19:25:59 21.0 Female Single \n",
+ "3 4/18/2022 19:29:43 21.0 Female Single \n",
+ "4 4/18/2022 19:33:31 21.0 Female Single \n",
+ ".. ... ... ... ... \n",
+ "476 5/21/2022 23:38:28 24.0 Male Single \n",
+ "477 5/22/2022 0:01:05 26.0 Female Married \n",
+ "478 5/22/2022 10:29:21 29.0 Female Married \n",
+ "479 7/14/2022 19:33:47 21.0 Male Single \n",
+ "480 11/12/2022 13:16:50 53.0 Male Married \n",
+ "\n",
+ " occupation_status affiliated_organizations use_social_media \\\n",
+ "0 University Student University Yes \n",
+ "1 University Student University Yes \n",
+ "2 University Student University Yes \n",
+ "3 University Student University Yes \n",
+ "4 University Student University Yes \n",
+ ".. ... ... ... \n",
+ "476 Salaried Worker University, Private Yes \n",
+ "477 Salaried Worker University Yes \n",
+ "478 Salaried Worker University Yes \n",
+ "479 University Student University Yes \n",
+ "480 Salaried Worker Private Yes \n",
+ "\n",
+ " social_media_platforms \\\n",
+ "0 Facebook, Twitter, Instagram, YouTube, Discord... \n",
+ "1 Facebook, Twitter, Instagram, YouTube, Discord... \n",
+ "2 Facebook, Instagram, YouTube, Pinterest \n",
+ "3 Facebook, Instagram \n",
+ "4 Facebook, Instagram, YouTube \n",
+ ".. ... \n",
+ "476 Facebook, Instagram, YouTube \n",
+ "477 Facebook, YouTube \n",
+ "478 Facebook, YouTube \n",
+ "479 Facebook, Twitter, Instagram, YouTube, Discord... \n",
+ "480 Facebook, YouTube \n",
+ "\n",
+ " daily_social_media_time frequency_social_media_no_purpose ... \\\n",
+ "0 Between 2 and 3 hours 5 ... \n",
+ "1 More than 5 hours 4 ... \n",
+ "2 Between 3 and 4 hours 3 ... \n",
+ "3 More than 5 hours 4 ... \n",
+ "4 Between 2 and 3 hours 3 ... \n",
+ ".. ... ... ... \n",
+ "476 Between 2 and 3 hours 3 ... \n",
+ "477 Between 1 and 2 hours 2 ... \n",
+ "478 Between 2 and 3 hours 3 ... \n",
+ "479 Between 2 and 3 hours 2 ... \n",
+ "480 Less than an Hour 2 ... \n",
+ "\n",
+ " restless_without_social_media distractibility_scale worry_level_scale \\\n",
+ "0 2 5 2 \n",
+ "1 2 4 5 \n",
+ "2 1 2 5 \n",
+ "3 1 3 5 \n",
+ "4 4 4 5 \n",
+ ".. ... ... ... \n",
+ "476 3 4 3 \n",
+ "477 2 3 4 \n",
+ "478 4 3 2 \n",
+ "479 2 3 3 \n",
+ "480 1 3 1 \n",
+ "\n",
+ " difficulty_concentrating compare_to_successful_people_scale \\\n",
+ "0 5 2 \n",
+ "1 4 5 \n",
+ "2 4 3 \n",
+ "3 3 5 \n",
+ "4 5 3 \n",
+ ".. ... ... \n",
+ "476 4 4 \n",
+ "477 4 4 \n",
+ "478 3 3 \n",
+ "479 2 2 \n",
+ "480 1 3 \n",
+ "\n",
+ " feelings_about_comparisons frequency_seeking_validation \\\n",
+ "0 3 2 \n",
+ "1 1 1 \n",
+ "2 3 1 \n",
+ "3 1 2 \n",
+ "4 3 3 \n",
+ ".. ... ... \n",
+ "476 2 4 \n",
+ "477 2 4 \n",
+ "478 3 4 \n",
+ "479 3 4 \n",
+ "480 3 2 \n",
+ "\n",
+ " frequency_feeling_depressed interest_fluctuation_scale \\\n",
+ "0 5 4 \n",
+ "1 5 4 \n",
+ "2 4 2 \n",
+ "3 4 3 \n",
+ "4 4 4 \n",
+ ".. ... ... \n",
+ "476 3 4 \n",
+ "477 4 4 \n",
+ "478 2 2 \n",
+ "479 4 5 \n",
+ "480 2 2 \n",
+ "\n",
+ " sleep_issues_scale \n",
+ "0 5 \n",
+ "1 5 \n",
+ "2 5 \n",
+ "3 2 \n",
+ "4 1 \n",
+ ".. ... \n",
+ "476 4 \n",
+ "477 1 \n",
+ "478 2 \n",
+ "479 4 \n",
+ "480 3 \n",
+ "\n",
+ "[481 rows x 21 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " timestamp | \n",
+ " age | \n",
+ " gender | \n",
+ " relationship_status | \n",
+ " occupation_status | \n",
+ " affiliated_organizations | \n",
+ " use_social_media | \n",
+ " social_media_platforms | \n",
+ " daily_social_media_time | \n",
+ " frequency_social_media_no_purpose | \n",
+ " ... | \n",
+ " restless_without_social_media | \n",
+ " distractibility_scale | \n",
+ " worry_level_scale | \n",
+ " difficulty_concentrating | \n",
+ " compare_to_successful_people_scale | \n",
+ " feelings_about_comparisons | \n",
+ " frequency_seeking_validation | \n",
+ " frequency_feeling_depressed | \n",
+ " interest_fluctuation_scale | \n",
+ " sleep_issues_scale | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 4/18/2022 19:18:47 | \n",
+ " 21.0 | \n",
+ " Male | \n",
+ " In a relationship | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Twitter, Instagram, YouTube, Discord... | \n",
+ " Between 2 and 3 hours | \n",
+ " 5 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 4/18/2022 19:19:28 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Twitter, Instagram, YouTube, Discord... | \n",
+ " More than 5 hours | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 4/18/2022 19:25:59 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Instagram, YouTube, Pinterest | \n",
+ " Between 3 and 4 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4/18/2022 19:29:43 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Instagram | \n",
+ " More than 5 hours | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 5 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4/18/2022 19:33:31 | \n",
+ " 21.0 | \n",
+ " Female | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Instagram, YouTube | \n",
+ " Between 2 and 3 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 5 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 476 | \n",
+ " 5/21/2022 23:38:28 | \n",
+ " 24.0 | \n",
+ " Male | \n",
+ " Single | \n",
+ " Salaried Worker | \n",
+ " University, Private | \n",
+ " Yes | \n",
+ " Facebook, Instagram, YouTube | \n",
+ " Between 2 and 3 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 477 | \n",
+ " 5/22/2022 0:01:05 | \n",
+ " 26.0 | \n",
+ " Female | \n",
+ " Married | \n",
+ " Salaried Worker | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, YouTube | \n",
+ " Between 1 and 2 hours | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 478 | \n",
+ " 5/22/2022 10:29:21 | \n",
+ " 29.0 | \n",
+ " Female | \n",
+ " Married | \n",
+ " Salaried Worker | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, YouTube | \n",
+ " Between 2 and 3 hours | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 479 | \n",
+ " 7/14/2022 19:33:47 | \n",
+ " 21.0 | \n",
+ " Male | \n",
+ " Single | \n",
+ " University Student | \n",
+ " University | \n",
+ " Yes | \n",
+ " Facebook, Twitter, Instagram, YouTube, Discord... | \n",
+ " Between 2 and 3 hours | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " 480 | \n",
+ " 11/12/2022 13:16:50 | \n",
+ " 53.0 | \n",
+ " Male | \n",
+ " Married | \n",
+ " Salaried Worker | \n",
+ " Private | \n",
+ " Yes | \n",
+ " Facebook, YouTube | \n",
+ " Less than an Hour | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " 3 | \n",
+ " 3 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
481 rows × 21 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "smmh"
+ }
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh_copy = smmh.copy()\n",
+ "smmh.info()"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.936112Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.936508Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.950232Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.936477Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.949234Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "IzQzprt0wh8w",
+ "outputId": "b3bb1b97-c9f8-46c1-cc2b-884fe3b11d5d"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 481 entries, 0 to 480\n",
+ "Data columns (total 21 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 timestamp 481 non-null object \n",
+ " 1 age 481 non-null float64\n",
+ " 2 gender 481 non-null object \n",
+ " 3 relationship_status 481 non-null object \n",
+ " 4 occupation_status 481 non-null object \n",
+ " 5 affiliated_organizations 451 non-null object \n",
+ " 6 use_social_media 481 non-null object \n",
+ " 7 social_media_platforms 481 non-null object \n",
+ " 8 daily_social_media_time 481 non-null object \n",
+ " 9 frequency_social_media_no_purpose 481 non-null int64 \n",
+ " 10 frequency_social_media_distracted 481 non-null int64 \n",
+ " 11 restless_without_social_media 481 non-null int64 \n",
+ " 12 distractibility_scale 481 non-null int64 \n",
+ " 13 worry_level_scale 481 non-null int64 \n",
+ " 14 difficulty_concentrating 481 non-null int64 \n",
+ " 15 compare_to_successful_people_scale 481 non-null int64 \n",
+ " 16 feelings_about_comparisons 481 non-null int64 \n",
+ " 17 frequency_seeking_validation 481 non-null int64 \n",
+ " 18 frequency_feeling_depressed 481 non-null int64 \n",
+ " 19 interest_fluctuation_scale 481 non-null int64 \n",
+ " 20 sleep_issues_scale 481 non-null int64 \n",
+ "dtypes: float64(1), int64(12), object(8)\n",
+ "memory usage: 79.0+ KB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh.isnull().sum()"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.951357Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.951813Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.976666Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.951785Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.974729Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "kf-Tp00pwh8x",
+ "outputId": "4bc4ccc4-ddc5-4bab-df46-6fefe26d28e4"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "timestamp 0\n",
+ "age 0\n",
+ "gender 0\n",
+ "relationship_status 0\n",
+ "occupation_status 0\n",
+ "affiliated_organizations 30\n",
+ "use_social_media 0\n",
+ "social_media_platforms 0\n",
+ "daily_social_media_time 0\n",
+ "frequency_social_media_no_purpose 0\n",
+ "frequency_social_media_distracted 0\n",
+ "restless_without_social_media 0\n",
+ "distractibility_scale 0\n",
+ "worry_level_scale 0\n",
+ "difficulty_concentrating 0\n",
+ "compare_to_successful_people_scale 0\n",
+ "feelings_about_comparisons 0\n",
+ "frequency_seeking_validation 0\n",
+ "frequency_feeling_depressed 0\n",
+ "interest_fluctuation_scale 0\n",
+ "sleep_issues_scale 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Imputing the Null Values"
+ ],
+ "metadata": {
+ "id": "sMAD1Boxwh8x"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "mode_value = smmh['affiliated_organizations'].mode()[0]  # most frequent value; mode() ignores NaN by default\n",
+ "smmh['affiliated_organizations'] = smmh['affiliated_organizations'].fillna(mode_value)  # assign back: inplace fillna on a column selection is deprecated and does not update smmh under Copy-on-Write"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.978284Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.978698Z",
+ "iopub.status.idle": "2023-10-30T11:28:16.987314Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.978663Z",
+ "shell.execute_reply": "2023-10-30T11:28:16.985753Z"
+ },
+ "trusted": true,
+ "id": "WpROFkRPwh8y"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh.isnull().sum()"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:16.991452Z",
+ "iopub.execute_input": "2023-10-30T11:28:16.991817Z",
+ "iopub.status.idle": "2023-10-30T11:28:17.007113Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:16.991786Z",
+ "shell.execute_reply": "2023-10-30T11:28:17.005336Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mcuiAEQ0wh8y",
+ "outputId": "602e9484-c6cb-4de8-c0fd-b4a1fd8f2293"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "timestamp 0\n",
+ "age 0\n",
+ "gender 0\n",
+ "relationship_status 0\n",
+ "occupation_status 0\n",
+ "affiliated_organizations 0\n",
+ "use_social_media 0\n",
+ "social_media_platforms 0\n",
+ "daily_social_media_time 0\n",
+ "frequency_social_media_no_purpose 0\n",
+ "frequency_social_media_distracted 0\n",
+ "restless_without_social_media 0\n",
+ "distractibility_scale 0\n",
+ "worry_level_scale 0\n",
+ "difficulty_concentrating 0\n",
+ "compare_to_successful_people_scale 0\n",
+ "feelings_about_comparisons 0\n",
+ "frequency_seeking_validation 0\n",
+ "frequency_feeling_depressed 0\n",
+ "interest_fluctuation_scale 0\n",
+ "sleep_issues_scale 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 20
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Our data now has no null values, so we can proceed."
+ ],
+ "metadata": {
+ "id": "OdzYoeGcwh8y"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "duplicate_rows = smmh[smmh.duplicated()]\n",
+ "duplicate_rows.sum()"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:17.008477Z",
+ "iopub.execute_input": "2023-10-30T11:28:17.008891Z",
+ "iopub.status.idle": "2023-10-30T11:28:17.030945Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:17.00885Z",
+ "shell.execute_reply": "2023-10-30T11:28:17.029095Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "D0YtFED9wh8y",
+ "outputId": "1ff18be0-dbcb-45b5-8c1c-8fb8608ca8f1"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "timestamp 0\n",
+ "age 0.0\n",
+ "gender 0\n",
+ "relationship_status 0\n",
+ "occupation_status 0\n",
+ "affiliated_organizations 0\n",
+ "use_social_media 0\n",
+ "social_media_platforms 0\n",
+ "daily_social_media_time 0\n",
+ "frequency_social_media_no_purpose 0\n",
+ "frequency_social_media_distracted 0\n",
+ "restless_without_social_media 0\n",
+ "distractibility_scale 0\n",
+ "worry_level_scale 0\n",
+ "difficulty_concentrating 0\n",
+ "compare_to_successful_people_scale 0\n",
+ "feelings_about_comparisons 0\n",
+ "frequency_seeking_validation 0\n",
+ "frequency_feeling_depressed 0\n",
+ "interest_fluctuation_scale 0\n",
+ "sleep_issues_scale 0\n",
+ "dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "smmh.columns"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:17.033291Z",
+ "iopub.execute_input": "2023-10-30T11:28:17.033742Z",
+ "iopub.status.idle": "2023-10-30T11:28:17.04221Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:17.033693Z",
+ "shell.execute_reply": "2023-10-30T11:28:17.041181Z"
+ },
+ "trusted": true,
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FRIBH1_cwh8z",
+ "outputId": "d42017ba-c3f6-4ecc-fa92-03ce5a47ea3f"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['timestamp', 'age', 'gender', 'relationship_status',\n",
+ " 'occupation_status', 'affiliated_organizations', 'use_social_media',\n",
+ " 'social_media_platforms', 'daily_social_media_time',\n",
+ " 'frequency_social_media_no_purpose',\n",
+ " 'frequency_social_media_distracted', 'restless_without_social_media',\n",
+ " 'distractibility_scale', 'worry_level_scale',\n",
+ " 'difficulty_concentrating', 'compare_to_successful_people_scale',\n",
+ " 'feelings_about_comparisons', 'frequency_seeking_validation',\n",
+ " 'frequency_feeling_depressed', 'interest_fluctuation_scale',\n",
+ " 'sleep_issues_scale'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 22
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "from sklearn.metrics import mean_absolute_error\n",
+ "from joblib import dump\n",
+ "\n",
+ "smmh['timestamp'] = pd.to_datetime(smmh['timestamp'])\n",
+ "\n",
+ "numeric_columns = smmh.select_dtypes(include=['int64', 'float64']).columns\n",
+ "X = smmh[numeric_columns]\n",
+ "X = X.drop(columns=['distractibility_scale'])\n",
+ "y = smmh['distractibility_scale']\n",
+ "\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+ "\n",
+ "rf_model = RandomForestRegressor(random_state=42)\n",
+ "rf_model.fit(X_train, y_train)\n",
+ "\n",
+ "y_pred_rf = rf_model.predict(X_test)\n",
+ "\n",
+ "mae_rf = mean_absolute_error(y_test, y_pred_rf)\n",
+ "print('Random Forest Mean Absolute Error:', mae_rf)\n",
+ "\n",
+ "dump(rf_model, 'random_forest_model.joblib')\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "btcw3cwf0n-K",
+ "outputId": "109a45f7-46b1-43be-96cc-09154a5daaac"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Random Forest Mean Absolute Error: 0.5792989690721649\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "['random_forest_model.joblib']"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 23
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(\"Columns used during training:\", X_train.columns)\n",
+ "print(\"No of columns used during training:\", len(X_train.columns))"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "E3TS5rNM_lnn",
+ "outputId": "aa696cf3-7b75-416f-c8d8-40fa269a6057"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Columns used during training: Index(['age', 'frequency_social_media_no_purpose',\n",
+ " 'frequency_social_media_distracted', 'restless_without_social_media',\n",
+ " 'worry_level_scale', 'difficulty_concentrating',\n",
+ " 'compare_to_successful_people_scale', 'feelings_about_comparisons',\n",
+ " 'frequency_seeking_validation', 'frequency_feeling_depressed',\n",
+ " 'interest_fluctuation_scale', 'sleep_issues_scale'],\n",
+ " dtype='object')\n",
+ "No of columns used during training: 12\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Descriptive Analysis\n",
+ "\n",
+ " 1. What is the average age of the survey respondents?\n",
+ " 2. How is the gender distribution among the respondents?\n",
+ " 3. What are the most common relationship statuses and occupation statuses among the respondents?\n",
+ " 4. Which social media platforms are the most commonly used among the survey participants?\n",
+ " 5. What is the distribution of daily social media usage times among the respondents?\n",
+ " 6. How often do respondents find themselves using social media without a specific purpose, and how does this vary by age and gender?"
+ ],
+ "metadata": {
+ "id": "Sk-aru5_wh8z"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### What is the average age of the survey respondents, and what is the age distribution in the dataset?"
+ ],
+ "metadata": {
+ "id": "mF92_eOSwh8z"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "average_age = smmh['age'].mean()\n",
+ "\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "plt.hist(smmh['age'], bins=20, edgecolor='k')\n",
+ "plt.title('Age Distribution of Survey Respondents')\n",
+ "plt.xlabel('Age')\n",
+ "plt.ylabel('Number of Respondents')\n",
+ "plt.show()"
+ ],
+ "metadata": {
+ "execution": {
+ "iopub.status.busy": "2023-10-30T11:28:17.044672Z",
+ "iopub.execute_input": "2023-10-30T11:28:17.045015Z",
+ "iopub.status.idle": "2023-10-30T11:28:17.287139Z",
+ "shell.execute_reply.started": "2023-10-30T11:28:17.044987Z",
+ "shell.execute_reply": "2023-10-30T11:28:17.285479Z"
+ },
+ "trusted": true,
+ "id": "6_LU7jGgwh8z",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 564
+ },
+ "outputId": "21dcee9f-69eb-4c21-af45-2c425402d387"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "