From 6a39ef50cc9345197c3fb6a6aa0aea71876bd7ec Mon Sep 17 00:00:00 2001 From: Chetan Thote <49151585+chetanthote@users.noreply.github.com> Date: Fri, 26 Jul 2024 19:12:38 +0530 Subject: [PATCH] Modified Notebooks according to feedback (#106) * Create load-CSV-data-S3 * Added notebooks for Load data sections of UI * Modified with suggested changes * Modified with suggested changes * Remove extra header * Modified with suggested changes and changed Kai Credentials * Modified with suggested changes and add JSON notebook * Update notebook.ipynb * Update notebook.ipynb * Modified with pre-commit checks --------- Co-authored-by: chetan thote Co-authored-by: Kevin D Smith --- notebooks/load-csv-data-s3/notebook.ipynb | 63 ++- notebooks/load-data-json/meta.toml | 12 + notebooks/load-data-json/notebook.ipynb | 494 ++++++++++++++++++ notebooks/load-data-kakfa/notebook.ipynb | 28 +- .../notebook.ipynb | 6 +- 5 files changed, 577 insertions(+), 26 deletions(-) create mode 100644 notebooks/load-data-json/meta.toml create mode 100644 notebooks/load-data-json/notebook.ipynb diff --git a/notebooks/load-csv-data-s3/notebook.ipynb b/notebooks/load-csv-data-s3/notebook.ipynb index f570ff20..01b14595 100644 --- a/notebooks/load-csv-data-s3/notebook.ipynb +++ b/notebooks/load-csv-data-s3/notebook.ipynb @@ -81,7 +81,7 @@ "id": "2d22fd53-2c18-40e5-bb38-6d8ebc06f1b8", "metadata": {}, "source": [ - "## Create a database\n", + "## Create a database (You can skip this Step if you are using Free Starter Tier)\n", "\n", "We need to create a database to work with in the following examples." ] @@ -161,6 +161,15 @@ "START PIPELINE SalesData_Pipeline;" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a402a924-5e09-4213-88f6-2723b39ee2aa", + "metadata": {}, + "source": [ + "### It might take around 1 min to load data from S3 to SingleStore table" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -169,7 +178,7 @@ "outputs": [], "source": [ "%%sql\n", - "SELECT * FROM SalesData LIMIT 10" + "SELECT count(*) FROM SalesData" ] }, { @@ -296,28 +305,60 @@ "source": [ "## Conclusion\n", "\n", - "
\n",
-    "    <b class=\"fa fa-solid fa-exclamation-circle\"></b>\n",
-    "    <div>\n",
-    "        <p><b>Action Required</b></p>\n",
-    "        <p>If you created a new database in your Standard or Premium Workspace, you can drop the database by running the cell below. Note: this will not drop your database for Free Starter Workspaces. To drop a Free Starter Workspace, terminate the Workspace using the UI.</p>\n",
-    "    </div>\n",
-    "</div>
\n", - "\n", "We have shown how to insert data from a Amazon S3 using `Pipelines` to SingleStoreDB. These techniques should enable you to\n", "integrate your Amazon S3 with SingleStoreDB." ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "83b2d1e6-58b8-493e-a698-2fd46e2ac5a1", + "metadata": {}, + "source": [ + "## Clean up" + ] + }, + { + "cell_type": "markdown", + "id": "f028e26e-66c0-44dc-9024-221687334301", + "metadata": {}, + "source": [ + "#### Drop Pipeline" + ] + }, { "cell_type": "code", "execution_count": 10, + "id": "f1f7b94f-2018-464e-9a28-b71cb89d65e3", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "STOP PIPELINE SalesData_Pipeline;\n", + "\n", + "DROP PIPELINE SalesData_Pipeline;" + ] + }, + { + "cell_type": "markdown", + "id": "33a246bd-36a3-4027-b44d-8c46768ff96d", + "metadata": {}, + "source": [ + "#### Drop Data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, "id": "d5053a52-5579-4fea-9594-5250f6fcc289", "metadata": {}, "outputs": [], "source": [ "shared_tier_check = %sql show variables like 'is_shared_tier'\n", "if not shared_tier_check or shared_tier_check[0][1] == 'OFF':\n", - " %sql DROP DATABASE IF EXISTS SalesAnalysis;" + " %sql DROP DATABASE IF EXISTS SalesAnalysis;\n", + "else:\n", + " %sql DROP TABLE SalesData;" ] }, { diff --git a/notebooks/load-data-json/meta.toml b/notebooks/load-data-json/meta.toml new file mode 100644 index 00000000..fb7e29e1 --- /dev/null +++ b/notebooks/load-data-json/meta.toml @@ -0,0 +1,12 @@ +[meta] +authors=["chetan-thote"] +title="Employee Data Analysis JSON Dataset" +description="""\ + Employee Data Analysis use case illustrates how to leverage Singlestore's capabilities to process and analyze JSON data from a Amazon S3 data source. + """ +difficulty="beginner" +tags=["starter", "loaddata", "json"] +lesson_areas=["Ingest"] +icon="database" +destinations=["spaces"] +minimum_tier="free-shared" diff --git a/notebooks/load-data-json/notebook.ipynb b/notebooks/load-data-json/notebook.ipynb new file mode 100644 index 00000000..1c37b46e --- /dev/null +++ b/notebooks/load-data-json/notebook.ipynb @@ -0,0 +1,494 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "31e6c77f-8681-48ca-96fa-6b69dca531e8", + "metadata": {}, + "source": [ + "
\n",
    "    <div id=\"icon-image\" style=\"width: 90px; height: 90px;\">\n",
    "        <img width=\"100%\" height=\"100%\" src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/database.png\" />\n",
    "    </div>\n",
    "    <div id=\"text\" style=\"padding: 5px; vertical-align: bottom;\">\n",
    "        <h1 style=\"font-size: 34px; color: #darkmagenta; font-family: Arial; font-weight: 650; margin: 8px;\">SingleStore Notebooks</h1>\n",
    "        <p style=\"font-size: 18px; margin: 8px;\">Employee Data Analysis JSON Dataset</p>\n",
    "    </div>\n",
    "</div>
" + ] + }, + { + "cell_type": "markdown", + "id": "76e0ea3a-08da-4f23-9ab2-3b3564396a06", + "metadata": {}, + "source": [ + "
\n",
    "    <b class=\"fa fa-solid fa-exclamation-circle\"></b>\n",
    "    <div>\n",
    "        <p><b>Note</b></p>\n",
    "        <p>This notebook can be run on a Free Starter Workspace. To create a Free Starter Workspace navigate to <tt>Start</tt> using the left nav. You can also use your existing Standard or Premium workspace with this Notebook.</p>\n",
    "    </div>\n",
    "</div>
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2acd4da5-dead-4087-b35a-3d5d74c68920", + "metadata": {}, + "source": [ + "In this example, we want to create a pipeline from multiple JSON files stored in an AWS S3 bucket called singlestoredb and a folder called **employeedata**. This bucket is located in **ap-south-1**." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "6689afa7-01f6-434f-869e-579d3e3302cc", + "metadata": {}, + "source": [ + "Each file has the following shape with nested arrays:\n", + "\n", + "```json\n", + "{\n", + " \"userId\": \"88-052-8576\",\n", + " \"jobTitleName\": \"Social Worker\",\n", + " \"firstName\": \"Mavis\",\n", + " \"lastName\": \"Hilldrop\",\n", + " \"dataofjoining\": \"20/09/2020\",\n", + " \"contactinfo\": {\n", + " \"city\": \"Dallas\",\n", + " \"phone\": \"972-454-9822\",\n", + " \"emailAddress\": \"mhilldrop0@google.ca\",\n", + " \"state\": \"TX\",\n", + " \"zipcode\": \"75241\"\n", + " },\n", + " \"Children\": [\n", + " \"Evaleen\",\n", + " \"Coletta\",\n", + " \"Leonelle\"\n", + " ],\n", + " \"salary\": 203000\n", + "}\n", + "```" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9ca0a1a6-7a08-4557-a03a-06e71cd02ee2", + "metadata": {}, + "source": [ + "
<h3>Demo Flow</h3>
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ff47d05c-a572-44a1-a5d7-33f6bf3cad8e", + "metadata": {}, + "source": [ + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "6f780f30-2893-4444-9aa3-101ae46eb8a5", + "metadata": {}, + "source": [ + "## How to use this notebook" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ba2456c6-e21d-4d2a-9ae9-e232507dca5f", + "metadata": {}, + "source": [ + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f91bc453-2b51-466a-be56-9bb5a278f323", + "metadata": {}, + "source": [ + "## Create a database (You can skip this Step if you are using Free Starter Tier)\n", + "\n", + "We need to create a database to work with in the following examples." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "361a6558-e17b-407d-9412-a465296da263", + "metadata": {}, + "outputs": [], + "source": [ + "shared_tier_check = %sql show variables like 'is_shared_tier'\n", + "if not shared_tier_check or shared_tier_check[0][1] == 'OFF':\n", + " %sql DROP DATABASE IF EXISTS HRData;\n", + " %sql CREATE DATABASE HRData;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "0fc75775-54ac-47db-b8a1-44deff739737", + "metadata": {}, + "source": [ + "
\n",
    "    <b class=\"fa fa-solid fa-exclamation-circle\"></b>\n",
    "    <div>\n",
    "        <p><b>Action Required</b></p>\n",
    "        <p>If you have a Free Starter Workspace deployed already, select the database from the drop-down menu at the top of this notebook. It updates the <tt>connection_url</tt> to connect to that database.</p>\n",
    "    </div>\n",
    "</div>
" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c2218fe5-1149-4979-bf73-4f6a24af25a0", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "CREATE TABLE IF NOT EXISTS employeeData (\n", + " userId text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " jobTitleName text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " firstName text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " lastName text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " dataofjoining text CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,\n", + " contactinfo JSON COLLATE utf8_bin NOT NULL,\n", + " salary int NOT NULL,\n", + " Children JSON COLLATE utf8_bin NOT NULL\n", + " );" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bc9f92c2-7683-4af0-851d-2f504d594d73", + "metadata": {}, + "source": [ + "### Create Pipeline To Insert JSON Data into Table" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f4abdfaa-458a-47ee-91af-1a8029b1ed72", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "\n", + "CREATE PIPELINE employeeData AS\n", + "LOAD DATA S3 'singlestoreloaddata/employeedata/*.json'\n", + "CONFIG '{ \\\"region\\\": \\\"ap-south-1\\\" }'\n", + " /*\n", + " CREDENTIALS '{\"aws_access_key_id\": \"\",\n", + " \"aws_secret_access_key\": \"\"}'\n", + " */\n", + "INTO TABLE employeeData\n", + "FORMAT JSON\n", + "(\n", + " userId <- userId,\n", + " jobTitleName <- jobTitleName,\n", + " firstName <- firstName,\n", + " lastName <- lastName,\n", + " dataofjoining <- dataofjoining,\n", + " contactinfo <- contactinfo,\n", + " salary <- salary,\n", + " Children <- Children\n", + ");\n", + "\n", + "START PIPELINE employeeData;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a48f3498-e139-46a6-b2fe-92ab6b64ee06", + "metadata": {}, + "source": [ + "### Check if Data is Loaded" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "84e4cc9a-e81f-462c-ab4f-d5c2c6b5646c", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT * from employeeData limit 5;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "fee17ba7-69cc-42cf-b65e-8aca025208ac", + "metadata": {}, + "source": [ + "### Sample Queries" + ] + }, + { + "cell_type": "markdown", + "id": "1d45f50e-d62b-4bd5-bc1a-f9ad26784925", + "metadata": {}, + "source": [ + "#### Select Top 2 Employees with highest salary risiding in State 'MS'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "85798a69-80f7-4769-b76b-a65685dd539a", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "select * from employeeData where contactinfo::$state = 'MS' order by salary desc limit 2" + ] + }, + { + "cell_type": "markdown", + "id": "e19bb996-1449-45fe-b14d-830ce7daa424", + "metadata": {}, + "source": [ + "#### Select Top 5 Cities with highest Average salary" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f902ed4c-4f7c-450e-8775-a8ecd3f1e096", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "select contactinfo::$city as City,AVG(salary) as 'Avg Salary' from employeeData\n", + " group by contactinfo::$city order by AVG(salary) desc limit 5" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bfcc74ff-8804-466d-b1d9-89a97c59f89f", + "metadata": {}, + "source": [ + "#### Number of employees with Children grouped by No of children" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": 
"ccdb22a9-adb0-415b-b9bb-088d8b510edd", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT\n", + " JSON_LENGTH(Children) as No_of_Kids,\n", + " COUNT(*) AS employees_with_children\n", + "FROM employeeData\n", + " group by JSON_LENGTH(Children);" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "085f7620-ecd7-437a-a6ae-fafd78585411", + "metadata": {}, + "source": [ + "#### Average salary of employees who have children" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "feedfc95-7391-4477-8e0c-b066722f3989", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT\n", + " AVG(salary) AS average_salary_with_children\n", + "FROM employeeData\n", + "WHERE JSON_LENGTH(Children) > 0;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "5f2f1218-a929-4776-9a1c-c23b87e4dfaf", + "metadata": {}, + "source": [ + "#### Select the total and average salary by State" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "30552614-dcbc-4ee4-a04b-c0ba66d4f4e5", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT\n", + " contactinfo::$state AS State,\n", + " COUNT(*) AS 'No of Employees',\n", + " SUM(salary) AS 'Total Salary',\n", + " AVG(salary) AS 'Average Salary'\n", + "FROM employeeData\n", + "GROUP BY contactinfo::$state limit 5;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e11369f8-1bc1-4084-bfe1-a77ba8504c40", + "metadata": {}, + "source": [ + "#### Top 5 job title with highest number of employees" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fea75987-4fc1-46a8-bf70-04288c0084b1", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT\n", + " jobTitleName,\n", + " COUNT(*) AS num_employees\n", + "FROM employeeData\n", + "GROUP BY jobTitleName order by num_employees desc limit 5;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "78c1226f-c1b8-4000-8c6f-9b375474db07", + "metadata": {}, + "source": [ + "#### Select the highest and lowest salary" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "68981205-804c-4ca3-bea3-9a87d3c83f16", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "SELECT\n", + " MAX(salary) AS highest_salary,\n", + " MIN(salary) AS lowest_salary\n", + "FROM employeeData;" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "b8f0e958-1ca1-4ee9-b9e7-ea8a38715a6a", + "metadata": {}, + "source": [ + "## Conclusion\n", + "\n", + "\n", + "We have shown how to connect to S3 using `Pipelines` and insert JSON data into SinglestoreDB. These techniques should enable you to\n", + "integrate and query your JSON data with SingleStoreDB." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "30f3625a-a0b4-4355-9ea0-1be4bd873286", + "metadata": {}, + "source": [ + "### Clean up\n", + "\n", + "Drop the pipeline using below command" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b2634fe2-06fd-40bd-84ff-c2018a05a948", + "metadata": {}, + "outputs": [], + "source": [ + "%%sql\n", + "STOP PIPELINE employeeData;\n", + "\n", + "DROP PIPELINE employeeData;" + ] + }, + { + "cell_type": "markdown", + "id": "eef58faa-222b-4156-9a27-421bc0f5cbcc", + "metadata": {}, + "source": [ + "Drop data" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5e3d81e2-aef1-413f-b435-b66d2ae8dfc3", + "metadata": {}, + "outputs": [], + "source": [ + "shared_tier_check = %sql show variables like 'is_shared_tier'\n", + "if not shared_tier_check or shared_tier_check[0][1] == 'OFF':\n", + " %sql DROP DATABASE IF EXISTS HRData;\n", + "else:\n", + " %sql DROP TABLE employeeData;" + ] + }, + { + "cell_type": "markdown", + "id": "666f1be8-892a-4278-b261-6a92111027aa", + "metadata": {}, + "source": [ + "
<div id=\"singlestore-footer\" style=\"background-color: rgba(194, 193, 199, 0.25); height: 2px; margin-bottom: 10px\"></div>\n",
    "<div><img src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/singlestore-logo-grey.png\" style=\"padding: 0px; margin: 0px; height: 24px\"/></div>
" + ] + } + ], + "metadata": { + "jupyterlab": { + "notebooks": { + "version_major": 6, + "version_minor": 4 + } + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/load-data-kakfa/notebook.ipynb b/notebooks/load-data-kakfa/notebook.ipynb index ac28ae67..af3ec376 100644 --- a/notebooks/load-data-kakfa/notebook.ipynb +++ b/notebooks/load-data-kakfa/notebook.ipynb @@ -81,7 +81,7 @@ "id": "5f963a4f-0eb0-4282-bc2f-f8bf48eef971", "metadata": {}, "source": [ - "## Create a database\n", + "## Create a database (You can skip this Step if you are using Free Starter Tier)\n", "\n", "We need to create a database to work with in the following examples." ] @@ -131,6 +131,7 @@ "outputs": [], "source": [ "%%sql\n", + "\n", "CREATE TABLE `eventsdata` (\n", " `user_id` varchar(120) DEFAULT NULL,\n", " `event_name` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci DEFAULT NULL,\n", @@ -163,7 +164,6 @@ "%%sql\n", "CREATE PIPELINE `eventsdata`\n", "AS LOAD DATA KAFKA 'public-kafka.memcompute.com:9092/ad_events'\n", - "BATCH_INTERVAL 2500\n", "ENABLE OUT_OF_ORDER OPTIMIZATION\n", "DISABLE OFFSETS METADATA GC\n", "INTO TABLE `eventsdata`\n", @@ -179,9 +179,9 @@ " `events`.`page_url`,\n", " `events`.`region`,\n", " `events`.`country`\n", - ")\n", + ");\n", "\n", - "START PIPELINE `eventsdata`" + "START PIPELINE `eventsdata`;" ] }, { @@ -320,18 +320,20 @@ "source": [ "## Conclusion\n", "\n", - "
\n",
-    "    <b class=\"fa fa-solid fa-exclamation-circle\"></b>\n",
-    "    <div>\n",
-    "        <p><b>Action Required</b></p>\n",
-    "        <p>If you created a new database in your Standard or Premium Workspace, you can drop the database by running the cell below. Note: this will not drop your database for Free Starter Workspaces. To drop a Free Starter Workspace, terminate the Workspace using the UI.</p>\n",
-    "    </div>\n",
-    "</div>
\n", "\n", "We have shown how to connect to Kafka using `Pipelines` and insert data into SinglestoreDB. These techniques should enable you to\n", "integrate your Kafka topics with SingleStoreDB." ] }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d59ce5f9-e6f1-4dee-a87c-3dedfb34bd69", + "metadata": {}, + "source": [ + "### Clean up" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -361,7 +363,9 @@ "source": [ "shared_tier_check = %sql show variables like 'is_shared_tier'\n", "if not shared_tier_check or shared_tier_check[0][1] == 'OFF':\n", - " %sql DROP DATABASE IF EXISTS EventAnalysis;" + " %sql DROP DATABASE IF EXISTS EventAnalysis;\n", + "else:\n", + " %sql DROP TABLE eventsdata;" ] }, { diff --git a/notebooks/unified-data-analysis-sql-nosql-kai/notebook.ipynb b/notebooks/unified-data-analysis-sql-nosql-kai/notebook.ipynb index 11aaa5ab..acab8263 100644 --- a/notebooks/unified-data-analysis-sql-nosql-kai/notebook.ipynb +++ b/notebooks/unified-data-analysis-sql-nosql-kai/notebook.ipynb @@ -59,7 +59,7 @@ "metadata": {}, "outputs": [], "source": [ - "pip install pymongo prettytable matplotlib --quiet" + "!pip install pymongo prettytable matplotlib --quiet" ] }, { @@ -280,8 +280,8 @@ " \"mongodb.members.auto.discover\": \"true\"\n", " }'\n", "CREDENTIALS '{\n", - " \"mongodb.user\":\"forimport\",\n", - " \"mongodb.password\":\"4Zfb0SKGCcDz5bBt\"\n", + " \"mongodb.user\":\"mongo_sample_reader\",\n", + " \"mongodb.password\":\"SingleStoreRocks27017\"\n", " }';" ] },
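
A note for anyone adapting these notebooks: each one starts a pipeline and then queries the target table right away, so a slow S3 or Kafka batch can make the first counts look low. A sanity check along these lines could run before the SELECTs. This is a sketch only: `SHOW PIPELINES` is standard SingleStoreDB syntax, the `information_schema.PIPELINES_ERRORS` view is assumed to be available on the target workspace, and `SalesData` is the table created earlier in this patch.

```sql
-- Confirm the pipeline exists and is in the Running state
SHOW PIPELINES;

-- Surface any per-file load errors before trusting row counts
SELECT * FROM information_schema.PIPELINES_ERRORS;

-- Once the pipeline has gone quiet, validate the loaded row count
SELECT COUNT(*) FROM SalesData;
```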