From c5db129289d24c4462e3dac3b33525d95270d4e8 Mon Sep 17 00:00:00 2001 From: Chetan Thote <49151585+chetanthote@users.noreply.github.com> Date: Tue, 1 Oct 2024 19:26:07 +0530 Subject: [PATCH] Notebook added for Singlestore Now Raffle (#116) Co-authored-by: chetan thote --- notebooks/singlestore-now-2024/meta.toml | 11 + notebooks/singlestore-now-2024/notebook.ipynb | 363 ++++++++++++++++++ 2 files changed, 374 insertions(+) create mode 100644 notebooks/singlestore-now-2024/meta.toml create mode 100644 notebooks/singlestore-now-2024/notebook.ipynb diff --git a/notebooks/singlestore-now-2024/meta.toml b/notebooks/singlestore-now-2024/meta.toml new file mode 100644 index 00000000..87fe109b --- /dev/null +++ b/notebooks/singlestore-now-2024/meta.toml @@ -0,0 +1,11 @@ +[meta] +authors=["chetan-thote"] +title = "SingleStore Now 2024 Raffle" +description = """ + "Explore the power of SingleStore in this interactive notebook by creating an account, loading data, and running queries for a chance to win the SingleStore Now 2024 Raffle!" """ +icon = "radar" +difficulty="intermediate" +tags = ["mongo", "embeddings", "vector", "genai", "kai", "starter"] +lesson_areas=["Kai", "AI"] +destinations = ["spaces"] +minimum_tier="free-shared" diff --git a/notebooks/singlestore-now-2024/notebook.ipynb b/notebooks/singlestore-now-2024/notebook.ipynb new file mode 100644 index 00000000..98f867e3 --- /dev/null +++ b/notebooks/singlestore-now-2024/notebook.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1d35bf5a-7a16-4eea-9a45-797273ac5491", + "metadata": {}, + "source": [ + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "
SingleStore Notebooks
\n", + "

SingleStore Now 2024 Raffle

\n", + "
\n", + "
" + ] + }, + { + "cell_type": "markdown", + "id": "5fc3a6d9-e064-40dd-8cd0-636a567d5af0", + "metadata": {}, + "source": [ + "
\n", + " \n", + "
\n", + "

Note

\n", + "

This notebook can be run on a Free Starter Workspace. To create a Free Starter Workspace, navigate to Start using the left nav. You can also use your existing Standard or Premium workspace with this Notebook.

\n", + "
\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "dfc73c1e-9918-4d0a-ab22-4187a9c47678", + "metadata": {}, + "source": [ + "" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "1c7f4c37-2c1d-4507-9564-de2bea190005", + "metadata": {}, + "source": [ + "## Install libraries and import modules" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fb64cdc7-3ff1-4809-a9f1-9f0e770874b3", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install pymongo pandas ipywidgets --quiet" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "784ccd70-014c-429a-8325-91407fbf0e96", + "metadata": {}, + "outputs": [], + "source": [ + "shared_tier_check = %sql show variables like 'is_shared_tier'\n", + "\n", + "if shared_tier_check and shared_tier_check[0][1] == 'ON':\n", + " current_database = %sql SELECT DATABASE() as CurrentDatabase\n", + " database_to_use = current_database[0][0]\n", + "else:\n", + " database_to_use = \"new_transactions\"\n", + " %sql CREATE DATABASE {{database_to_use}}" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3f1f2731-e117-4ead-871a-5711eb1cb391", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pymongo\n", + "from pymongo import MongoClient" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ca323068-a897-478f-839a-244f4bbc1719", + "metadata": {}, + "source": [ + "## Connect to Atlas and SingleStore Kai endpoints\n", + "We are using a shared tier on the backend for Atlas" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5bf785b4-79c6-440f-9bb1-34a033c9f4db", + "metadata": {}, + "outputs": [], + "source": [ + "# No need to edit anything\n", + "myclientmongodb = pymongo.MongoClient(\"mongodb+srv://mongo_sample_reader:SingleStoreRocks27017@cluster1.tfutgo0.mongodb.net/?retryWrites=true&w=majority\")\n", + "mydbmongodb = 
myclientmongodb[\"new_transactions\"]\n", + "mongoitems = mydbmongodb[\"items\"]\n", + "mongocusts = mydbmongodb[\"custs\"]\n", + "mongotxs = mydbmongodb[\"txs\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "20e25f4a-a6ce-4e3a-80c5-c56002945c7e", + "metadata": {}, + "outputs": [], + "source": [ + "s2clientmongodb = pymongo.MongoClient(connection_url_kai)\n", + "s2dbmongodb = s2clientmongodb[database_to_use]\n", + "s2mongoitems = s2dbmongodb[\"items\"]\n", + "s2mongocusts = s2dbmongodb[\"custs\"]\n", + "s2mongotxs = s2dbmongodb[\"txs\"]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "36c6162c-e0a2-404b-8d9f-9af8df8b8cea", + "metadata": {}, + "source": [ + "## Copy Atlas collections into SingleStore Kai" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ebbefa07-2fbf-468c-bf65-00e12dcc606f", + "metadata": {}, + "outputs": [], + "source": [ + "mongocollections = [mongoitems, mongocusts, mongotxs]\n", + "\n", + "for mongo_collection in mongocollections:\n", + " df = pd.DataFrame(list(mongo_collection.find())).reset_index(drop=True)\n", + " data_dict = df.to_dict(orient='records')\n", + " s2mongo_collection = s2dbmongodb[mongo_collection.name]\n", + " s2mongo_collection.insert_many(data_dict)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "ca4dbc9b-f96a-46c1-a4ac-aa761e0d19ec", + "metadata": {}, + "source": [ + "## Total quantity of products sold across all products" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2d3e0782-198f-4539-92cd-91e1758db721", + "metadata": {}, + "outputs": [], + "source": [ + "num_iterations = 10\n", + "mongo_times = []\n", + "\n", + "# Updated pipeline for total quantity of products sold across all products\n", + "pipeline = [\n", + " {\"$group\": {\"_id\": None, \"totalQuantity\": {\"$sum\": \"$item.quantity\"}}}\n", + "]\n", + "\n", + "# Simulating same for s2mongoitems\n", + "s2_times = []\n", + "for i in 
range(num_iterations):\n", + " s2_start_time = time.time()\n", + " s2_result = s2mongoitems.aggregate(pipeline)\n", + " s2_stop_time = time.time()\n", + " s2_times.append(s2_stop_time - s2_start_time)\n", + "\n", + "# Retrieving total quantity from the result\n", + "total_quantity = next(s2_result)[\"totalQuantity\"] if s2_result else 0\n", + "\n", + "# Returning the numeric values of total quantity sold\n", + "print(\"Total Product Quantity Sold is\",total_quantity)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "58f643e0-0205-4cf7-97de-dcd93bef0a64", + "metadata": {}, + "source": [ + "## Top selling Product" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a31e6d36-9eb3-43d3-a8c9-50a740d8d36c", + "metadata": {}, + "outputs": [], + "source": [ + "# Updated pipeline to return the #1 selling product based on total quantity sold\n", + "pipeline = [\n", + " {\"$group\": {\n", + " \"_id\": \"$item.name\", # Group by product name\n", + " \"total_quantity_sold\": {\"$sum\": \"$item.quantity\"} # Sum of quantities sold\n", + " }},\n", + " {\"$sort\": {\"total_quantity_sold\": -1}}, # Sort by total quantity sold in descending order\n", + " {\"$limit\": 1} # Limit to the top product\n", + "]\n", + "\n", + "s2_result = s2mongoitems.aggregate(pipeline)\n", + "\n", + "# Retrieve the name of the #1 selling product\n", + "top_product = next(s2_result, None)\n", + "if top_product:\n", + " product_name = top_product[\"_id\"]\n", + " total_quantity_sold = top_product[\"total_quantity_sold\"]\n", + "else:\n", + " product_name = \"No Data\"\n", + " total_quantity_sold = 0\n", + "\n", + "# Return the #1 selling product and its total quantity sold\n", + "print(\"Top-Selling product : \",product_name,\"With total quantity sold \",total_quantity_sold)" + ] + }, + { + "cell_type": "markdown", + "id": "e45de51e-f54b-4788-8fb3-2aadc9143533", + "metadata": {}, + "source": [ + "## Top selling Location" + ] + }, + { + "cell_type": "code", + 
"execution_count": 9, + "id": "923bf8d1-6869-4448-9916-80e4f1b6e3f0", + "metadata": {}, + "outputs": [], + "source": [ + "# Updated pipeline to exclude \"Online\" and get top-selling location\n", + "pipeline = [\n", + " {\"$lookup\":\n", + " {\n", + " \"from\": \"custs\",\n", + " \"localField\": \"customer.email\",\n", + " \"foreignField\": \"email\",\n", + " \"as\": \"transaction_links\",\n", + " }\n", + " },\n", + " {\"$match\": {\"store_location\": {\"$ne\": \"Online\"}}}, # Exclude Online location\n", + " {\"$limit\": 100},\n", + " {\"$group\":\n", + " {\n", + " \"_id\": {\"location\": \"$store_location\"},\n", + " \"count\": {\"$sum\": 1}\n", + " }\n", + " },\n", + " {\"$sort\": {\"count\": -1}},\n", + " {\"$limit\": 1}\n", + "]\n", + "\n", + "\n", + "s2_result = s2mongotxs.aggregate(pipeline)\n", + "\n", + "\n", + "# Retrieve the top-selling location excluding \"Online\"\n", + "top_location = next(s2_result, None)\n", + "if top_location:\n", + " location_name = top_location[\"_id\"][\"location\"]\n", + " transaction_count = top_location[\"count\"]\n", + "else:\n", + " location_name = \"No Data\"\n", + " transaction_count = 0\n", + "\n", + "# Return the top-selling location and transaction count\n", + "\n", + "print(\"Top-Selling Location : \",location_name,\"With transaction of Count \",transaction_count)" + ] + }, + { + "cell_type": "markdown", + "id": "93934fde-c22e-4bda-992f-ed01dc83283c", + "metadata": {}, + "source": [ + "## Clean up" + ] + }, + { + "cell_type": "markdown", + "id": "599ca6e3-3847-467a-8a33-8f91e52a9cd1", + "metadata": {}, + "source": [ + "
\n", + " \n", + "
\n", + "

Action Required

\n", + "

If you created a new database in your Standard or Premium Workspace, you can drop the database by running the cell below. Note: this will not drop your database for Free Starter Workspaces. To drop a Free Starter Workspace, terminate the Workspace using the UI.

\n", + "
\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2f4cf3c7-5e1f-442e-8b6e-e4f106ded82b", + "metadata": {}, + "outputs": [], + "source": [ + "shared_tier_check = %sql show variables like 'is_shared_tier'\n", + "if not shared_tier_check or shared_tier_check[0][1] == 'OFF':\n", + " %sql DROP DATABASE IF EXISTS new_transactions;" + ] + }, + { + "cell_type": "markdown", + "id": "9635adf8-8137-4637-b94d-22835ba8112d", + "metadata": {}, + "source": [ + "
\n", + "
" + ] + } + ], + "metadata": { + "jupyterlab": { + "notebooks": { + "version_major": 6, + "version_minor": 4 + } + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}