-
Notifications
You must be signed in to change notification settings - Fork 52
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Notebook added for Singlestore Now Raffle (#116)
Co-authored-by: chetan thote <[email protected]>
- Loading branch information
1 parent
64e60fb
commit c5db129
Showing
2 changed files
with
374 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
[meta] | ||
authors=["chetan-thote"] | ||
title = "Singlestore Now 2024 Raffle" | ||
description = """ | ||
Explore the power of SingleStore in this interactive notebook by creating an account, loading data, and running queries for a chance to win the SingleStore Now 2024 Raffle!""" | ||
icon = "radar" | ||
difficulty="intermediate" | ||
tags = ["mongo", "embeddings", "vector", "genai", "kai", "starter"] | ||
lesson_areas=["Kai", "AI"] | ||
destinations = ["spaces"] | ||
minimum_tier="free-shared" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,363 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "1d35bf5a-7a16-4eea-9a45-797273ac5491", | ||
"metadata": {}, | ||
"source": [ | ||
"<div id=\"singlestore-header\" style=\"display: flex; background-color: rgba(255, 182, 176, 0.25); padding: 5px;\">\n", | ||
" <div id=\"icon-image\" style=\"width: 90px; height: 90px;\">\n", | ||
" <img width=\"100%\" height=\"100%\" src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/radar.png\" />\n", | ||
" </div>\n", | ||
" <div id=\"text\" style=\"padding: 5px; margin-left: 10px;\">\n", | ||
" <div id=\"badge\" style=\"display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%\">SingleStore Notebooks</div>\n", | ||
" <h1 style=\"font-weight: 500; margin: 8px 0 0 4px;\">Singlestore Now 2024 Raffle</h1>\n", | ||
" </div>\n", | ||
"</div>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "5fc3a6d9-e064-40dd-8cd0-636a567d5af0", | ||
"metadata": {}, | ||
"source": [ | ||
"<div class=\"alert alert-block alert-warning\">\n", | ||
" <b class=\"fa fa-solid fa-exclamation-circle\"></b>\n", | ||
" <div>\n", | ||
" <p><b>Note</b></p>\n", | ||
" <p>This notebook can be run on a Free Starter Workspace. To create a Free Starter Workspace navigate to <tt>Start</tt> using the left nav. You can also use your existing Standard or Premium workspace with this Notebook.</p>\n", | ||
" </div>\n", | ||
"</div>" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "dfc73c1e-9918-4d0a-ab22-4187a9c47678", | ||
"metadata": {}, | ||
"source": [ | ||
"<img src=https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/notebooks/atlas-and-kai/images/mongo-db-singlestoredb.png width=\"100%\">" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "1c7f4c37-2c1d-4507-9564-de2bea190005", | ||
"metadata": {}, | ||
"source": [ | ||
"## Install libraries and import modules" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "fb64cdc7-3ff1-4809-a9f1-9f0e770874b3", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Use the %pip magic rather than !pip so the packages are installed into the\n", | ||
"# environment of the running kernel instead of whatever pip the shell resolves.\n", | ||
"%pip install pymongo pandas ipywidgets --quiet" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "784ccd70-014c-429a-8325-91407fbf0e96", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Pick the database the rest of the notebook works in (database_to_use).\n", | ||
"# When the workspace reports is_shared_tier = ON, reuse the database that is\n", | ||
"# already selected; otherwise use a dedicated new_transactions database.\n", | ||
"shared_tier_check = %sql show variables like 'is_shared_tier'\n", | ||
"\n", | ||
"if shared_tier_check and shared_tier_check[0][1] == 'ON':\n", | ||
"    current_database = %sql SELECT DATABASE() as CurrentDatabase\n", | ||
"    database_to_use = current_database[0][0]\n", | ||
"else:\n", | ||
"    database_to_use = \"new_transactions\"\n", | ||
"    # IF NOT EXISTS keeps this cell re-runnable when the database is already there.\n", | ||
"    %sql CREATE DATABASE IF NOT EXISTS {{database_to_use}}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "3f1f2731-e117-4ead-871a-5711eb1cb391", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import time\n", | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import pymongo\n", | ||
"from pymongo import MongoClient" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "ca323068-a897-478f-839a-244f4bbc1719", | ||
"metadata": {}, | ||
"source": [ | ||
"## Connect to Atlas and SingleStore Kai endpoints\n", | ||
"We are using a shared tier on the backend for Atlas" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "5bf785b4-79c6-440f-9bb1-34a033c9f4db", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Atlas side of the demo: connection details are fixed, no need to edit anything.\n", | ||
"myclientmongodb = pymongo.MongoClient(\"mongodb+srv://mongo_sample_reader:[email protected]/?retryWrites=true&w=majority\")\n", | ||
"mydbmongodb = myclientmongodb[\"new_transactions\"]\n", | ||
"\n", | ||
"# Handles to the three source collections, in the order items, custs, txs.\n", | ||
"mongoitems, mongocusts, mongotxs = (\n", | ||
"    mydbmongodb[collection_name] for collection_name in (\"items\", \"custs\", \"txs\")\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"id": "20e25f4a-a6ce-4e3a-80c5-c56002945c7e", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# SingleStore Kai side of the demo.\n", | ||
"# NOTE(review): connection_url_kai is not defined in this notebook; presumably it is\n", | ||
"# provided by the SingleStore notebook environment - confirm.\n", | ||
"s2clientmongodb = pymongo.MongoClient(connection_url_kai)\n", | ||
"s2dbmongodb = s2clientmongodb[database_to_use]\n", | ||
"\n", | ||
"# Handles to the three target collections, in the order items, custs, txs.\n", | ||
"s2mongoitems, s2mongocusts, s2mongotxs = (\n", | ||
"    s2dbmongodb[collection_name] for collection_name in (\"items\", \"custs\", \"txs\")\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "36c6162c-e0a2-404b-8d9f-9af8df8b8cea", | ||
"metadata": {}, | ||
"source": [ | ||
"## Copy Atlas collections into SingleStore Kai" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "ebbefa07-2fbf-468c-bf65-00e12dcc606f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Copy every document of each Atlas collection into the Kai collection of the same name.\n", | ||
"mongocollections = [mongoitems, mongocusts, mongotxs]\n", | ||
"\n", | ||
"for mongo_collection in mongocollections:\n", | ||
"    # Pass the documents through unchanged: routing them via a pandas DataFrame\n", | ||
"    # would add NaN for fields a document lacks and could change value types.\n", | ||
"    source_docs = list(mongo_collection.find())\n", | ||
"    s2mongo_collection = s2dbmongodb[mongo_collection.name]\n", | ||
"    # insert_many() rejects an empty list, so skip collections without documents.\n", | ||
"    if source_docs:\n", | ||
"        s2mongo_collection.insert_many(source_docs)" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "ca4dbc9b-f96a-46c1-a4ac-aa761e0d19ec", | ||
"metadata": {}, | ||
"source": [ | ||
"## Total quantity of products sold across all products" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 7, | ||
"id": "2d3e0782-198f-4539-92cd-91e1758db721", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Sum item.quantity over all documents of the Kai items collection.\n", | ||
"pipeline = [\n", | ||
"    {\"$group\": {\"_id\": None, \"totalQuantity\": {\"$sum\": \"$item.quantity\"}}}\n", | ||
"]\n", | ||
"\n", | ||
"s2_result = s2mongoitems.aggregate(pipeline)\n", | ||
"\n", | ||
"# The cursor object itself is always truthy, so check the first row instead:\n", | ||
"# next() with a default yields None rather than raising StopIteration when the\n", | ||
"# aggregation returns nothing.\n", | ||
"totals_row = next(s2_result, None)\n", | ||
"total_quantity = totals_row[\"totalQuantity\"] if totals_row else 0\n", | ||
"\n", | ||
"# Report the total quantity sold\n", | ||
"print(\"Total Product Quantity Sold is\",total_quantity)" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "58f643e0-0205-4cf7-97de-dcd93bef0a64", | ||
"metadata": {}, | ||
"source": [ | ||
"## Top selling Product" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 8, | ||
"id": "a31e6d36-9eb3-43d3-a8c9-50a740d8d36c", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Rank products by total quantity sold and keep only the leader.\n", | ||
"pipeline = [\n", | ||
"    # Per product name, add up the quantities sold\n", | ||
"    {\"$group\": {\"_id\": \"$item.name\", \"total_quantity_sold\": {\"$sum\": \"$item.quantity\"}}},\n", | ||
"    # Largest total first\n", | ||
"    {\"$sort\": {\"total_quantity_sold\": -1}},\n", | ||
"    # Only the #1 product is needed\n", | ||
"    {\"$limit\": 1},\n", | ||
"]\n", | ||
"\n", | ||
"s2_result = s2mongoitems.aggregate(pipeline)\n", | ||
"top_product = next(s2_result, None)\n", | ||
"\n", | ||
"# Fall back to placeholder values when the aggregation yields no row\n", | ||
"if not top_product:\n", | ||
"    product_name, total_quantity_sold = \"No Data\", 0\n", | ||
"else:\n", | ||
"    product_name = top_product[\"_id\"]\n", | ||
"    total_quantity_sold = top_product[\"total_quantity_sold\"]\n", | ||
"\n", | ||
"print(\"Top-Selling product : \",product_name,\"With total quantity sold \",total_quantity_sold)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "e45de51e-f54b-4788-8fb3-2aadc9143533", | ||
"metadata": {}, | ||
"source": [ | ||
"## Top selling Location" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"id": "923bf8d1-6869-4448-9916-80e4f1b6e3f0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Count transactions per store location (ignoring \"Online\") and keep the busiest one.\n", | ||
"pipeline = [\n", | ||
"    {\"$match\": {\"store_location\": {\"$ne\": \"Online\"}}},  # Exclude Online location\n", | ||
"    # Group over every matching transaction so the ranking reflects the whole\n", | ||
"    # collection rather than an arbitrary subset of it.\n", | ||
"    {\"$group\":\n", | ||
"        {\n", | ||
"            \"_id\": {\"location\": \"$store_location\"},\n", | ||
"            \"count\": {\"$sum\": 1}\n", | ||
"        }\n", | ||
"    },\n", | ||
"    {\"$sort\": {\"count\": -1}},  # Highest transaction count first\n", | ||
"    {\"$limit\": 1}  # Only the top location is needed\n", | ||
"]\n", | ||
"\n", | ||
"s2_result = s2mongotxs.aggregate(pipeline)\n", | ||
"\n", | ||
"# Retrieve the top-selling location excluding \"Online\"; next() with a default\n", | ||
"# returns None instead of raising when the aggregation yields no row.\n", | ||
"top_location = next(s2_result, None)\n", | ||
"if top_location:\n", | ||
"    location_name = top_location[\"_id\"][\"location\"]\n", | ||
"    transaction_count = top_location[\"count\"]\n", | ||
"else:\n", | ||
"    location_name = \"No Data\"\n", | ||
"    transaction_count = 0\n", | ||
"\n", | ||
"# Report the top-selling location and its transaction count\n", | ||
"print(\"Top-Selling Location : \",location_name,\"With transaction of Count \",transaction_count)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "93934fde-c22e-4bda-992f-ed01dc83283c", | ||
"metadata": {}, | ||
"source": [ | ||
"## Clean up" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "599ca6e3-3847-467a-8a33-8f91e52a9cd1", | ||
"metadata": {}, | ||
"source": [ | ||
"<div class=\"alert alert-block alert-warning\">\n", | ||
" <b class=\"fa fa-solid fa-exclamation-circle\"></b>\n", | ||
" <div>\n", | ||
" <p><b>Action Required</b></p>\n", | ||
" <p> If you created a new database in your Standard or Premium Workspace, you can drop the database by running the cell below. Note: this will not drop your database for Free Starter Workspaces. To drop a Free Starter Workspace, terminate the Workspace using the UI. </p>\n", | ||
" </div>\n", | ||
"</div>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 10, | ||
"id": "2f4cf3c7-5e1f-442e-8b6e-e4f106ded82b", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Drop new_transactions only when the is_shared_tier lookup returns nothing\n", | ||
"# or reports OFF; otherwise leave the database alone.\n", | ||
"shared_tier_check = %sql show variables like 'is_shared_tier'\n", | ||
"\n", | ||
"shared_tier_off = (not shared_tier_check) or shared_tier_check[0][1] == 'OFF'\n", | ||
"if shared_tier_off:\n", | ||
"    %sql DROP DATABASE IF EXISTS new_transactions;" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "9635adf8-8137-4637-b94d-22835ba8112d", | ||
"metadata": {}, | ||
"source": [ | ||
"<div id=\"singlestore-footer\" style=\"background-color: rgba(194, 193, 199, 0.25); height:2px; margin-bottom:10px\"></div>\n", | ||
"<div><img src=\"https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/singlestore-logo-grey.png\" style=\"padding: 0px; margin: 0px; height: 24px\"/></div>" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"jupyterlab": { | ||
"notebooks": { | ||
"version_major": 6, | ||
"version_minor": 4 | ||
} | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.6" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |