Fix notebooks; add check for attachments=null

singlestore-labs · Oct 5, 2023 · 6516b8b · 6516b8b
1 parent c8a9318
commit 6516b8b
Show file tree

Hide file tree

Showing 6 changed files with 1,585 additions and 17 deletions.
diff --git a/notebooks/load-json-files-s3/meta.toml b/notebooks/load-json-files-s3/meta.toml
@@ -1,9 +1,11 @@
 [meta]
 title="Load JSON files with Pipeline from S3"
-description="This notebook will help you load JSON files from a public open AWS S3 bucket. You will see two modes:
-*) where you map the JSON elements to columns in a relational table
-*) where you just ingest all documents ito a JSON column. In that mode we also show how you can use persisted computed column for extracting JSON fields
-"
+description="""\
+    This notebook will help you load JSON files from a public open
+    AWS S3 bucket. You will see two modes:
+        *) where you map the JSON elements to columns in a relational table
+        *) where you just ingest all documents into a JSON column. In that mode we also show how you can use a persisted computed column for extracting JSON fields
+"""
 icon="chart-network"
 tags=["pipeline", "json", "s3"]
-destinations=["spaces"]
+destinations=["spaces"]
diff --git a/notebooks/load-json-files-s3/notebook.ipynb b/notebooks/load-json-files-s3/notebook.ipynb
diff --git a/notebooks/optimize-performance-with-tpch-100/meta.toml b/notebooks/optimize-performance-with-tpch-100/meta.toml
@@ -1,12 +1,17 @@
 [meta]
 title="Learn how to Optimize Performance with TPCH 100"
-description="This notebook will help you understand how you can take advantage of SingleStoreDB distributed capability using TPCH-100. We recommend using a S2 or S4 workspace to see the difference in performance.
-If you come from single node database, this is an important step to follow to scale your performance linearly as your data grows.
+description="""\
+    This notebook will help you understand how you can take advantage of
+    SingleStoreDB's distributed capabilities using TPCH-100. We recommend using
+    an S2 or S4 workspace to see the difference in performance.
 
-You will see two areas:
-*) Ingesting data using pipeline at a super fast speed (You will use a real-time embedded dashboard)
-*) Compare query performance with an unoptimized database and an optimized database
-"
+    If you come from a single-node database, this is an important step to follow
+    to scale your performance linearly as your data grows.
+
+    You will see two areas:
+        *) Ingesting data using a pipeline at a super fast speed (You will use a real-time embedded dashboard)
+        *) Compare query performance with an unoptimized database and an optimized database
+"""
 icon="database"
 tags=["performance", "benchmark", "tpch", "benchmark", "shardkey", "ingest"]
-destinations=["spaces"]
+destinations=["spaces"]
diff --git a/notebooks/optimize-performance-with-tpch-100/notebook.ipynb b/notebooks/optimize-performance-with-tpch-100/notebook.ipynb
diff --git a/resources/nb-check.py b/resources/nb-check.py
@@ -167,8 +167,9 @@ def new_markdown_cell(cell_id: str, content: list[str]) -> dict[str, Any]:
             footer_cell = cells.pop(-1)
             footer_id = footer_cell.get('id', footer_id)
 
-    # Convert source lists to a string
     for cell in cells:
+
+        # Convert source lists to a string
         source = cell.get('source', [])
         if isinstance(source, list):
             source = ''.join(source)
@@ -178,6 +179,10 @@ def new_markdown_cell(cell_id: str, content: list[str]) -> dict[str, Any]:
             source = []
         cell['source'] = source
 
+        # Replace "attachments": null with an empty dict (not sure how they get in there)
+        if 'attachments' in cell and cell['attachments'] is None:
+            cell['attachments'] = {}
+
     # Prepare parameter substitutions for header
     try:
         icon_name = toml_info['meta']['icon']

diff --git a/resources/toml-check.py b/resources/toml-check.py
@@ -35,8 +35,8 @@ def error(msg):
     if [x.lower() for x in tags] != tags:
         error(f'Tags must be in all lower-case ({tags}) in {f}')
 
-    if [re.sub(r'[^a-z]', r'', x) for x in tags] != tags:
-        error(f'Tags can only contain letters ({tags}) in {f}')
+    if [re.sub(r'[^a-z0-9]', r'', x) for x in tags] != tags:
+        error(f'Tags can only contain letters and numbers ({tags}) in {f}')
 
     # Currently only "spaces" is allowed in destinations
     destinations = meta.get('destinations', [])