Standardize structs and add error handling (#9)

cmu-db · May 2, 2024 · e5208f6 · e5208f6
1 parent a9bad65
commit e5208f6
Show file tree

Hide file tree

Showing 29 changed files with 1,628 additions and 998 deletions.
diff --git a/.gitignore b/.gitignore
@@ -13,3 +13,4 @@ Cargo.lock
 
 # MSVC Windows builds of rustc generate these, which store debugging information
 *.pdb
+test/
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/15721-s24-catalog1.iml b/.idea/15721-s24-catalog1.iml
diff --git a/.idea/material_theme_project_new.xml b/.idea/material_theme_project_new.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/Cargo.toml b/Cargo.toml
@@ -13,6 +13,9 @@ serde_json = "1.0"
 tower-http = { version = "0.4.0", features = ["full"] }
 dotenv = "0.15.0"
 rocksdb = "0.22.0"
+anyhow = "1.0.82"
+typed-builder = "0.14.0"
+uuid = "1.8.0"
 
 pretty_assertions = "0.7"
 select = "0.5"

diff --git a/benchmark_copy/bench.py b/benchmark_copy/bench.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+# This script is used to benchmark the catalog server.
+# It will start the catalog server, seed the catalog with some namespaces and tables, and use vegeta to stress test the server.
+# vegeta: https://github.com/tsenart/vegeta
+# Install on mac: brew install vegeta
+
+import subprocess as sp
+import time
+import signal
+import sys
+import requests
+import argparse
+import string
+import random
+
+
+def get_random_str(length=8):
+    letters = string.ascii_lowercase
+    return ''.join(random.choice(letters) for _ in range(length))
+
+
+def run(cmd, note, bg=False, out=None):
+    print(f"{note.ljust(48)}...", end=" ", flush=True)
+    try:
+        res = None
+        if out:
+            with open(out, "a") as f:
+                if bg:
+                    res = sp.Popen(cmd, shell=True, stdout=f, stderr=f)
+                else:
+                    sp.run(cmd, shell=True, check=True,
+                           stdout=f, stderr=f)
+        else:
+            if bg:
+                res = sp.Popen(cmd,   stdout=sp.DEVNULL, stderr=sp.DEVNULL)
+            else:
+                sp.run(cmd, shell=True, check=True,
+                       stdout=sp.DEVNULL, stderr=sp.DEVNULL)
+        print("DONE!")
+        return res
+    except sp.CalledProcessError as e:
+        print("FAIL!")
+        print("Error:", e)
+
+
+TEST_ROOT_DIR = "test"
+DEFAULT_BINARY_NAME = "catalog2"
+DEFAULT_DB_ROOT_DIR = f"{TEST_ROOT_DIR}/db"
+DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1/"
+DEFAULT_NAMESPACE_NUM = 1
+DEFAULT_TABLE_NUM = 1
+DEFAULT_RATE = 8
+
+parser = argparse.ArgumentParser(description="Benchmark.")
+parser.add_argument("-b", "--binary_name", type=str,
+                    default=DEFAULT_BINARY_NAME, help="Name of the catalog binary.")
+parser.add_argument("-d", "--db_root", type=str,
+                    default=DEFAULT_DB_ROOT_DIR, help="Root directory for the database.")
+parser.add_argument("-u", "--base_url", type=str,
+                    default=DEFAULT_BASE_URL, help="Base URL for catalog server.")
+parser.add_argument("-n", "--namespace_num", type=int,
+                    default=DEFAULT_NAMESPACE_NUM, help="The number of namespace to seed in catalog.")
+parser.add_argument("-t", "--table_num", type=int,
+                    default=DEFAULT_TABLE_NUM, help="The number of table to seed in catalog.")
+parser.add_argument("-r", "--rate", type=int,
+                    default=DEFAULT_RATE, help="Request rate.")
+parser.add_argument("-p", "--plot", action="store_true",
+                    default=False, help="Generate a plot of this benchmark.")
+args = parser.parse_args()
+
+
+CATALOG_LOG = f"{TEST_ROOT_DIR}/catalog.log"
+
+# build catalog in release mode
+run(f"rm -rf {TEST_ROOT_DIR} && mkdir {TEST_ROOT_DIR}",
+    note="initializing test dir")
+run(f"cargo build --release && cp target/release/{args.binary_name} {TEST_ROOT_DIR}/{args.binary_name}",
+    note="building catalog in release mode")
+catalog_server = run(f"{TEST_ROOT_DIR}/{args.binary_name} --db-root {args.db_root}",
+                     note="starting catalog server", bg=True, out=CATALOG_LOG)
+print("Waiting for catalog server to start...")
+time.sleep(1)
+
+# seeding the catalog, uniformly distribute tables to namespaces
+print(f"Seeding namespaces and tables...")
+NAMESPACE_ENDPOINT = "namespaces"
+TABLE_ENDPOINT = "tables"
+namespaces = []
+table_per_namespace = args.table_num // args.namespace_num
+for i in range(args.namespace_num):
+    namespace = get_random_str(32)
+    tables = []
+    for j in range(table_per_namespace):
+        tables.append(get_random_str(32))
+    namespaces.append({'name': namespace, 'tables': tables})
+    # create namespace
+    response = requests.post(f"{args.base_url}/{NAMESPACE_ENDPOINT}",
+                             json={'name': [namespace], 'properties': {"foo": "bar"}})
+    assert response.status_code == 200, f"Failed to create namespace {namespace}"
+
+    # crate tables
+    for table in tables:
+        response = requests.post(
+            f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace}/{TABLE_ENDPOINT}",
+            json={'name': table}
+        )
+        assert response.status_code == 201, f"Failed to create table in {namespace}"
+
+print(f"Seeded {len(namespaces)} namespaces and {len(namespaces) * table_per_namespace} tables.")
+
+# test begins
+# 1. single endpoint stress test
+namespace = namespaces[0]
+table = namespace['tables'][0]
+targets = {
+    "get_table": f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}/{TABLE_ENDPOINT}/{table}",
+    "list_table": f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}/{TABLE_ENDPOINT}",
+    "get_namespace": f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}",
+    "list_namespace": f"{args.base_url}/{NAMESPACE_ENDPOINT}"
+}
+
+for name, target in targets.items():
+    STATISTIC_FILE = f"{TEST_ROOT_DIR}/results_{name}.bin"
+    attack = f"echo 'GET {target}' | vegeta attack -rate={args.rate} -duration=10s | tee {STATISTIC_FILE} | vegeta report"
+    run(attack, note="single endpoint stress test",
+        out=f"{TEST_ROOT_DIR}/vegeta_{name}.log")
+    if args.plot:
+        PLOT_FILE = f"{TEST_ROOT_DIR}/plot_{name}.html"
+        run(f"cat {STATISTIC_FILE} | vegeta plot > {PLOT_FILE}",
+            note="generating plot")
+# ... more?
+# 2. random endpoint stress test
+# Define the file path
+PATH_TARGET_FILE = f"{TEST_ROOT_DIR}/requests_get_table.txt"
+
+# Write the URLs to the file
+with open(PATH_TARGET_FILE, "w") as file:
+    for i in range(len(namespaces)):
+        random_namespace = random.choice(namespaces)
+        random_table = random.choice(random_namespace['tables'])
+
+        # Generate request URL
+        target = f"{args.base_url}/{NAMESPACE_ENDPOINT}/{random_namespace['name']}/{TABLE_ENDPOINT}/{random_table}"
+        request_url = f"GET {target}"
+
+        file.write(request_url + "\n")
+
+print("URLs have been written to", PATH_TARGET_FILE)
+
+
+STATISTIC_FILE = f"{TEST_ROOT_DIR}/results_random.bin"
+attack = f"vegeta attack -targets={PATH_TARGET_FILE} -rate={args.rate} -duration=60s | tee {STATISTIC_FILE} | vegeta report"
+run(attack, note="random endpoints stress test",
+    out=f"{TEST_ROOT_DIR}/vegeta_random.log")
+if args.plot:
+    PLOT_FILE = f"{TEST_ROOT_DIR}/plot_random.html"
+    run(f"cat {STATISTIC_FILE} | vegeta plot > {PLOT_FILE}",
+        note="generating plot")
+
+# clean up
+catalog_server.send_signal(signal.SIGINT)
diff --git a/benchmark_copy/parse_dependencies.py b/benchmark_copy/parse_dependencies.py
@@ -0,0 +1,42 @@
+import os
+import sys
+
+begin = False
+package_version = {}
+with open('./Cargo.toml') as f:
+    for line in f:
+        if '[' == line[0]:
+            begin = False
+        if 'dependencies' in line:
+            begin = True
+            continue
+
+        if begin:
+            sep = line.find('=')
+            package_version[line[:sep-1].strip()] = line[sep+2:].strip()
+
+for dir_path in ["./libs/iceberg/", "./libs/rest/", "./libs/test_utils/"]:
+    r = open(dir_path + "Cargo.toml")
+    w = open(dir_path + "Cargo_n.toml", 'w')
+    begin = False
+    for r_line in r:
+        if '[' == r_line[0]:
+            begin = False
+        if 'dependencies' in r_line:
+            begin = True
+            w.write(r_line)
+            continue
+
+        if begin:
+            sep = r_line.find('=')
+            package = r_line[:sep-1].strip()
+            if package in package_version:
+                w.writelines([f"{package} = {package_version[package]}", "\n"])
+            else:
+                w.write(r_line)
+        else:
+            w.write(r_line)
+    r.close()
+    w.close()
+    os.remove(dir_path + "Cargo.toml")
+    os.rename(dir_path + "Cargo_n.toml", dir_path + "Cargo.toml")