Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
Standardize structs and add error handling (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
SimranMakhija7 authored May 2, 2024
1 parent a9bad65 commit e5208f6
Show file tree
Hide file tree
Showing 29 changed files with 1,628 additions and 998 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ Cargo.lock

# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb
test/
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions .idea/15721-s24-catalog1.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions .idea/material_theme_project_new.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ serde_json = "1.0"
tower-http = { version = "0.4.0", features = ["full"] }
dotenv = "0.15.0"
rocksdb = "0.22.0"
anyhow = "1.0.82"
typed-builder = "0.14.0"
uuid = "1.8.0"

pretty_assertions = "0.7"
select = "0.5"
Expand Down
161 changes: 161 additions & 0 deletions benchmark_copy/bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python3
# This script is used to benchmark the catalog server.
# It will start the catalog server, seed the catalog with some namespaces and tables, and use vegeta to stress test the server.
# vegeta: https://github.com/tsenart/vegeta
# Install on mac: brew install vegeta

import subprocess as sp
import time
import signal
import sys
import requests
import argparse
import string
import random


def get_random_str(length=8):
    """Return a random string made of `length` lowercase ASCII letters."""
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)


def run(cmd, note, bg=False, out=None):
    """Run a shell command while printing a one-line progress note.

    Args:
        cmd: Command line; it is always interpreted by the shell.
        note: Label printed (padded to 48 columns) before the command runs.
        bg: When true, start the command with ``Popen`` and return
            immediately instead of waiting for it to finish.
        out: Optional path of a log file. stdout and stderr of the command
            are appended to it; when omitted, both are discarded.

    Returns:
        The ``Popen`` handle when ``bg`` is true. ``None`` for a foreground
        command, including one that exited with a non-zero status (in which
        case "FAIL!" and the error are printed instead of "DONE!").
    """
    print(f"{note.ljust(48)}...", end=" ", flush=True)
    log = open(out, "a") if out else None
    sink = log if log is not None else sp.DEVNULL
    try:
        if bg:
            # cmd is a shell string, so shell=True is required here as well;
            # without it Popen treats the whole string as the program name.
            proc = sp.Popen(cmd, shell=True, stdout=sink, stderr=sink)
        else:
            sp.run(cmd, shell=True, check=True, stdout=sink, stderr=sink)
            proc = None
    except sp.CalledProcessError as err:
        print("FAIL!")
        print("Error:", err)
        return None
    finally:
        # The child keeps its own copy of the descriptor, so the parent's
        # handle can be closed as soon as the command has been started.
        if log is not None:
            log.close()
    print("DONE!")
    return proc


# Scratch directory used by the benchmark, and the default database root
# inside it.
TEST_ROOT_DIR = "test"
DEFAULT_DB_ROOT_DIR = f"{TEST_ROOT_DIR}/db"

# Defaults for the command-line options: the catalog binary to run and the
# base URL requests are sent to.
DEFAULT_BINARY_NAME = "catalog2"
DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1/"

# Default seed sizes (namespaces, tables) and request rate.
DEFAULT_NAMESPACE_NUM = DEFAULT_TABLE_NUM = 1
DEFAULT_RATE = 8

# Command-line interface; every option falls back to its DEFAULT_* constant.
parser = argparse.ArgumentParser(description="Benchmark.")
parser.add_argument("-b", "--binary_name", type=str,
                    default=DEFAULT_BINARY_NAME, help="Name of the catalog binary.")
parser.add_argument("-d", "--db_root", type=str,
                    default=DEFAULT_DB_ROOT_DIR, help="Root directory for the database.")
parser.add_argument("-u", "--base_url", type=str,
                    default=DEFAULT_BASE_URL, help="Base URL for catalog server.")
parser.add_argument("-n", "--namespace_num", type=int,
                    default=DEFAULT_NAMESPACE_NUM, help="The number of namespace to seed in catalog.")
parser.add_argument("-t", "--table_num", type=int,
                    default=DEFAULT_TABLE_NUM, help="The number of table to seed in catalog.")
parser.add_argument("-r", "--rate", type=int,
                    default=DEFAULT_RATE, help="Request rate.")
parser.add_argument("-p", "--plot", action="store_true",
                    default=False, help="Generate a plot of this benchmark.")
args = parser.parse_args()

# The seeding code divides table_num by namespace_num, and the stress test
# reads the first table of the first namespace. Reject counts that would make
# those steps crash with ZeroDivisionError / IndexError.
if args.namespace_num < 1:
    parser.error("--namespace_num must be at least 1")
if args.table_num < args.namespace_num:
    parser.error("--table_num must be at least --namespace_num "
                 "so that every namespace gets a table")

# Endpoint URLs are built as f"{base_url}/{endpoint}"; strip trailing slashes
# so a base URL ending in "/" (like the default) does not produce "//" paths.
args.base_url = args.base_url.rstrip("/")


# Output of the catalog server is appended to this log file.
CATALOG_LOG = f"{TEST_ROOT_DIR}/catalog.log"

# build catalog in release mode
run(f"rm -rf {TEST_ROOT_DIR} && mkdir {TEST_ROOT_DIR}",
    note="initializing test dir")
run(f"cargo build --release && cp target/release/{args.binary_name} {TEST_ROOT_DIR}/{args.binary_name}",
    note="building catalog in release mode")
# The command is started through a shell. `exec` makes that shell replace
# itself with the server, so the returned handle (and the SIGINT sent during
# clean up) targets the server process rather than an intermediate shell.
catalog_server = run(f"exec {TEST_ROOT_DIR}/{args.binary_name} --db-root {args.db_root}",
                     note="starting catalog server", bg=True, out=CATALOG_LOG)
print("Waiting for catalog server to start...")
time.sleep(1)

# run() only prints build failures. Stop here with a clear message if the
# server is not running, instead of failing later with a connection error.
if catalog_server is None or catalog_server.poll() is not None:
    sys.exit(f"Catalog server failed to start, see {CATALOG_LOG}")

# seeding the catalog, uniformly distribute tables to namespaces
print("Seeding namespaces and tables...")
NAMESPACE_ENDPOINT = "namespaces"
TABLE_ENDPOINT = "tables"
namespaces = []
# Integer division: a remainder of table_num is not seeded.
table_per_namespace = args.table_num // args.namespace_num
for _ in range(args.namespace_num):
    namespace = get_random_str(32)
    tables = [get_random_str(32) for _ in range(table_per_namespace)]
    namespaces.append({'name': namespace, 'tables': tables})

    # create namespace
    response = requests.post(f"{args.base_url}/{NAMESPACE_ENDPOINT}",
                             json={'name': [namespace], 'properties': {"foo": "bar"}})
    # Explicit checks instead of `assert`: assertions are skipped under
    # `python -O`, and the status code is needed to diagnose a failure.
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to create namespace {namespace}: "
            f"HTTP {response.status_code} {response.text}"
        )

    # create tables
    for table in tables:
        response = requests.post(
            f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace}/{TABLE_ENDPOINT}",
            json={'name': table}
        )
        if response.status_code != 201:
            raise RuntimeError(
                f"Failed to create table in {namespace}: "
                f"HTTP {response.status_code} {response.text}"
            )

print(f"Seeded {len(namespaces)} namespaces and {len(namespaces) * table_per_namespace} tables.")

# test begins
# 1. single endpoint stress test
# All targets point at the first seeded namespace and its first table.
namespace = namespaces[0]
table = namespace['tables'][0]
targets = dict(
    get_table=f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}/{TABLE_ENDPOINT}/{table}",
    list_table=f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}/{TABLE_ENDPOINT}",
    get_namespace=f"{args.base_url}/{NAMESPACE_ENDPOINT}/{namespace['name']}",
    list_namespace=f"{args.base_url}/{NAMESPACE_ENDPOINT}",
)

for name in targets:
    target = targets[name]
    # Raw vegeta results go to STATISTIC_FILE; the text report is appended
    # to a per-endpoint log.
    STATISTIC_FILE = f"{TEST_ROOT_DIR}/results_{name}.bin"
    vegeta_log = f"{TEST_ROOT_DIR}/vegeta_{name}.log"
    attack = f"echo 'GET {target}' | vegeta attack -rate={args.rate} -duration=10s | tee {STATISTIC_FILE} | vegeta report"
    run(attack, note="single endpoint stress test", out=vegeta_log)
    if not args.plot:
        continue
    PLOT_FILE = f"{TEST_ROOT_DIR}/plot_{name}.html"
    plot_cmd = f"cat {STATISTIC_FILE} | vegeta plot > {PLOT_FILE}"
    run(plot_cmd, note="generating plot")
# ... more?
# 2. random endpoint stress test
# File that lists the vegeta targets, one "GET <url>" per line.
PATH_TARGET_FILE = f"{TEST_ROOT_DIR}/requests_get_table.txt"

# One randomly chosen table URL is written per seeded namespace.
with open(PATH_TARGET_FILE, "w") as target_file:
    for _ in namespaces:
        random_namespace = random.choice(namespaces)
        random_table = random.choice(random_namespace['tables'])
        target = f"{args.base_url}/{NAMESPACE_ENDPOINT}/{random_namespace['name']}/{TABLE_ENDPOINT}/{random_table}"
        print(f"GET {target}", file=target_file)

print("URLs have been written to", PATH_TARGET_FILE)


STATISTIC_FILE = f"{TEST_ROOT_DIR}/results_random.bin"
random_log = f"{TEST_ROOT_DIR}/vegeta_random.log"
attack = f"vegeta attack -targets={PATH_TARGET_FILE} -rate={args.rate} -duration=60s | tee {STATISTIC_FILE} | vegeta report"
run(attack, note="random endpoints stress test", out=random_log)
if args.plot:
    PLOT_FILE = f"{TEST_ROOT_DIR}/plot_random.html"
    plot_cmd = f"cat {STATISTIC_FILE} | vegeta plot > {PLOT_FILE}"
    run(plot_cmd, note="generating plot")

# clean up
# run() returns None when it fails, and the server may already have exited,
# so only signal a process that is still running. Then wait for it so the
# script does not exit before the server has shut down; kill it if it does
# not react to SIGINT in time.
if catalog_server is not None and catalog_server.poll() is None:
    catalog_server.send_signal(signal.SIGINT)
    try:
        catalog_server.wait(timeout=10)
    except sp.TimeoutExpired:
        catalog_server.kill()
        catalog_server.wait()
42 changes: 42 additions & 0 deletions benchmark_copy/parse_dependencies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import sys

# Collect `name -> version spec` for every entry of the dependency tables in
# the root Cargo.toml.
begin = False  # True while the current section header contains "dependencies"
package_version = {}
with open('./Cargo.toml') as f:
    for line in f:
        if line.startswith('['):
            # Section header: remember whether a dependency table starts here.
            begin = 'dependencies' in line
            continue

        if begin:
            name, eq, spec = line.partition('=')
            name = name.strip()
            # Blank lines and comments are not entries. Recording them would
            # add junk keys (such as "") that later match unrelated lines.
            if not eq or not name or name.startswith('#'):
                continue
            # partition/strip accepts both `name = "1"` and `name="1"`.
            package_version[name] = spec.strip()

# Rewrite each library manifest so that every dependency also listed in
# package_version uses the version spec recorded there.
for dir_path in ["./libs/iceberg/", "./libs/rest/", "./libs/test_utils/"]:
    manifest = dir_path + "Cargo.toml"
    rewritten = dir_path + "Cargo_n.toml"
    begin = False  # True while inside a section whose header contains "dependencies"
    # Context managers close both files even if reading or writing fails.
    with open(manifest) as r, open(rewritten, 'w') as w:
        for r_line in r:
            if r_line.startswith('['):
                begin = 'dependencies' in r_line
                w.write(r_line)
                continue

            # Only `name = spec` lines can be dependency entries. Blank lines,
            # comments and everything outside a dependency table are copied
            # through unchanged.
            package = r_line.partition('=')[0].strip() if '=' in r_line else ''
            if begin and package and package in package_version:
                w.write(f"{package} = {package_version[package]}\n")
            else:
                w.write(r_line)
    # Replace the manifest in a single step instead of remove + rename, so a
    # failure cannot leave the directory without a Cargo.toml.
    os.replace(rewritten, manifest)
Loading

0 comments on commit e5208f6

Please sign in to comment.