Skip to content

Commit

Permalink
Early version of profiler harness
Browse files Browse the repository at this point in the history
Include a basic benchmark as the starting point and needed scripts
  • Loading branch information
beroy committed Feb 1, 2024
1 parent 61c8501 commit 6f765ef
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 0 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/profiler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# CI workflow: runs the profiler harness on every pull request.
# Requires AWS OIDC auth (id-token: write) to reach the S3-backed profile DB.
name: Profiler

on:
  pull_request:

jobs:
  run_profiler:
    name: Run Profiler
    runs-on: ubuntu-latest
    permissions: # these permissions must be set for AWS auth to work!
      id-token: write
      contents: read

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1 # shallow clone is enough; only the head commit is profiled

      - name: Configure AWS Credentials
        # v1 runs on the deprecated Node.js 12 runtime; v4 is the current release.
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: us-west-2
          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
          # Session name reflects this job (was "PushDockerImage", a copy-paste
          # leftover that mislabels the session in CloudTrail).
          role-session-name: ProfilerRun

      - name: Run profiler benchmarks
        run: |
          python -m venv profiler_env
          source profiler_env/bin/activate
          ./tools/perf_checker/perf_checker.sh
44 changes: 44 additions & 0 deletions tools/perf_checker/perf_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Check a benchmark's profiling history for performance anomalies.

Takes a benchmark command and a profile-database path, compares the most
recent profiled run against the first recorded run, and exits non-zero on a
suspected regression.
"""

import argparse

import profiler

parser = argparse.ArgumentParser()
parser.add_argument("benchmark", type=str)  # the profiled command, e.g. "python bench.py"
parser.add_argument("db_path", type=str)  # path to the file-based profile DB

args = parser.parse_args()

# Maximum allowable ratio of current runtime to the first recorded runtime
# before the run is flagged as a regression (1.10 == 10% slower).
threshold = 1.10

db = profiler.data.FileBasedProfileDB(args.db_path)
dt = db.find(args.benchmark)

# Need at least two recorded runs to have anything to compare.
if len(dt) >= 2:
    first_profile = dt[0]
    curr_profile = dt[-1]
    first_time = float(first_profile.user_time_sec)
    curr_time = float(curr_profile.user_time_sec)

    # Stored profiles contain literal "\n"/"\t" escape sequences; expand them
    # so the printed report is readable.
    formatted_first_profile = str(first_profile).replace("\\n", "\n").replace("\\t", "\t")
    formatted_curr_profile = str(curr_profile).replace("\\n", "\n").replace("\\t", "\t")

    if curr_time > threshold * first_time:
        # Regression: fail the run. (Original message printed first_time as
        # "curr" and curr_time as "first" — values are now in the right slots.)
        print(f"*** First profile:\n {formatted_first_profile}")
        print(f"*** Current profile:\n {formatted_curr_profile}")
        raise SystemExit(
            f"Potential performance degradation detected on {args.benchmark}: "
            f"curr: {curr_time} vs first: {first_time}"
        )

    if threshold * curr_time < first_time:
        # Current run is markedly faster than the first — informational only.
        print(
            f"Major performance improvement detected on {args.benchmark}: "
            f"curr: {curr_time} vs first: {first_time}"
        )

    # These reference names bound only in this branch, so they must stay inside
    # the `len(dt) >= 2` block (previously they could raise NameError).
    print(f"*** First profile:\n {formatted_first_profile}")
    print(f"*** Current profile:\n {formatted_curr_profile}")
    print(
        f"TileDB version ver = first: {first_profile.tiledbsoma_version} "
        f"curr: {curr_profile.tiledbsoma_version}"
    )
46 changes: 46 additions & 0 deletions tools/perf_checker/perf_checker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# Profiler harness driver: installs dependencies, mounts the S3-backed profile
# DB, then profiles each benchmark and checks its history for regressions.
#
# NOTE: must be bash, not sh — `source`, arrays, and `set -o pipefail` are
# bash-isms. (A stray ".sh" line before the shebang has also been removed.)
set -euox pipefail

# Install the requirements
python -m venv perf
source perf/bin/activate
pip install psutil gitpython somacore tiledbsoma cellxgene_census

# Install mount-s3
sudo wget https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
sudo apt install -y ./mount-s3.deb

# Set up mount-s3. We use an S3 file system as it is necessary to persist the
# profiling run data produced below.
mkdir ./census-profiler-tests
mkdir ./s3_cache
mount-s3 census-profiler-tests ./census-profiler-tests --cache ./s3_cache --metadata-ttl 300
dbpath=$(pwd)/census-profiler-tests

# New benchmarks must be added to this list
declare -a benchmarks=("./tools/perf_checker/benchmark1.py")

# Download the repo including the profiler
git clone https://github.com/single-cell-data/TileDB-SOMA.git
# Downloading TileDB-SOMA (remove the next line once the branch is merged)
cd TileDB-SOMA
git checkout census_profiler
pip install profiler/
pip list | grep profiler
cd ../

# Download GNU time tool
sudo apt-get update -y
sudo apt-get install -y time

# Run all benchmarks and check for performance changes.
# "${benchmarks[@]}" expands every array element (bare ${benchmarks} would
# only yield the first one); $dbpath is quoted in case the path gains spaces.
for benchmark in "${benchmarks[@]}"
do
  python ./TileDB-SOMA/profiler/profiler.py "python ${benchmark}" "$dbpath"
  # Path fixed: was "./tools/pref_checker/..." (typo), which does not exist.
  python ./tools/perf_checker/perf_checker.py "python ${benchmark}" "$dbpath"
done
20 changes: 20 additions & 0 deletions tools/perf_checker/test_anndata_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Benchmark 1: export a Census axis-query slice to AnnData.

The wall/user time of this script is measured externally by the profiler
harness (see tools/perf_checker), so the script itself does no timing.
"""

from sys import stderr
from time import perf_counter

import cellxgene_census
import tiledbsoma as soma

print("Starting bm 1", file=stderr)
# Census release to benchmark against — pinned so timings are comparable
# across runs.
census_S3_latest = dict(census_version="2024-01-01")


def main():
    """Open the pinned Census release, run a small obs-filtered RNA query,
    and materialize it as AnnData."""
    with cellxgene_census.open_soma(**census_S3_latest) as census:
        with census["census_data"]["homo_sapiens"].axis_query(
            measurement_name="RNA",
            # 'hand' matches a small slice, keeping the benchmark quick.
            obs_query=soma.AxisQuery(value_filter="""tissue_general == 'hand'"""),
        ) as query:
            query.to_anndata(X_name="raw")


# Guarded so importing this module (e.g. from test collectors) does not
# trigger a network-heavy Census export; behavior when run as a script
# is unchanged.
if __name__ == "__main__":
    main()

0 comments on commit 6f765ef

Please sign in to comment.