From 6f765ef7fa608ad05be9df625d8005aec4d7646a Mon Sep 17 00:00:00 2001
From: Behnam Robatmili
Date: Fri, 19 Jan 2024 11:02:03 -0800
Subject: [PATCH] Early version of profiler harness

Include a basic benchmark as the starting point and needed scripts
---
Reviewer note: file contents were revised during review; the blob ids on the
`index` lines below predate those revisions. Regenerate the patch with
`git format-patch` after applying if exact ids are needed.

 .github/workflows/profiler.yml            | 30 +++++++++++++++
 tools/perf_checker/perf_checker.py        | 46 +++++++++++++++++++++++
 tools/perf_checker/perf_checker.sh        | 45 ++++++++++++++++++++++
 tools/perf_checker/test_anndata_export.py | 21 +++++++++++
 4 files changed, 142 insertions(+)
 create mode 100644 .github/workflows/profiler.yml
 create mode 100644 tools/perf_checker/perf_checker.py
 create mode 100755 tools/perf_checker/perf_checker.sh
 create mode 100644 tools/perf_checker/test_anndata_export.py

diff --git a/.github/workflows/profiler.yml b/.github/workflows/profiler.yml
new file mode 100644
index 000000000..a86123eb7
--- /dev/null
+++ b/.github/workflows/profiler.yml
@@ -0,0 +1,30 @@
+name: Profiler
+
+on:
+  pull_request:
+
+jobs:
+  run_profiler:
+    name: Run Profiler
+    runs-on: ubuntu-latest
+    permissions: # these permissions must be set for AWS auth to work!
+      id-token: write
+      contents: read
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-region: us-west-2
+          role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
+          role-session-name: PushDockerImage
+
+      - name: Run all tests
+        run: |
+          python -m venv profiler_env
+          source profiler_env/bin/activate
+          ./tools/perf_checker/perf_checker.sh
diff --git a/tools/perf_checker/perf_checker.py b/tools/perf_checker/perf_checker.py
new file mode 100644
index 000000000..72221f07e
--- /dev/null
+++ b/tools/perf_checker/perf_checker.py
@@ -0,0 +1,46 @@
+import profiler
+import argparse
+
+# The script takes a command and a database path and looks for
+# performance anomalies in the performance history of that
+# command across the profiled runs.
+
+parser = argparse.ArgumentParser()
+parser.add_argument("benchmark", type=str)
+parser.add_argument("db_path", type=str)
+
+args = parser.parse_args()
+
+# Processes the set of previously written logs
+# The threshold (ratio) of allowable performance degradation between profiling runs
+threshold = 1.10  # current run may take at most 1.10x the first run's user time
+
+db = profiler.data.FileBasedProfileDB(args.db_path)
+dt = db.find(f"{args.benchmark}")
+
+
+# At least two stored runs are needed to compare the first one against the last one
+if len(dt) >= 2:
+    first_profile = dt[0]
+    curr_profile = dt[len(dt) - 1]
+    first_time = first_profile.user_time_sec
+    curr_time = curr_profile.user_time_sec
+
+    formatted_first_profile = str(first_profile).replace('\\n', '\n').replace('\\t', '\t')
+    formatted_curr_profile = str(curr_profile).replace('\\n', '\n').replace('\\t', '\t')
+
+    if float(curr_time) > threshold * float(first_time):
+        print(f"*** First profile:\n {formatted_first_profile}")
+        print(f"*** Current profile:\n {formatted_curr_profile}")
+        print(f"Major performance decrease detected on {args.benchmark}: curr: {curr_time} vs first: {first_time}")
+        # SystemExit with a message exits with a non-zero status, failing the calling script
+        raise SystemExit(f"Potential performance degradation detected on {args.benchmark}: curr: {curr_time} vs first: {first_time}")
+
+    if threshold * float(curr_time) < float(first_time):
+        print(f"Major performance increase detected on {args.benchmark}: curr: {curr_time} vs first: {first_time}")
+
+    print(f"*** First profile:\n {formatted_first_profile}")
+    print(f"*** Current profile:\n {formatted_curr_profile}")
+    print(
+        f"TileDB version ver = first: {first_profile.tiledbsoma_version} curr: {curr_profile.tiledbsoma_version}"
+    )
diff --git a/tools/perf_checker/perf_checker.sh b/tools/perf_checker/perf_checker.sh
new file mode 100755
index 000000000..4687e548c
--- /dev/null
+++ b/tools/perf_checker/perf_checker.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+set -euxo pipefail
+
+# Installing the requirements
+python -m venv perf
+source perf/bin/activate
+pip install psutil
+pip install gitpython
+pip install somacore
+pip install tiledbsoma
+pip install cellxgene_census
+
+# Installing mount-s3
+sudo wget https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
+sudo apt install -y ./mount-s3.deb
+
+# Setting up mount-s3. We use S3 file system as it is necessary to persist the
+# profiling run data that are performed below
+mkdir -p ./census-profiler-tests
+mkdir -p ./s3_cache
+mount-s3 census-profiler-tests ./census-profiler-tests --cache ./s3_cache --metadata-ttl 300
+dbpath="$(pwd)/census-profiler-tests"
+
+# New benchmarks must be added to this list
+declare -a benchmarks=("./tools/perf_checker/test_anndata_export.py")
+
+# Download the repo including the profiler
+git clone https://github.com/single-cell-data/TileDB-SOMA.git
+cd TileDB-SOMA
+# Using the profiler branch (remove the next line once the branch is merged)
+git checkout census_profiler
+pip install profiler/
+pip list | grep profiler
+cd ../
+
+# Download gnu time tool
+sudo apt-get update -y
+sudo apt-get install -y time
+
+# Running all benchmarks and checking performance changes
+for benchmark in "${benchmarks[@]}"
+do
+    python ./TileDB-SOMA/profiler/profiler.py "python ${benchmark}" "$dbpath"
+    python ./tools/perf_checker/perf_checker.py "python ${benchmark}" "$dbpath"
+done
diff --git a/tools/perf_checker/test_anndata_export.py b/tools/perf_checker/test_anndata_export.py
new file mode 100644
index 000000000..545440e41
--- /dev/null
+++ b/tools/perf_checker/test_anndata_export.py
@@ -0,0 +1,21 @@
+from sys import stderr
+
+import cellxgene_census
+import tiledbsoma as soma
+
+print("Starting bm 1", file=stderr)
+census_S3_latest = dict(census_version="2024-01-01")
+
+
+def main():
+    """Query homo_sapiens RNA cells with tissue_general == 'hand' and export the raw X layer to AnnData."""
+    with cellxgene_census.open_soma(**census_S3_latest) as census:
+        with census["census_data"]["homo_sapiens"].axis_query(
+            measurement_name="RNA",
+            obs_query=soma.AxisQuery(value_filter="""tissue_general == 'hand'"""),
+        ) as query:
+            query.to_anndata(X_name="raw")
+
+
+if __name__ == "__main__":
+    main()