Remove benchmarking comment bot; Run benches weekly and save results as an artifact (#1870)

The benchmark comments have generally been noisy and, I think, unpopular, especially when iterating on an open PR.

This PR removes the current benchmark-commenting functionality and instead runs a once-weekly benchmark (Mondays at midnight) on `main`, saving the results as JSON. These results can then be interpreted by a separate tool that I'm putting together. This will still help us catch performance regressions and trends, but as a once-weekly retrospective report over all merges into main instead of a comment on every single commit.

The key point is that after this PR goes in, we will have a mechanism for reviewing performance data retrospectively, without it being posted as a comment on every commit of every PR.
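The interpretation tool itself is not part of this change. Purely as a hedged sketch of the kind of retrospective report the artifact enables (and assuming cargo-criterion's `benchmark-complete` JSON messages carry an `id` and a `typical.estimate` in nanoseconds; verify this against the installed version), a consumer could look something like:

    import glob
    import json

    # build.py (below) names each result file "<date>__<short-hash>.json" and
    # fills it with one JSON message per line from cargo-criterion.
    for path in sorted(glob.glob("*.json")):
        date, _, commit = path.removesuffix(".json").partition("__")
        with open(path, encoding="utf-8") as f:
            for line in f:
                try:
                    msg = json.loads(line)
                except json.JSONDecodeError:
                    continue  # ignore any non-JSON noise captured from stdout
                if msg.get("reason") == "benchmark-complete":
                    ms = msg["typical"]["estimate"] / 1e6
                    print(f"{date} {commit} {msg['id']}: {ms:.3f} ms")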
sezna authored Aug 22, 2024
1 parent 89ba8c9 commit 1700256
Showing 3 changed files with 132 additions and 70 deletions.
91 changes: 21 additions & 70 deletions .github/workflows/bench-reports.yml
@@ -1,90 +1,41 @@
 name: Benchmark Reports
 
 on:
-  pull_request:
-    branches: "main"
-    types:
-      - opened
-      - reopened
-      - synchronize
-      - ready_for_review
-  merge_group:
   workflow_dispatch:
+  schedule:
+    - cron: "0 0 * * MON"
 
 env:
   CARGO_TERM_COLOR: always
   NODE_VERSION: "18.17.1"
   PYTHON_VERSION: "3.11"
   RUST_TOOLCHAIN_VERSION: "1.80"
   RUST_TOOLCHAIN_COMPONENTS: rustfmt clippy
 
 jobs:
   runBenchmark:
-    if: ${{ !github.event.pull_request.draft }}
-    name: run benchmark
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      pull-requests: write
-    steps:
-      - uses: actions/checkout@v3
-      - uses: boa-dev/criterion-compare-action@v3
-        with:
-          branchName: ${{ github.base_ref }}
-          cwd: "compiler/qsc"
-        if: ${{ github.base_ref != null }}
-
-  runMemoryProfile:
-    if: ${{ !github.event.pull_request.draft }}
-    name: run memory profile
     runs-on: ubuntu-latest
+    name: run benchmark
-    permissions:
-      contents: read
-      pull-requests: write
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
         with:
+          fetch-depth: 0
           ref: main
       - uses: Swatinem/rust-cache@v2
-      - run: |
-          MAIN_MEASUREMENT=$(cargo run --bin memtest)
-          echo "MAIN_MEASUREMENT<<EOF" >> $GITHUB_ENV
-          echo "$MAIN_MEASUREMENT" >> $GITHUB_ENV
-          echo "EOF" >> $GITHUB_ENV
-      - run: |
-          echo "${{env.MAIN_MEASUREMENT}}"
-          echo $MAIN_MEASUREMENT
-      - uses: actions/checkout@v2
-      - run: |
-          BRANCH_MEASUREMENT=$(cargo run --bin memtest)
-          echo "BRANCH_MEASUREMENT<<EOF" >> $GITHUB_ENV
-          echo "$BRANCH_MEASUREMENT" >> $GITHUB_ENV
-          echo "EOF" >> $GITHUB_ENV
-      - run: |
-          echo "${{env.BRANCH_MEASUREMENT}}"
-          echo $BRANCH_MEASUREMENT
-      - uses: actions/github-script@v6
+      - uses: actions/setup-python@v4
         with:
-          script: |
-            if (${{ env.BRANCH_MEASUREMENT }} !== ${{ env.MAIN_MEASUREMENT }}) {
-              const difference = ${{ env.BRANCH_MEASUREMENT }} - ${{ env.MAIN_MEASUREMENT }};
-              try {
-                await github.rest.issues.createComment({
-                  issue_number: context.issue.number,
-                  owner: context.repo.owner,
-                  repo: context.repo.repo,
-                  body: `_Change in memory usage detected by benchmark._
-            ## Memory Report for ${{ github.sha }}
-            | Test | This Branch | On Main | Difference |
-            |-----------------------------|-------------|----------| ---------- |
-            | compile core + standard lib | ${{ env.BRANCH_MEASUREMENT }} bytes | ${{ env.MAIN_MEASUREMENT }} bytes | ${difference} bytes`
-                })
-              } catch (err) {
-                core.warning(`Failed writing comment on GitHub issue: ${err}`)
-              }
-            } else {
-              console.log("no change in memory usage detected by benchmark");
-            }
-        if: ${{ github.base_ref != null }}
+          python-version: ${{ env.PYTHON_VERSION }}
+      - name: setup rust
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: ${{ env.RUST_TOOLCHAIN_VERSION }}
+      - name: setup cargo criterion
+        run: cargo install cargo-criterion
+      - name: run benching script
+        run: ./build.py --ci-bench --no-check-prereqs
+      - name: preserve bench artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmarks
+          path: |
+            *.json
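Once a scheduled run finishes, the `benchmarks` artifact can be fetched for analysis, for example with `gh run download --name benchmarks` or through the REST API. A minimal sketch, assuming the `requests` library, a token in `GITHUB_TOKEN`, and placeholder OWNER/REPO values:

    import os

    import requests

    # GET /repos/{owner}/{repo}/actions/artifacts lists uploaded artifacts;
    # filtered by name, the first entry is typically the most recent run.
    token = os.environ["GITHUB_TOKEN"]
    headers = {"Authorization": f"Bearer {token}"}
    resp = requests.get(
        "https://api.github.com/repos/OWNER/REPO/actions/artifacts",
        headers=headers,
        params={"name": "benchmarks", "per_page": 1},
    )
    resp.raise_for_status()
    artifact = resp.json()["artifacts"][0]

    # archive_download_url serves the uploaded *.json files as a zip archive.
    archive = requests.get(artifact["archive_download_url"], headers=headers)
    archive.raise_for_status()
    with open("benchmarks.zip", "wb") as f:
        f.write(archive.content)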
75 changes: 75 additions & 0 deletions .github/workflows/memory_profile.yml
@@ -0,0 +1,75 @@
+name: Benchmark Reports
+
+on:
+  pull_request:
+    branches: "main"
+    types:
+      - opened
+      - reopened
+      - synchronize
+      - ready_for_review
+  merge_group:
+  workflow_dispatch:
+
+env:
+  CARGO_TERM_COLOR: always
+  NODE_VERSION: "18.17.1"
+  PYTHON_VERSION: "3.11"
+  RUST_TOOLCHAIN_VERSION: "1.80"
+  RUST_TOOLCHAIN_COMPONENTS: rustfmt clippy
+
+jobs:
+  runMemoryProfile:
+    if: ${{ !github.event.pull_request.draft }}
+    name: run memory profile
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          ref: main
+      - uses: Swatinem/rust-cache@v2
+      - run: |
+          MAIN_MEASUREMENT=$(cargo run --bin memtest)
+          echo "MAIN_MEASUREMENT<<EOF" >> $GITHUB_ENV
+          echo "$MAIN_MEASUREMENT" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+      - run: |
+          echo "${{env.MAIN_MEASUREMENT}}"
+          echo $MAIN_MEASUREMENT
+      - uses: actions/checkout@v2
+      - run: |
+          BRANCH_MEASUREMENT=$(cargo run --bin memtest)
+          echo "BRANCH_MEASUREMENT<<EOF" >> $GITHUB_ENV
+          echo "$BRANCH_MEASUREMENT" >> $GITHUB_ENV
+          echo "EOF" >> $GITHUB_ENV
+      - run: |
+          echo "${{env.BRANCH_MEASUREMENT}}"
+          echo $BRANCH_MEASUREMENT
+      - uses: actions/github-script@v6
+        with:
+          script: |
+            if (${{ env.BRANCH_MEASUREMENT }} !== ${{ env.MAIN_MEASUREMENT }}) {
+              const difference = ${{ env.BRANCH_MEASUREMENT }} - ${{ env.MAIN_MEASUREMENT }};
+              try {
+                await github.rest.issues.createComment({
+                  issue_number: context.issue.number,
+                  owner: context.repo.owner,
+                  repo: context.repo.repo,
+                  body: `_Change in memory usage detected by benchmark._
+            ## Memory Report for ${{ github.sha }}
+            | Test | This Branch | On Main | Difference |
+            |-----------------------------|-------------|----------| ---------- |
+            | compile core + standard lib | ${{ env.BRANCH_MEASUREMENT }} bytes | ${{ env.MAIN_MEASUREMENT }} bytes | ${difference} bytes`
+                })
+              } catch (err) {
+                core.warning(`Failed writing comment on GitHub issue: ${err}`)
+              }
+            } else {
+              console.log("no change in memory usage detected by benchmark");
+            }
+        if: ${{ github.base_ref != null }}
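Two things are worth noting about this file. It is the existing runMemoryProfile job lifted out of bench-reports.yml unchanged, so the PR-triggered memory comparison comment is kept; only the criterion timing comments go away. And the `MAIN_MEASUREMENT<<EOF` / `EOF` lines use GitHub Actions' delimiter syntax for writing a multi-line value into `$GITHUB_ENV`, since the single-line `NAME=value` form cannot carry newlines.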
36 changes: 36 additions & 0 deletions build.py
@@ -68,6 +68,13 @@
     help="Build and run the integration tests (default is --no-integration-tests)",
 )
 
+parser.add_argument(
+    "--ci-bench",
+    action=argparse.BooleanOptionalAction,
+    default=False,
+    help="Run the benchmarking script that is run in CI (default is --no-ci-bench)",
+)
+
 args = parser.parse_args()
 
 if args.check_prereqs:
@@ -83,6 +90,7 @@
     and not args.play
     and not args.vscode
     and not args.jupyterlab
+    and not args.ci_bench
 )
 build_cli = build_all or args.cli
 build_pip = build_all or args.pip
@@ -92,6 +100,7 @@
 build_play = build_all or args.play
 build_vscode = build_all or args.vscode
 build_jupyterlab = build_all or args.jupyterlab
+ci_bench = args.ci_bench
 
 # JavaScript projects and eslint, prettier depend on npm_install
 # However the JupyterLab extension uses yarn in a separate workspace
@@ -291,6 +300,28 @@ def run_python_integration_tests(cwd, interpreter):
     subprocess.run(command_args, check=True, text=True, cwd=cwd)
 
 
+def run_ci_historic_benchmark():
+    branch = "main"
+    # rev-list with --pretty emits a "commit <sha>" header line before each
+    # formatted "%ad__%h" line, so keep only the odd-indexed lines.
+    output = subprocess.check_output(
+        [
+            "git",
+            "rev-list",
+            "--since=1 week ago",
+            "--pretty=format:%ad__%h",
+            "--date=short",
+            branch,
+        ]
+    ).decode("utf-8")
+    date_and_commits = [
+        line for i, line in enumerate(output.split("\n")) if i % 2 == 1
+    ]
+    print("\n".join(date_and_commits))
+
+    for date_and_commit in date_and_commits:
+        print("benching commit", date_and_commit)
+        result = subprocess.run(
+            ["cargo", "criterion", "--message-format=json", "--history-id", date_and_commit],
+            capture_output=True,
+            text=True,
+        )
+        with open(f"{date_and_commit}.json", "w") as f:
+            f.write(result.stdout)
+
+
 if build_pip:
     step_start("Building the pip package")
 
@@ -529,3 +560,8 @@ def run_python_integration_tests(cwd, interpreter):
     for test_project_dir in test_projects_directories:
         run_python_tests(test_project_dir, python_bin)
     step_end()
+
+if ci_bench:
+    step_start("Running CI benchmarking script")
+    run_ci_historic_benchmark()
+    step_end()
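The same entry point can be exercised locally by installing cargo-criterion (`cargo install cargo-criterion`) and running `./build.py --ci-bench --no-check-prereqs`, mirroring the workflow steps above. Each benchmarked commit yields one file whose name pairs the commit date with its short hash, e.g. a hypothetical `2024-08-19__89ba8c9.json`.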
