C2SP · woodruffw · Feb 15, 2024 · Feb 11, 2024 · Feb 13, 2024 · Feb 13, 2024
diff --git a/.github/actions/run-harnesses/action.yml b/.github/actions/run-harnesses/action.yml
@@ -0,0 +1,52 @@
+name: run harnesses
+description: Run x509-limbo's in-repo harnesses and cache their results
+outputs:
+  cache-hit:
+    description: Whether or not the cache was hit
+    value: "${{ steps.restore-cache.outputs.cache-hit }}"
+
+runs:
+  using: composite
+
+  steps:
+    # Precondition: actions/checkout has already run, so limbo.json is
+    # present in the workspace when the cache key is computed.
+
+    # NOTE: The key is computed once, up front, because `harness/` is not
+    # static: on a cache miss builds happen underneath it, so its hash at
+    # the end of the action differs from its hash at the start.
+    - name: Compute cache key
+      id: compute-cache-key
+      env:
+        LIMBO_HASH: "${{ hashFiles('limbo.json') }}"
+        HARNESS_HASH: "${{ hashFiles('harness/**') }}"
+      shell: bash
+      run: |
+        echo "HARNESS_CACHE_KEY=limbo-harness-${LIMBO_HASH}-${HARNESS_HASH}" >> "${GITHUB_OUTPUT}"
+
+    - id: restore-cache
+      uses: actions/cache/restore@v4
+      with:
+        path: results
+        key: "${{ steps.compute-cache-key.outputs.HARNESS_CACHE_KEY }}"
+
+    - if: steps.restore-cache.outputs.cache-hit != 'true'
+      uses: actions/setup-go@v5
+      with:
+        go-version: ">=1.20.5"
+
+    - name: setup limbo
+      if: steps.restore-cache.outputs.cache-hit != 'true'
+      shell: bash
+      run: make dev
+
+    - name: run harnesses
+      if: steps.restore-cache.outputs.cache-hit != 'true'
+      shell: bash
+      run: make test -j
+
+    - if: steps.restore-cache.outputs.cache-hit != 'true'
+      uses: actions/cache/save@v4
+      with:
+        path: results
+        key: "${{ steps.compute-cache-key.outputs.HARNESS_CACHE_KEY }}"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -63,7 +63,7 @@ jobs:
           go-version: ">=1.20.5"
 
       - name: check that harnesses build
-        run: make build-harnesses
+        run: make build-harnesses -j
 
   all-checks-pass:
     if: always()

diff --git a/.github/workflows/regression.yml b/.github/workflows/regression.yml
@@ -0,0 +1,30 @@
+name: Testcase regression detection
+
+on:
+  pull_request:
+
+jobs:
+  regressions:
+    permissions:
+      # The regression check comments on and labels the pull request.
+      pull-requests: write
+      issues: write
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          cache: pip
+          cache-dependency-path: pyproject.toml
+          python-version: ">=3.12"
+
+      - id: run-harnesses
+        uses: ./.github/actions/run-harnesses
+
+      - name: run regression checks
+        env:
+          GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
+        run: make run ARGS="regression"
+
diff --git a/.github/workflows/site.yml b/.github/workflows/site.yml
@@ -13,32 +13,7 @@ concurrency:
   cancel-in-progress: false
 
 jobs:
-  run-all-harnesses:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: ">=3.12"
-          cache: "pip"
-          cache-dependency-path: pyproject.toml
-
-      - uses: actions/setup-go@v5
-        with:
-          go-version: ">=1.20.5"
-
-      - name: run harnesses
-        run: make test
-
-      - name: upload results
-        uses: actions/upload-artifact@v4
-        with:
-          name: results
-          path: results
-
   deploy:
-    needs: ["run-all-harnesses"]
     permissions:
       contents: read
       pages: write
@@ -51,16 +26,12 @@ jobs:
       - uses: actions/checkout@v4
 
       - uses: actions/setup-python@v5
-        with:
-          python-version: ">=3.12"
-          cache: "pip"
-          cache-dependency-path: pyproject.toml
+        with:
+          python-version: ">=3.12"
+          cache: "pip"
+          cache-dependency-path: pyproject.toml
 
-      - name: retrieve results
-        uses: actions/download-artifact@v4
-        with:
-          name: results
-          path: results
+      - uses: ./.github/actions/run-harnesses
 
       - name: build site
         run: make site

diff --git a/limbo/_cli.py b/limbo/_cli.py
@@ -4,17 +4,21 @@
 import json
 import logging
 import os
+import random
 import subprocess
 import sys
+from collections import defaultdict
 from pathlib import Path
 
+import requests
+from pydantic import TypeAdapter
 from pydantic.json_schema import models_json_schema
 
-from limbo import testcases
+from limbo import _github, _markdown, testcases
 from limbo.testcases import bettertls, online
 
 from . import __version__
-from .models import Limbo
+from .models import ActualResult, Limbo, LimboResult
 
 logging.basicConfig()
 logger = logging.getLogger(__name__)
@@ -83,6 +87,19 @@ def main() -> None:
     harness.add_argument("harness", type=str, help="The harness to execute")
     harness.set_defaults(func=_harness)
 
+    # `limbo regression`
+    regression = subparsers.add_parser(
+        "regression", help="Run regression checks against the last result set"
+    )
+    regression.add_argument(
+        "--current",
+        type=Path,
+        default=Path("results"),
+        metavar="DIR",
+        help="The current results to check against",
+    )
+    regression.set_defaults(func=_regression)
+
     args = parser.parse_args()
     args.func(args)
 
@@ -133,3 +150,74 @@ def _harness(args: argparse.Namespace) -> None:
         args.output.write_text(result.stdout)
     except subprocess.CalledProcessError as e:
         print(e.stderr, file=sys.stderr)
+
+
+def _regression(args: argparse.Namespace) -> None:
+    """Diff `args.current` against the published results and report changes on GitHub."""
+    # Fail on an HTTP error or a stalled connection instead of validating an error body.
+    response = requests.get("https://x509-limbo.com/_api/all-results.json", timeout=60)
+    response.raise_for_status()
+    previous_results = TypeAdapter(list[LimboResult]).validate_python(response.json())
+
+    # Index by harness up front; setdefault keeps the first file seen per harness.
+    current_by_harness: dict[str, LimboResult] = {}
+    for path in args.current.glob("*.json"):
+        current = LimboResult.model_validate_json(path.read_text())
+        current_by_harness.setdefault(current.harness, current)
+
+    # mapping of harness -> [(testcase-id, previous-result, current-result)]
+    regressions: dict[str, list[tuple[str, ActualResult, ActualResult]]] = defaultdict(list)
+    for previous_result in previous_results:
+        current_result = current_by_harness.get(previous_result.harness)
+        if not current_result:
+            continue
+
+        previous_by_id = {r.id: r for r in previous_result.results}
+        current_by_id = {r.id: r for r in current_result.results}
+
+        for tc in previous_by_id.keys() & current_by_id.keys():  # ids present in both runs
+            before, after = previous_by_id[tc].actual_result, current_by_id[tc].actual_result
+            if before != after:
+                regressions[previous_result.harness].append((tc, before, after))
+
+    if os.getenv("GITHUB_ACTIONS"):
+        if regressions:
+            _github.step_summary(_render_regressions(regressions))
+            _github.comment(
+                ":suspect: Regressions found. Review these changes "
+                "**very carefully** before continuing!\n\n"
+                f"Sampled regressions: {_github.workflow_url()}"
+            )
+            _github.label(add=[_github.REGRESSIONS_LABEL], remove=[_github.NO_REGRESSIONS_LABEL])
+        else:
+            # Avoid spamming the user with "no regression" comments.
+            if not _github.has_label(_github.NO_REGRESSIONS_LABEL):
+                _github.comment(":shipit: No regressions found.")
+                _github.label(
+                    add=[_github.NO_REGRESSIONS_LABEL], remove=[_github.REGRESSIONS_LABEL]
+                )
+
+
+def _render_regressions(
+    all_regressions: dict[str, list[tuple[str, ActualResult, ActualResult]]],
+) -> str:
+    """Render a Markdown section per harness listing up to 10 sampled regressions."""
+    sections: list[str] = []
+
+    for harness, found in all_regressions.items():
+        # The bettertls suite is huge, so prefer a sample drawn only from the
+        # other testcases. When fewer than 10 of those exist, sample from the
+        # harness's full list (bettertls included) instead.
+        preferred = [entry for entry in found if not entry[0].startswith("bettertls::")]
+        pool = preferred if len(preferred) >= 10 else found
+        sampled = random.sample(pool, min(len(pool), 10))
+
+        # One bullet per sampled (testcase, previous, current) triple.
+        bullets = "".join(
+            f"* {_markdown.testcase_link(tc)} went from {prev.value} to {curr.value}\n"
+            for tc, prev, curr in sampled
+        )
+        # Heading, bullets, then a blank line separating harness sections.
+        sections.append(f"## {harness}\n\n{bullets}\n")
+
+    return "".join(sections)
diff --git a/limbo/_github.py b/limbo/_github.py
@@ -0,0 +1,154 @@
+"""
+Utilities for interacting with GitHub and GitHub Actions.
+
+These utilities assume that they're being run from GitHub Actions,
+with a sufficiently permissioned `GITHUB_TOKEN`.
+"""
+
+import json
+import logging
+import os
+from functools import cache
+from pathlib import Path
+from typing import Any
+from urllib.parse import quote
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+REGRESSIONS_LABEL = ":skull: regressions"
+NO_REGRESSIONS_LABEL = ":see_no_evil: no-regressions"
+
+# Upper bound, in seconds, on any single GitHub API call, so that an
+# unresponsive API fails the job instead of stalling it.
+_API_TIMEOUT = 30
+
+
+@cache
+def github_token() -> str:
+    """Return `GITHUB_TOKEN` from the environment; raises `KeyError` if it is unset."""
+    return os.environ["GITHUB_TOKEN"]
+
+
+@cache
+def github_event() -> dict[str, Any]:
+    """Return the JSON document at `GITHUB_EVENT_PATH`, parsed once and then cached."""
+    return json.loads(Path(os.environ["GITHUB_EVENT_PATH"]).read_text())  # type: ignore[no-any-return]
+
+
+def _api_headers() -> dict[str, str]:
+    """Return the authentication and API-version headers shared by every API call."""
+    return {
+        "Authorization": f"Bearer {github_token()}",
+        "X-GitHub-Api-Version": "2022-11-28",
+    }
+
+
+def _pull_request_issue() -> tuple[str, Any, str]:
+    """
+    Return `(repo, number, url)` for the pull request in the triggering event.
+
+    `url` is the REST API URL of the pull request's issue resource. Raises
+    `ValueError` if the event payload has no `pull_request` key.
+    """
+    event = github_event()
+    if "pull_request" not in event:
+        raise ValueError("wrong GitHub event: need pull_request")
+
+    number = event["number"]
+    repo = event["repository"]["full_name"]
+    return repo, number, f"https://api.github.com/repos/{repo}/issues/{number}"
+
+
+def comment(msg: str) -> None:
+    """
+    Post `msg` as a new comment on the triggering pull request.
+
+    Raises `ValueError` for non-pull-request events and `requests.HTTPError`
+    if the API responds with an error status.
+    """
+    repo, number, url = _pull_request_issue()
+
+    logger.info(f"leaving a comment on {repo} #{number}")
+
+    requests.post(
+        f"{url}/comments",
+        headers=_api_headers(),
+        json={"body": msg},
+        timeout=_API_TIMEOUT,
+    ).raise_for_status()
+
+
+def label(*, add: list[str], remove: list[str]) -> None:
+    """
+    Add the `add` labels to the triggering pull request, then remove the `remove` labels.
+
+    Removing a label that is not present is not an error. Raises `ValueError`
+    for non-pull-request events and `requests.HTTPError` on any other API error.
+    """
+    repo, number, issue_url = _pull_request_issue()
+    url = f"{issue_url}/labels"
+
+    if add:
+        logger.info(f"adding labels to {repo} #{number}: {add}")
+        requests.post(
+            url,
+            headers=_api_headers(),
+            json={"labels": add},
+            timeout=_API_TIMEOUT,
+        ).raise_for_status()
+
+    # `name` rather than `label`, so the loop doesn't shadow this function.
+    for name in remove:
+        logger.info(f"removing label:{name} from {repo} #{number}")
+        resp = requests.delete(
+            # Percent-encode the name: labels may contain characters such as
+            # `/`, `?` or `#` that would otherwise change the URL's meaning.
+            f"{url}/{quote(name, safe='')}",
+            headers=_api_headers(),
+            timeout=_API_TIMEOUT,
+        )
+
+        # 404 is expected if the label doesn't exist
+        if resp.status_code != 404:
+            resp.raise_for_status()
+
+
+def has_label(label: str) -> bool:
+    """
+    Return whether the triggering pull request currently carries `label`.
+
+    Raises `ValueError` for non-pull-request events and `requests.HTTPError`
+    if the API responds with an error status.
+    """
+    _, _, url = _pull_request_issue()
+
+    # Send the token like every other call here: without it the request
+    # is anonymous, which GitHub rate-limits far more strictly and refuses
+    # for private repositories.
+    resp = requests.get(
+        f"{url}/labels",
+        headers=_api_headers(),
+        # The listing is paginated; request the largest page size.
+        params={"per_page": 100},
+        timeout=_API_TIMEOUT,
+    )
+    resp.raise_for_status()
+
+    return any(lbl["name"] == label for lbl in resp.json())
+
+
+@cache
+def workflow_url() -> str:
+    """Return the URL of the current workflow run, built from the `GITHUB_*` variables."""
+    url = os.getenv("GITHUB_SERVER_URL")
+    repo = os.getenv("GITHUB_REPOSITORY")
+    run_id = os.getenv("GITHUB_RUN_ID")
+    return f"{url}/{repo}/actions/runs/{run_id}"
+
+
+def step_summary(contents: str) -> None:
+    """Write `contents` to the file at `GITHUB_STEP_SUMMARY`, replacing what was there."""
+    Path(os.environ["GITHUB_STEP_SUMMARY"]).write_text(contents)