diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e3944cd..4b1244f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -113,9 +113,14 @@ jobs:
           invoke test
       -
         name: Benchmark
+        id: benchmark
         run: |
-          echo 'benchmark='"$(invoke benchmark -f base64)" >> "$GITHUB_OUTPUT"
-
+          invoke benchmark -f json | tee benchmark.txt
+      -
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ matrix.python_version }}_${{ matrix.os }}_${{ matrix.arch }}_${{ matrix.experimental }}
+          path: benchmark.txt
 
   check-versions:
     runs-on: 'ubuntu-latest'
@@ -151,3 +156,76 @@ jobs:
           if [ "x$(invoke local-tag-exists --format json 'v${{ steps.version.outputs.CURRENT_VERSION }}')" = 'xtrue' ]; then
             echo '::warning file=CURRENT_VERSION.txt,line=2,title=Version already exists in tags::Tag v${{ steps.version.outputs.CURRENT_VERSION }} already exists.'
           fi
+
+  collect_benchmark:
+    runs-on: 'ubuntu-latest'
+    needs: test_matrix
+    steps:
+      -
+        name: Set up Python 3
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+          architecture: 'x64'
+      -
+        uses: actions/download-artifact@v4
+        with:
+          path: benchmarks
+      -
+        run: |
+          ls -R benchmarks
+          (jq -c -r --slurp <<< $(for name in $(echo benchmarks/*); do cat "$name/benchmark.txt" | jq -c -r '.[] | ["'$(basename $name)'", .] | flatten'; done)) | tee benchmark.json
+          python -m pip install pytablewriter terminaltables
+          python -c '
+          import json
+          import pytablewriter as ptw
+          from terminaltables import GithubFlavoredMarkdownTable
+
+          with open("benchmark.json", "rb") as fh:
+              items = json.load(fh)
+          platforms = tuple(frozenset(p[0] for p in items))
+          keys = [""]
+          rows = [None] * len(platforms)
+          for platform, group, test_name, result in items:
+              col_name = f"{group}\n{test_name}"
+              try:
+                  keys.index(col_name)
+              except ValueError:
+                  keys.append(col_name)
+
+          def _sort(s: str):
+              version, platform, arch, experimental = s.split("_")
+              experimental = experimental.lower() == "true"
+              is_pypy = False
+              if version.startswith("pypy"):
+                  version = version[len("pypy"):]
+                  is_pypy = True
+              major, minor = map(int, version.split("."))
+              return (-1 if is_pypy else 0, (major, minor), platform, arch, experimental)
+
+          platforms = tuple(sorted(platforms, key=_sort, reverse=True))
+          print("platforms", platforms)
+          for platform, group, test_name, result in items:
+              col_name = f"{group}\n{test_name}"
+              key_index = keys.index(col_name)
+              row_index = platforms.index(platform)
+              if rows[row_index] is None:
+                  rows[row_index] = [None] * len(keys)
+                  rows[row_index][0] = platform
+              rows[row_index][key_index] = result
+          if True:
+              table = GithubFlavoredMarkdownTable([keys, *rows])
+              with open("BENCHMARK.md", "w") as fh:
+                  fh.write("# Benchmark of ${{ github.sha }}\n\n")
+                  fh.write(table.table)
+          else:
+              writer = ptw.RstGridTableWriter(
+                  table_name="Benchmark of ${{ github.sha }}",
+                  headers=keys[1:],
+                  value_matrix=rows,
+              )
+              with open("BENCHMARK.rst", "w") as fh:
+                  writer.dump(fh)
+          '
+          cat BENCHMARK.*
+          echo "$(cat BENCHMARK.md)" >> "$GITHUB_STEP_SUMMARY"
diff --git a/instruct/__main__.py b/instruct/__main__.py
index cf4fe63..9f3a40e 100644
--- a/instruct/__main__.py
+++ b/instruct/__main__.py
@@ -51,49 +51,50 @@ class Next(ComplexTest):
     next: int
 
 
-def main():
+def main(count=1_000_000):
     ttl = timeit.timeit(
-        't = Test(name_or_id="name")', setup="from __main__ import Test", number=1000000
+        't = Test(name_or_id="name")', setup="from __main__ import Test", number=count
     )
-    per_round_ms = (ttl / 1000000) * 1000000
-    print("Overhead of allocation, one field, safeties on: {:.2f}us".format(per_round_ms))
+    print("Overhead of allocation")
+    per_round_ms = (ttl / count) * 1_000_000
+    print("one field, safeties on: {:.2f} us".format(per_round_ms))
 
     ttl = timeit.timeit(
         't = Test(name_or_id="name")',
         setup="from __main__ import TestOptimized as Test",
-        number=1000000,
+        number=count,
     )
-    per_round_ms = (ttl / 1000000) * 1000000
-    print("Overhead of allocation, one field, safeties off: {:.2f}us".format(per_round_ms))
+    per_round_ms = (ttl / count) * 1_000_000
+    print("one field, safeties off: {:.2f} us".format(per_round_ms))
 
-    print("Overhead of setting a field:")
-    ttl = timeit.timeit(test_statement, setup="from __main__ import Test;t = Test()")
-    per_round_ms = (ttl / 1000000) * 1000000
+    print("Overhead of setting a field")
+    ttl = timeit.timeit(test_statement, setup="from __main__ import Test;t = Test()", number=count)
+    per_round_ms = (ttl / count) * 1_000_000
     print("Test with safeties: {:.2f} us".format(per_round_ms))
 
     ttl = timeit.timeit(
         test_statement,
         setup="from __main__ import TestOptimized as Test;t = Test()",
-        number=1000000,
+        number=count,
     )
-    per_round_ms = (ttl / 1000000) * 1000000
+    per_round_ms = (ttl / count) * 1_000_000
     print("Test without safeties: {:.2f} us".format(per_round_ms))
 
     print("Overhead of clearing/setting")
     ttl = timeit.timeit(
         "clear(t);t.name_or_id = 1",
         setup='from __main__ import Test, clear;t = Test(name_or_id="name")',
-        number=1000000,
+        number=count,
     )
-    per_round_ms = (ttl / 1000000) * 1000000
+    per_round_ms = (ttl / count) * 1_000_000
     print("Test with safeties: {:.2f} us".format(per_round_ms))
 
     ttl = timeit.timeit(
         "clear(t);t.name_or_id = 1",
         setup='from __main__ import TestOptimized as Test,clear;t = Test(name_or_id="name")',
-        number=1000000,
+        number=count,
     )
-    per_round_ms = (ttl / 1000000) * 1000000
+    per_round_ms = (ttl / count) * 1_000_000
     print("Test without safeties: {:.2f} us".format(per_round_ms))
 
 
@@ -112,6 +113,7 @@ def main():
     subparsers = parser.add_subparsers()
     benchmark = subparsers.add_parser("benchmark")
     benchmark.set_defaults(mode="benchmark")
+    benchmark.add_argument("count", default=1_000_000, type=int, nargs="?")
     if PyCallGraph is not None:
         callgraph = subparsers.add_parser("callgraph")
         callgraph.set_defaults(mode="callgraph")
@@ -121,7 +123,7 @@ def main():
     if not args.mode:
         raise SystemExit("Use benchmark or callgraph")
     if args.mode == "benchmark":
-        main()
+        main(args.count)
     if PyCallGraph and args.mode == "callgraph":
         names = [random.choice((("test",) * 10) + (-1, None)) for _ in range(1000)]
         ids = [random.randint(1, 232) for _ in range(1000)]
diff --git a/tasks.py b/tasks.py
index db31c7a..aebe224 100644
--- a/tasks.py
+++ b/tasks.py
@@ -295,7 +295,8 @@ def test(context: Context, *, verbose: bool = False, fail_fast: bool = False):
         extra = f"{extra} -svvv"
     if fail_fast:
         extra = f"{extra} -x"
-    context.run(f"{python_bin} -m pytest {extra}")
+    context.run(f"{python_bin} -m coverage run -m pytest {extra}")
+    context.run(f"{python_bin} -m coverage report -m")
 
 
 @task
@@ -1005,15 +1006,40 @@ def parse_with_unit(s: str) -> Tuple[Union[int, float], str]:
 
 
 @task
-def benchmark(context: Context) -> UnitValue:
+def benchmark(
+    context: Context,
+    type_: Union[Type[UnitValue], Type[str], Literal["UnitValue", "str"]] = "str",
+    *,
+    count: Optional[int] = None,
+) -> Union[UnitValue, Tuple[str, ...]]:
+    if type_ == "UnitValue":
+        type_ = UnitValue
+    elif type_ == "str":
+        type_ = str
+    assert type_ in (str, UnitValue)
     python_bin = _.python_path(str, silent=True)
-    fh = context.run(f"{python_bin} -m instruct benchmark", hide="stdout")
+    fh = context.run(f"{python_bin} -m instruct benchmark {count or ''}", hide="stdout")
     assert fh is not None
     tests = []
+    section = None
     for line in fh.stdout.strip().splitlines():
         with suppress(ValueError):
             name, val = (x.strip() for x in line.strip().split(":", 1))
             if val:
-                tests.append(UnitValue(name, _.parse_with_unit(val)))
+                if type_ is UnitValue:
+                    v = UnitValue(name, _.parse_with_unit(val, silent=True))
+                else:
+                    v = (
+                        f"{name}",
+                        f"{val}",
+                    )
+                if section:
+                    tests.append((section, *v))
+                else:
+                    tests.append(v)
+                continue
+        if line.strip().endswith(":"):
+            line = line.strip()[:-1]
+        section = line
     return tuple(tests)
 
diff --git a/test-requirements.txt b/test-requirements.txt
index d0ecf7f..2b1d6ae 100644
--- a/test-requirements.txt
+++ b/test-requirements.txt
@@ -3,3 +3,5 @@ pytest-mock
 flake8<5.0.4a0; python_version < '3.8'
 flake8>6.0.0; python_version >= '3.8'
 pathspec<0.12.0; python_version < '3.8'
+pytest-cov
+coverage-badge