From 9d6813d6083696b67bae126171a128e42609cef5 Mon Sep 17 00:00:00 2001 From: Tian Wang <133085652+aws-tianquaw@users.noreply.github.com> Date: Thu, 18 Apr 2024 10:46:46 -0700 Subject: [PATCH] Add python package size delta report feature for SMD images (#363) --- .github/workflows/check-image-size.yml | 40 +++ .github/workflows/monthly-minor-release.yml | 12 + .pre-commit-config.yaml | 1 + README.md | 10 + src/main.py | 20 +- src/package_report.py | 262 ++++++++++++++++++ src/package_staleness.py | 102 ------- src/utils.py | 53 ++++ template/v1/Dockerfile | 1 - ...ge_staleness.py => test_package_report.py} | 75 ++++- 10 files changed, 471 insertions(+), 105 deletions(-) create mode 100644 .github/workflows/check-image-size.yml create mode 100644 src/package_report.py delete mode 100644 src/package_staleness.py rename test/{test_package_staleness.py => test_package_report.py} (61%) diff --git a/.github/workflows/check-image-size.yml b/.github/workflows/check-image-size.yml new file mode 100644 index 00000000..d8a7a7e5 --- /dev/null +++ b/.github/workflows/check-image-size.yml @@ -0,0 +1,40 @@ +name: Check Image Size +on: + # Manually call + workflow_dispatch: + inputs: + image-version: + required: true + description: Image version= + # Call from other workflow + workflow_call: + inputs: + image-version: + type: string + required: true + +defaults: + run: + shell: bash -l {0} + +jobs: + check-image-size: + name: Run image size check + runs-on: ubuntu-latest + if: endsWith(github.repository, '/sagemaker-distribution') + permissions: + pull-requests: write + contents: write + steps: + - uses: actions/checkout@v4 + - uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ./environment.yml + environment-name: sagemaker-distribution + init-shell: bash + - name: Free up disk space + run: rm -rf /opt/hostedtoolcache + - name: Activate sagemaker-distribution + run: micromamba activate sagemaker-distribution + - name: Run size validation + run: python ./src/main.py 
generate-size-report --target-patch-version ${{ inputs.image-version }} --validate diff --git a/.github/workflows/monthly-minor-release.yml b/.github/workflows/monthly-minor-release.yml index c10bf91a..2de953bb 100644 --- a/.github/workflows/monthly-minor-release.yml +++ b/.github/workflows/monthly-minor-release.yml @@ -43,3 +43,15 @@ jobs: with: release-type: "minor" base-version: ${{ matrix.version }} + check-image-size: + name: Check Image Size + needs: [generate-version-matrix, start-monthly-minor] + permissions: + pull-requests: write + contents: write + strategy: + matrix: ${{ fromJson(needs.generate-version-matrix.outputs.matrix) }} + fail-fast: false + uses: aws/sagemaker-distribution/.github/workflows/check-image-size.yml@main + with: + image-version: ${{ matrix.version }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4c3a2233..b63c6401 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,6 +13,7 @@ repos: hooks: - id: autoflake args: ['--in-place', '--expand-star-imports', '--ignore-init-module-imports', '--remove-all-unused-imports'] + additional_dependencies: [setuptools] - repo: https://github.com/psf/black rev: 23.3.0 hooks: diff --git a/README.md b/README.md index 9792f8b9..5fc7e122 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,16 @@ VERSION= +VERSION= +python ./src/main.py generate-size-report --base-patch-version $BASE_PATCH_VERSION --target-patch-version $VERSION +``` ## Example use cases diff --git a/src/main.py b/src/main.py index 464053af..0b59f3cf 100644 --- a/src/main.py +++ b/src/main.py @@ -20,7 +20,10 @@ _PATCH, _get_dependency_upper_bound_for_runtime_upgrade, ) -from package_staleness import generate_package_staleness_report +from package_report import ( + generate_package_size_report, + generate_package_staleness_report, +) from release_notes_generator import generate_release_notes from utils import ( get_dir_for_version, @@ -399,6 +402,21 @@ def get_arg_parser(): required=True, help="Specify the base patch version 
for which the package staleness report needs to be " "generated.", ) + package_size_parser = subparsers.add_parser( + "generate-size-report", + help="Generates package size report for each of the packages in the given " "image version.", + ) + package_size_parser.set_defaults(func=generate_package_size_report) + package_size_parser.add_argument( + "--target-patch-version", + required=True, + help="Specify the target patch version for which the package size report needs to be " "generated.", + ) + package_size_parser.add_argument( + "--validate", + action="store_true", + help="Validate package size delta and raise error if the validation failed.", + ) return parser diff --git a/src/package_report.py b/src/package_report.py new file mode 100644 index 00000000..aa2a2bc7 --- /dev/null +++ b/src/package_report.py @@ -0,0 +1,262 @@ +import json +import os +from itertools import islice + +import conda.cli.python_api +from conda.models.match_spec import MatchSpec + +from config import _image_generator_configs +from dependency_upgrader import _dependency_metadata +from utils import ( + create_markdown_table, + get_dir_for_version, + get_match_specs, + get_semver, + pull_conda_package_metadata, + sizeof_fmt, +) + + +def _get_package_versions_in_upstream(target_packages_match_spec_out, target_version) -> dict[str, str]: + package_to_version_mapping = {} + is_major_version_release = target_version.minor == 0 and target_version.patch == 0 + is_minor_version_release = target_version.patch == 0 and not is_major_version_release + for package in target_packages_match_spec_out: + # Execute a conda search api call in the linux-64 subdirectory + # packages such as pytorch-gpu are present only in linux-64 sub directory + match_spec_out = target_packages_match_spec_out[package] + package_version = str(match_spec_out.get("version")).removeprefix("==") + package_version = get_semver(package_version) + channel = match_spec_out.get("channel").channel_name + subdir_filter = "[subdir=" + 
match_spec_out.get("subdir") + "]" + search_result = conda.cli.python_api.run_command( + "search", channel + "::" + package + ">=" + str(package_version) + subdir_filter, "--json" + ) + # Load the first result as json. The API sends a json string inside an array + package_metadata = json.loads(search_result[0])[package] + # Response is of the structure + # { 'package_name': [{'url':, 'dependencies': , 'version': + # }, ..., {'url':, 'dependencies': , 'version': + # }] + # We only care about the version number in the last index + package_version_in_conda = "" + if is_major_version_release: + latest_package_version_in_conda = package_metadata[-1]["version"] + elif is_minor_version_release: + package_major_version_prefix = str(package_version.major) + "." + latest_package_version_in_conda = [ + x["version"] for x in package_metadata if x["version"].startswith(package_major_version_prefix) + ][-1] + else: + package_minor_version_prefix = ".".join([str(package_version.major), str(package_version.minor)]) + "." 
+ latest_package_version_in_conda = [ + x["version"] for x in package_metadata if x["version"].startswith(package_minor_version_prefix) + ][-1] + + package_to_version_mapping[package] = latest_package_version_in_conda + return package_to_version_mapping + + +def _generate_staleness_report_per_image( + package_versions_in_upstream, target_packages_match_spec_out, image_config, version +): + print("\n# Staleness Report: " + str(version) + "(" + image_config["image_type"] + ")\n") + staleness_report_rows = [] + for package in package_versions_in_upstream: + version_in_sagemaker_distribution = str(target_packages_match_spec_out[package].get("version")).removeprefix( + "==" + ) + package_string = ( + package + if version_in_sagemaker_distribution == package_versions_in_upstream[package] + else "${\color{red}" + package + "}$" + ) + staleness_report_rows.append( + { + "package": package_string, + "version_in_sagemaker_distribution": version_in_sagemaker_distribution, + "latest_relavant_version": package_versions_in_upstream[package], + } + ) + print( + create_markdown_table( + ["Package", "Current Version in the Distribution image", "Latest Relevant Version in " "Upstream"], + staleness_report_rows, + ) + ) + + +def _get_installed_package_versions_and_conda_versions( + image_config, target_version_dir, target_version +) -> (dict[str, MatchSpec], dict[str, str]): + env_in_file_name = image_config["build_args"]["ENV_IN_FILENAME"] + env_out_file_name = image_config["env_out_filename"] + required_packages_from_target = get_match_specs(target_version_dir + "/" + env_in_file_name).keys() + match_spec_out = get_match_specs(target_version_dir + "/" + env_out_file_name) + # We only care about packages which are present in env.in + # Remove Python from the dictionary, we don't want to track python version as part of our + # staleness report. 
+ target_packages_match_spec_out = { + k: v for k, v in match_spec_out.items() if k in required_packages_from_target and k not in _dependency_metadata + } + latest_package_versions_in_upstream = _get_package_versions_in_upstream( + target_packages_match_spec_out, target_version + ) + return target_packages_match_spec_out, latest_package_versions_in_upstream + + +def _validate_new_package_size(new_package_total_size, target_total_size, image_type, target_version): + # Validate if the new packages account for <= 5% of the total python package size of target image. + new_package_total_size_percent_threshold = 5 + validate_result = None + new_package_total_size_percent = round(new_package_total_size / target_total_size * 100, 2) + new_package_total_size_percent_string = str(new_package_total_size_percent) + if new_package_total_size_percent > new_package_total_size_percent_threshold: + validate_result = ( + "The total size of newly introduced Python packages accounts for more than " + + str(new_package_total_size_percent_threshold) + + "% of the total Python package size of " + + image_type + + " image, version " + + str(target_version) + + "! (" + + str(new_package_total_size_percent) + + "%)" + ) + new_package_total_size_percent_string = "${\color{red}" + str(new_package_total_size_percent) + "}$" + + print( + "The total size of newly introduced Python packages is " + + sizeof_fmt(new_package_total_size) + + ", accounts for " + + new_package_total_size_percent_string + + "% of the total package size." 
+ ) + return validate_result + + +def _generate_python_package_size_report_per_image( + base_pkg_metadata, target_pkg_metadata, image_config, base_version, target_version +): + validate_result = None + image_type = image_config["image_type"].upper() + print("\n# Python Package Size Report " + "(" + image_type + ")\n") + print("\n### Target Image Version: " + str(target_version) + " | Base Image Version: " + str(base_version) + "\n") + if not base_pkg_metadata or not base_version: + print("WARNING: No Python package metadata file found for base image, only partial results will be shown.") + base_total_size = sum(d["size"] for d in base_pkg_metadata.values()) if base_pkg_metadata else None + + # Print out the total size change of all Python packages in the image. + target_total_size = sum(d["size"] for d in target_pkg_metadata.values()) + total_size_delta_val = (target_total_size - base_total_size) if base_total_size else None + total_size_delta_rel = (total_size_delta_val / base_total_size) if base_total_size else None + print("\n## Python Packages Total Size Summary\n") + print( + create_markdown_table( + ["Target Version Total Size", "Base Version Total Size", "Size Change (abs)", "Size Change (%)"], + [ + { + "target_total_size": sizeof_fmt(target_total_size), + "base_total_size": sizeof_fmt(base_total_size) if base_total_size else "-", + "size_delta_val": sizeof_fmt(total_size_delta_val) if total_size_delta_val else "-", + "size_delta_rel": str(round(total_size_delta_rel * 100, 2)) if total_size_delta_rel else "-", + } + ], + ) + ) + + # Print out the largest 20 Python packages in the image, sorted decending by size. 
+ print("\n## Top-20 Largest Python Packages\n") + print( + create_markdown_table( + ["Package", "Version in the Target Image", "Size"], + [ + {"pkg": k, "version": v["version"], "size": sizeof_fmt(v["size"])} + for k, v in islice(target_pkg_metadata.items(), None, 20) + ], + ) + ) + + # Print out the size delta for each changed/new package in the image, sorted decending by size. + if base_pkg_metadata: + print("\n## Python Package Size Delta\n") + new_package_total_size = 0 + package_size_delta_list = [] + for k, v in target_pkg_metadata.items(): + if k not in base_pkg_metadata or base_pkg_metadata[k]["version"] != v["version"]: + base_pkg_size = base_pkg_metadata[k]["size"] if k in base_pkg_metadata else 0 + size_delta_abs = v["size"] - base_pkg_size + package_size_delta_list.append( + { + "package": k, + "target_version": v["version"], + "base_version": base_pkg_metadata[k]["version"] if k in base_pkg_metadata else "-", + "size_delta_abs": size_delta_abs, + "size_delta_rel": (size_delta_abs / base_pkg_size) if base_pkg_size else None, + } + ) + if k not in base_pkg_metadata: + new_package_total_size += v["size"] + # Sort the package size delta based on absolute size diff in decending order. 
+ package_size_delta_list = sorted(package_size_delta_list, key=lambda item: item["size_delta_abs"], reverse=True) + for v in package_size_delta_list: + v["size_delta_rel"] = str(round(v["size_delta_rel"] * 100, 2)) if v["size_delta_rel"] else "-" + v["size_delta_abs"] = sizeof_fmt(v["size_delta_abs"]) + + validate_result = _validate_new_package_size( + new_package_total_size, target_total_size, image_type, target_version + ) + print( + create_markdown_table( + [ + "Package", + "Version in the Target Image", + "Version in the Base Image", + "Size Change (abs)", + "Size Change (%)", + ], + package_size_delta_list, + ) + ) + return validate_result + + +def generate_package_staleness_report(args): + target_version = get_semver(args.target_patch_version) + target_version_dir = get_dir_for_version(target_version) + for image_config in _image_generator_configs: + ( + target_packages_match_spec_out, + latest_package_versions_in_upstream, + ) = _get_installed_package_versions_and_conda_versions(image_config, target_version_dir, target_version) + _generate_staleness_report_per_image( + latest_package_versions_in_upstream, target_packages_match_spec_out, image_config, target_version + ) + + +def generate_package_size_report(args): + target_version = get_semver(args.target_patch_version) + target_version_dir = get_dir_for_version(target_version) + + base_version = None + source_version_txt_file_path = f"{target_version_dir}/source-version.txt" + if os.path.exists(source_version_txt_file_path): + with open(source_version_txt_file_path, "r") as f: + source_patch_version = f.readline() + base_version = get_semver(source_patch_version) + base_version_dir = get_dir_for_version(base_version) if base_version else None + validate_results = [] + for image_config in _image_generator_configs: + base_pkg_metadata = pull_conda_package_metadata(image_config, base_version_dir) if base_version else None + target_pkg_metadata = pull_conda_package_metadata(image_config, target_version_dir) + + 
validate_result = _generate_python_package_size_report_per_image( + base_pkg_metadata, target_pkg_metadata, image_config, base_version, target_version + ) + if validate_result: + validate_results.append(validate_result) + + if args.validate: + if validate_results: + raise Exception(f"Size Validation Failed! Issues found: {validate_results}") + print("Package Size Validation Passed!") diff --git a/src/package_staleness.py b/src/package_staleness.py deleted file mode 100644 index 6a113860..00000000 --- a/src/package_staleness.py +++ /dev/null @@ -1,102 +0,0 @@ -import json - -import conda.cli.python_api -from conda.models.match_spec import MatchSpec - -from config import _image_generator_configs -from dependency_upgrader import _dependency_metadata -from utils import get_dir_for_version, get_match_specs, get_semver - - -def _get_package_versions_in_upstream(target_packages_match_spec_out, target_version) -> dict[str, str]: - package_to_version_mapping = {} - is_major_version_release = target_version.minor == 0 and target_version.patch == 0 - is_minor_version_release = target_version.patch == 0 and not is_major_version_release - for package in target_packages_match_spec_out: - # Execute a conda search api call in the linux-64 subdirectory - # packages such as pytorch-gpu are present only in linux-64 sub directory - match_spec_out = target_packages_match_spec_out[package] - package_version = str(match_spec_out.get("version")).removeprefix("==") - package_version = get_semver(package_version) - channel = match_spec_out.get("channel").channel_name - subdir_filter = "[subdir=" + match_spec_out.get("subdir") + "]" - search_result = conda.cli.python_api.run_command( - "search", channel + "::" + package + ">=" + str(package_version) + subdir_filter, "--json" - ) - # Load the first result as json. 
The API sends a json string inside an array - package_metadata = json.loads(search_result[0])[package] - # Response is of the structure - # { 'package_name': [{'url':, 'dependencies': , 'version': - # }, ..., {'url':, 'dependencies': , 'version': - # }] - # We only care about the version number in the last index - package_version_in_conda = "" - if is_major_version_release: - latest_package_version_in_conda = package_metadata[-1]["version"] - elif is_minor_version_release: - package_major_version_prefix = str(package_version.major) + "." - latest_package_version_in_conda = [ - x["version"] for x in package_metadata if x["version"].startswith(package_major_version_prefix) - ][-1] - else: - package_minor_version_prefix = ".".join([str(package_version.major), str(package_version.minor)]) + "." - latest_package_version_in_conda = [ - x["version"] for x in package_metadata if x["version"].startswith(package_minor_version_prefix) - ][-1] - - package_to_version_mapping[package] = latest_package_version_in_conda - return package_to_version_mapping - - -def _generate_report(package_versions_in_upstream, target_packages_match_spec_out, image_config, version): - print("\n# Staleness Report: " + str(version) + "(" + image_config["image_type"] + ")\n") - print("Package | Current Version in the Distribution image | Latest Relevant Version in " "Upstream") - print("---|---|---") - for package in package_versions_in_upstream: - version_in_sagemaker_distribution = str(target_packages_match_spec_out[package].get("version")).removeprefix( - "==" - ) - if version_in_sagemaker_distribution == package_versions_in_upstream[package]: - print(package + "|" + version_in_sagemaker_distribution + "|" + package_versions_in_upstream[package]) - else: - print( - "${\color{red}" - + package - + "}$" - + "|" - + version_in_sagemaker_distribution - + "|" - + package_versions_in_upstream[package] - ) - - -def _get_installed_package_versions_and_conda_versions( - image_config, target_version_dir, 
target_version -) -> (dict[str, MatchSpec], dict[str, str]): - env_in_file_name = image_config["build_args"]["ENV_IN_FILENAME"] - env_out_file_name = image_config["env_out_filename"] - required_packages_from_target = get_match_specs(target_version_dir + "/" + env_in_file_name).keys() - match_spec_out = get_match_specs(target_version_dir + "/" + env_out_file_name) - # We only care about packages which are present in env.in - # Remove Python from the dictionary, we don't want to track python version as part of our - # staleness report. - target_packages_match_spec_out = { - k: v for k, v in match_spec_out.items() if k in required_packages_from_target and k not in _dependency_metadata - } - latest_package_versions_in_upstream = _get_package_versions_in_upstream( - target_packages_match_spec_out, target_version - ) - return target_packages_match_spec_out, latest_package_versions_in_upstream - - -def generate_package_staleness_report(args): - target_version = get_semver(args.target_patch_version) - target_version_dir = get_dir_for_version(target_version) - for image_config in _image_generator_configs: - ( - target_packages_match_spec_out, - latest_package_versions_in_upstream, - ) = _get_installed_package_versions_and_conda_versions(image_config, target_version_dir, target_version) - _generate_report( - latest_package_versions_in_upstream, target_packages_match_spec_out, image_config, target_version - ) diff --git a/src/utils.py b/src/utils.py index daf1e17e..d8cf991b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -1,5 +1,7 @@ +import json import os +import conda.cli.python_api from conda.env.specs import RequirementsSpec from conda.models.match_spec import MatchSpec from semver import Version @@ -50,3 +52,54 @@ def get_match_specs(file_path) -> dict[str, MatchSpec]: assert "conda" in requirement_spec.environment.dependencies return {MatchSpec(i).get("name"): MatchSpec(i) for i in requirement_spec.environment.dependencies["conda"]} + + +def sizeof_fmt(num): + # Convert 
byte to human-readable size units. + for unit in ("B", "KB", "MB", "GB"): + if abs(num) < 1024.0: + return f"{num:3.2f}{unit}" + num /= 1024.0 + return f"{num:.2f}TB" + + +def create_markdown_table(headers, rows): + """Loop through a data rows and return a markdown table as a multi-line string. + + headers -- A list of strings, each string represents a column name + rows -- A list of dicts, each dict is a row + """ + markdowntable = "" + # Make a string of all the keys in the first dict with pipes before after and between each key + markdownheader = " | ".join(headers) + # Make a header separator line with dashes instead of key names + markdownheaderseparator = "---|" * (len(headers) - 1) + "---" + # Add the header row and separator to the table + markdowntable += markdownheader + "\n" + markdowntable += markdownheaderseparator + "\n" + # Loop through the list of dictionaries outputting the rows + for row in rows: + markdownrow = "" + for k, v in row.items(): + markdownrow += str(v) + "|" + markdowntable += markdownrow[:-1] + "\n" + return markdowntable + + +def pull_conda_package_metadata(image_config, image_artifact_dir): + results = dict() + env_out_file_name = image_config["env_out_filename"] + match_spec_out = get_match_specs(image_artifact_dir + "/" + env_out_file_name) + + target_packages_match_spec_out = {k: v for k, v in match_spec_out.items()} + + for package, match_spec_out in target_packages_match_spec_out.items(): + if str(match_spec_out).startswith("conda-forge"): + # Pull package metadata from conda-forge and dump into json file + search_result = conda.cli.python_api.run_command("search", str(match_spec_out), "--json") + package_metadata = json.loads(search_result[0])[package][0] + results[package] = {"version": package_metadata["version"], "size": package_metadata["size"]} + # Sort the package sizes in decreasing order + results = {k: v for k, v in sorted(results.items(), key=lambda item: item[1]["size"], reverse=True)} + + return results diff --git 
a/template/v1/Dockerfile b/template/v1/Dockerfile index a0fe9e71..8f1ac36a 100644 --- a/template/v1/Dockerfile +++ b/template/v1/Dockerfile @@ -194,4 +194,3 @@ RUN SYSTEM_PYTHON_PATH=$(python3 -c "from __future__ import print_function;impor sed -i 's="PySpark"="SparkMagic PySpark"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json ENV SHELL=/bin/bash - diff --git a/test/test_package_staleness.py b/test/test_package_report.py similarity index 61% rename from test/test_package_staleness.py rename to test/test_package_report.py index b26283a5..c59c5e72 100644 --- a/test/test_package_staleness.py +++ b/test/test_package_report.py @@ -7,7 +7,10 @@ from unittest.mock import patch from config import _image_generator_configs -from package_staleness import _get_installed_package_versions_and_conda_versions +from package_report import ( + _generate_python_package_size_report_per_image, + _get_installed_package_versions_and_conda_versions, +) from utils import get_match_specs, get_semver @@ -31,6 +34,23 @@ def _create_env_out_docker_file(file_path): ) +def _create_base_image_package_metadata(): + return { + "libllvm18": {"version": "18.1.1", "size": 37301754}, + "python": {"version": "3.12.1", "size": 30213651}, + "tqdm": {"version": "4.66.2", "size": 89567}, + } + + +def _create_target_image_package_metadata(): + return { + "libllvm18": {"version": "18.1.2", "size": 38407510}, + "python": {"version": "3.12.2", "size": 32312631}, + "libclang": {"version": "18.1.2", "size": 19272925}, + "tqdm": {"version": "4.66.2", "size": 89567}, + } + + def test_get_match_specs(tmp_path): env_out_file_path = tmp_path / "env.out" _create_env_out_docker_file(env_out_file_path) @@ -93,3 +113,56 @@ def test_get_installed_package_versions_and_conda_versions(mock_run_command, tmp assert latest_package_versions_in_conda_forge["ipykernel"] == "6.21.3" # Only for numpy there is a new major version available. 
assert latest_package_versions_in_conda_forge["numpy"] == "2.1.0" + + +def test_generate_package_size_report(capsys): + base_pkg_metadata = _create_base_image_package_metadata() + target_pkg_metadata = _create_target_image_package_metadata() + + _generate_python_package_size_report_per_image( + base_pkg_metadata, target_pkg_metadata, _image_generator_configs[1], "1.6.1", "1.6.2" + ) + + captured = capsys.readouterr() + # Assert total size delta report + assert "85.91MB|64.47MB|21.44MB|33.25" in captured.out + + # Assert size delta report for each changed package + assert "libclang|18.1.2|-|18.38MB|-" in captured.out + assert "python|3.12.2|3.12.1|2.00MB|6.95" in captured.out + assert "libllvm18|18.1.2|18.1.1|1.05MB|2.96" in captured.out + assert "tqdm|4.66.2|4.66.2" not in captured.out + + # Assert top-k largest package report + assert "libllvm18|18.1.2|36.63MB" in captured.out + assert "python|3.12.2|30.82MB" in captured.out + assert "libclang|18.1.2|18.38MB" in captured.out + assert "tqdm|4.66.2|87.47KB" in captured.out + + # Assert size validation message + assert ( + "The total size of newly introduced Python packages is 18.38MB, accounts for ${\color{red}21.39}$% of the total package size." + in captured.out + ) + + +def test_generate_package_size_report_when_base_version_is_not_present(capsys): + target_pkg_metadata = _create_target_image_package_metadata() + + _generate_python_package_size_report_per_image( + None, target_pkg_metadata, _image_generator_configs[1], None, "1.6.2" + ) + + captured = capsys.readouterr() + # Assert total size delta report + assert ( + "WARNING: No Python package metadata file found for base image, only partial results will be shown." 
+ in captured.out + ) + assert "85.91MB|-|-|-" in captured.out + + # Assert top-k largest package report + assert "libllvm18|18.1.2|36.63MB" in captured.out + assert "python|3.12.2|30.82MB" in captured.out + assert "libclang|18.1.2|18.38MB" in captured.out + assert "tqdm|4.66.2|87.47KB" in captured.out