From 65b529176c57f9766c6e7433aac3defa03265806 Mon Sep 17 00:00:00 2001
From: Andrei Neagu
Date: Mon, 16 Dec 2024 13:47:13 +0100
Subject: [PATCH] replace unarchiver

---
 .github/workflows/ci-testing-deploy.yml       |   2 +
 ci/github/helpers/install_7zip.bash           |  12 ++
 scripts/install_7zip.bash                     |  28 ++++
 services/dynamic-sidecar/Dockerfile           |   5 +
 .../core/errors.py                            |   4 +
 .../core/utils.py                             |   2 +-
 .../modules/nodeports.py                      |  35 ++---
 .../modules/seven_zip_wrapper.py              |  59 +++++++
 .../unit/test_modules_seven_zip_wrapper.py    | 145 ++++++++++++++++++
 9 files changed, 265 insertions(+), 27 deletions(-)
 create mode 100755 ci/github/helpers/install_7zip.bash
 create mode 100755 scripts/install_7zip.bash
 create mode 100644 services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py
 create mode 100644 services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py

diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml
index aa1efbee7a9..08924e611ef 100644
--- a/.github/workflows/ci-testing-deploy.yml
+++ b/.github/workflows/ci-testing-deploy.yml
@@ -1304,6 +1304,8 @@ jobs:
           cache-dependency-glob: "**/dynamic-sidecar/requirements/ci.txt"
       - name: show system version
         run: ./ci/helpers/show_system_versions.bash
+      - name: install 7zip
+        run: ./ci/github/helpers/install_7zip.bash
       - name: install
         run: ./ci/github/unit-testing/dynamic-sidecar.bash install
       - name: typecheck
diff --git a/ci/github/helpers/install_7zip.bash b/ci/github/helpers/install_7zip.bash
new file mode 100755
index 00000000000..f30532a8ec8
--- /dev/null
+++ b/ci/github/helpers/install_7zip.bash
@@ -0,0 +1,12 @@
+#!/bin/bash
+#
+# Installs 7zip by running scripts/install_7zip.bash (path relative to this file)
+#
+
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/
+set -o errexit # abort on nonzero exitstatus
+set -o nounset # abort on unbound variable
+set -o pipefail # don't hide errors within pipes
+IFS=$'\n\t'
+
+exec "$( dirname -- "$0"; )"/../../../scripts/install_7zip.bash
diff --git a/scripts/install_7zip.bash b/scripts/install_7zip.bash
new file mode 100755
index 00000000000..1276839f8ab
--- /dev/null
+++ b/scripts/install_7zip.bash
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Installs a pinned version of 7zip (linux-x64 build) as /usr/bin/7z
+#
+
+# http://redsymbol.net/articles/unofficial-bash-strict-mode/
+set -o errexit # abort on nonzero exitstatus
+set -o nounset # abort on unbound variable
+set -o pipefail # don't hide errors within pipes
+IFS=$'\n\t'
+
+
+SEVEN_ZIP_VERSION="2409"
+## 7z compression
+echo "create install dir"
+rm -rf /tmp/7zip
+mkdir -p /tmp/7zip
+cd /tmp/7zip
+
+# --fail: exit with an error on HTTP failures instead of saving the error page
+curl --fail -LO https://www.7-zip.org/a/7z${SEVEN_ZIP_VERSION}-linux-x64.tar.xz
+tar -xvf 7z${SEVEN_ZIP_VERSION}-linux-x64.tar.xz
+cp 7zz /usr/bin/7z
+
+echo "remove install dir"
+rm -rf /tmp/7zip
+
+echo "test installation"
+7z --help
diff --git a/services/dynamic-sidecar/Dockerfile b/services/dynamic-sidecar/Dockerfile
index a5173e7f19a..ab59599184f 100644
--- a/services/dynamic-sidecar/Dockerfile
+++ b/services/dynamic-sidecar/Dockerfile
@@ -30,6 +30,7 @@ RUN \
   apt-get update && \
   apt-get install -y --no-install-recommends\
   curl \
+  xz-utils \
   gnupg \
   lsb-release \
   && mkdir -p /etc/apt/keyrings \
@@ -56,6 +57,10 @@ RUN \
 RUN \
   --mount=type=bind,source=scripts/install_rclone.bash,target=install_rclone.bash \
   ./install_rclone.bash
+# install 7zip
+RUN \
+  --mount=type=bind,source=scripts/install_7zip.bash,target=install_7zip.bash \
+  ./install_7zip.bash
 
 RUN AWS_CLI_VERSION="2.11.11" \
   && curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-${AWS_CLI_VERSION}.zip" -o "awscliv2.zip" \
diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py
index b9a449ecb36..722b5c2ce14 100644
--- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py
+++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py
@@ -29,3 +29,7 @@ class ContainerExecCommandFailedError(BaseDynamicSidecarError):
         "Command '{command}' exited with code '{exit_code}'"
         "and output: '{command_result}'"
     )
+
+
+class SevenZipError(BaseDynamicSidecarError):
+    msg_template = "Could not finish command: '{command}'\nReason: {command_result}"
diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py
index 4e6f9ee0df5..3993ce37a56 100644
--- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py
+++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py
@@ -49,7 +49,7 @@ def _close_transport(proc: Process):
 
 async def async_command(
     command: str,
-    timeout: float | None = None,
+    timeout: float | None = None,  # noqa: ASYNC109
     pipe_as_input: str | None = None,
     env_vars: dict[str, str] | None = None,
 ) -> CommandResult:
diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py
index 74d898963c2..596876c9244 100644
--- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py
+++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py
@@ -20,12 +20,7 @@
 from models_library.projects_nodes_io import NodeIDStr
 from models_library.services_types import ServicePortKey
 from pydantic import ByteSize
-from servicelib.archiving_utils import (
-    PrunableFolder,
-    UnsupportedArchiveFormatError,
-    archive_dir,
-    unarchive_dir,
-)
+from servicelib.archiving_utils import PrunableFolder, archive_dir
 from servicelib.async_utils import run_sequentially_in_context
 from servicelib.file_utils import remove_directory
 from servicelib.logging_utils import log_context
@@ -41,6 +36,7 @@
 
 from ..core.settings import ApplicationSettings, get_settings
 from ..modules.notifications import PortNotifier
+from .seven_zip_wrapper import unarchive_zip_to
 
 
 class PortTypeName(str, Enum):
@@ -298,28 +294,15 @@ async def _get_data_from_port(
             dest_folder = PrunableFolder(final_path)
 
             if _is_zip_file(downloaded_file):
-                # unzip updated data to dest_path
-                _logger.debug("unzipping %s", downloaded_file)
-                try:
-                    unarchived: set[Path] = await unarchive_dir(
-                        archive_to_extract=downloaded_file,
-                        destination_folder=final_path,
-                        progress_bar=sub_progress,
+                with log_context(
+                    _logger,
+                    logging.DEBUG,
+                    f"unzipping '{downloaded_file}' to {final_path}",
+                ):
+                    unarchived: set[Path] = await unarchive_zip_to(
+                        downloaded_file, final_path, sub_progress
                     )
                     dest_folder.prune(exclude=unarchived)
-
-                    _logger.debug("all unzipped in %s", final_path)
-                except UnsupportedArchiveFormatError:
-                    _logger.warning(
-                        "Could not extract archive '%s' to '%s' moving it to: '%s'",
-                        downloaded_file,
-                        final_path,
-                        final_path / downloaded_file.name,
-                    )
-                    await _move_file_to_input_port(
-                        final_path, downloaded_file, dest_folder
-                    )
-
             else:
                 await _move_file_to_input_port(final_path, downloaded_file, dest_folder)
 
diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py
new file mode 100644
index 00000000000..5e5896c226e
--- /dev/null
+++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py
@@ -0,0 +1,90 @@
+import asyncio
+import logging
+import re
+import shlex
+from pathlib import Path
+
+from models_library.basic_types import IDStr
+from servicelib.progress_bar import ProgressBarData
+
+from ..core.errors import SevenZipError
+from ..core.utils import async_command
+
+_logger = logging.getLogger(__name__)
+
+
+async def _get_file_count(zip_path: Path) -> int:
+    """Returns the file count reported in the output of `7z l`.
+
+    Raises:
+        SevenZipError: if the command fails or its output has no file count
+    """
+    # quoted since the command is handed over as one string
+    # NOTE(review): presumably run through a shell -- confirm in `async_command`
+    result = await async_command(f"7z l {shlex.quote(str(zip_path))}")
+    if not result.success:
+        raise SevenZipError(command=result.command, command_result=result.message)
+
+    match = re.search(r"\s*(\d+)\s*files", result.message)
+    if match is None:
+        # raise the module's error instead of failing with AttributeError
+        raise SevenZipError(command=result.command, command_result=result.message)
+    return int(match.group(1))
+
+
+async def unarchive_zip_to(
+    zip_path: Path,
+    output_dir: Path,
+    progress_bar: ProgressBarData | None = None,
+) -> set[Path]:
+    """Extracts `zip_path` into `output_dir` using `7z x`.
+
+    One progress step is reported for each output line starting with `- `.
+
+    Returns:
+        all the files found in `output_dir` once the extraction is done.
+        NOTE(review): files already in `output_dir` before extracting are
+        included, a caller pruning with this set as exclusion keeps them.
+
+    Raises:
+        SevenZipError: if listing or extracting the archive fails
+    """
+    if not progress_bar:
+        progress_bar = ProgressBarData(
+            num_steps=1, description=IDStr(f"extracting {zip_path.name}")
+        )
+
+    file_count = await _get_file_count(zip_path)
+
+    # -aoa: overwrite existing files instead of prompting for each of them
+    arguments = ["7z", "x", f"{zip_path}", f"-o{output_dir}", "-aoa", "-bb1"]
+    # executed without a shell: the paths are passed verbatim, no quoting needed
+    process = await asyncio.create_subprocess_exec(
+        *arguments,
+        stdin=asyncio.subprocess.DEVNULL,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+
+    async with progress_bar.sub_progress(
+        steps=file_count, description=IDStr("...")
+    ) as sub_prog:
+
+        async def _report_extracted_entries() -> None:
+            while line := await process.stdout.readline():
+                if line.decode(errors="replace").strip().startswith("- "):
+                    await sub_prog.update(1)
+
+        # stderr is read while stdout is consumed: a full stderr pipe would
+        # otherwise block 7z and this loop would never finish
+        _, stderr = await asyncio.gather(
+            _report_extracted_entries(), process.stderr.read()
+        )
+        await process.wait()
+        if process.returncode != 0:
+            raise SevenZipError(
+                command=shlex.join(arguments),
+                command_result=stderr.decode(errors="replace").strip(),
+            )
+
+    return {x for x in output_dir.rglob("*") if x.is_file()}
diff --git a/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py
new file mode 100644
index 00000000000..402200b0bef
--- /dev/null
+++ b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py
@@ -0,0 +1,148 @@
+# pylint: disable=redefined-outer-name
+# pylint: disable=unused-argument
+
+import subprocess
+from pathlib import Path
+
+import pytest
+from _pytest._py.path import LocalPath
+from faker import Faker
+from models_library.basic_types import IDStr
+from models_library.progress_bar import ProgressReport
+from servicelib.archiving_utils import archive_dir, unarchive_dir
+from servicelib.progress_bar import ProgressBarData
+from simcore_service_dynamic_sidecar.modules.seven_zip_wrapper import (
+    SevenZipError,
+    unarchive_zip_to,
+)
+
+
+def _ensure_path(dir_path: Path) -> Path:
+    dir_path.mkdir(parents=True, exist_ok=True)
+    return dir_path
+
+
+def _assert_same_directory_content(path1: Path, path2: Path) -> None:
+    """Checks that both directories contain the same relative paths"""
+    assert path1.is_dir()
+    assert path2.is_dir()
+
+    contents1 = {p.relative_to(path1) for p in path1.rglob("*")}
+    contents2 = {p.relative_to(path2) for p in path2.rglob("*")}
+
+    assert contents1 == contents2
+
+
+@pytest.fixture
+def to_archive_dir(tmpdir: LocalPath) -> Path:
+    return _ensure_path(Path(tmpdir) / "to_archive")
+
+
+@pytest.fixture
+def internal_tools_unarchived_tools(tmpdir: LocalPath) -> Path:
+    return _ensure_path(Path(tmpdir) / "internal_unarchived")
+
+
+@pytest.fixture
+def external_unarchived_tools(tmpdir: LocalPath) -> Path:
+    return _ensure_path(Path(tmpdir) / "external_unarchived")
+
+
+@pytest.fixture
+def archive_path(tmpdir: LocalPath) -> Path:
+    return Path(tmpdir) / "archive.zip"
+
+
+@pytest.fixture
+def generate_content(
+    to_archive_dir: Path, sub_dirs: int, files_in_subdirs: int
+) -> None:
+    """Creates `sub_dirs` folders, each one with `files_in_subdirs` text files"""
+    for i in range(sub_dirs):
+        (to_archive_dir / f"s{i}").mkdir(parents=True, exist_ok=True)
+        for k in range(files_in_subdirs):
+            (to_archive_dir / f"s{i}" / f"{k}.txt").write_text("a" * k)
+
+
+@pytest.fixture
+def skip_if_seven_zip_is_missing() -> None:
+    """Skips the test when the `7z --help` command cannot be run"""
+    try:
+        subprocess.check_output(["7z", "--help"])  # noqa: S607
+    except Exception:  # pylint: disable=broad-except
+        pytest.skip("7z is not installed")
+
+
+async def test_missing_path_raises_error(
+    skip_if_seven_zip_is_missing: None,
+    faker: Faker,
+    external_unarchived_tools: Path,
+):
+    missing_path = Path("/tmp") / f"this_path_is_missing_{faker.uuid4()}"  # noqa: S108
+    with pytest.raises(SevenZipError):
+        await unarchive_zip_to(missing_path, external_unarchived_tools)
+
+
+def _print_sorted(unarchived_dir: set[Path]) -> None:
+    print(f"List '{unarchived_dir}'")
+    for entry in sorted(unarchived_dir):
+        print(f"{entry}")
+
+
+def _strip_folder_from_path(paths: set[Path], *, to_strip: Path) -> set[Path]:
+    return {x.relative_to(to_strip) for x in paths}
+
+
+@pytest.mark.parametrize(
+    "sub_dirs, files_in_subdirs",
+    [
+        pytest.param(50, 40, id="few_items"),
+    ],
+)
+async def test_ensure_same_interface_as_unarchive_dir(
+    skip_if_seven_zip_is_missing: None,
+    generate_content: None,
+    archive_path: Path,
+    to_archive_dir: Path,
+    internal_tools_unarchived_tools: Path,
+    external_unarchived_tools: Path,
+    sub_dirs: int,
+    files_in_subdirs: int,
+):
+    """The 7z wrapper extracts and returns the same files as `unarchive_dir`"""
+    await archive_dir(
+        to_archive_dir, archive_path, compress=False, store_relative_path=True
+    )
+
+    internal_response = await unarchive_dir(
+        archive_path, internal_tools_unarchived_tools
+    )
+
+    last_actual_progress_value = 0
+
+    async def _report_progress(progress_report: ProgressReport) -> None:
+        nonlocal last_actual_progress_value
+        last_actual_progress_value = progress_report.actual_value
+
+    progress_bar = ProgressBarData(
+        num_steps=1,
+        description=IDStr("test progress bar"),
+        progress_report_cb=_report_progress,
+    )
+    async with progress_bar:
+        external_response = await unarchive_zip_to(
+            archive_path, external_unarchived_tools, progress_bar
+        )
+    assert last_actual_progress_value == 1  # ensure progress was reported
+    assert len(external_response) == sub_dirs * files_in_subdirs
+
+    _assert_same_directory_content(
+        internal_tools_unarchived_tools, external_unarchived_tools
+    )
+
+    _print_sorted(internal_response)
+    _print_sorted(external_response)
+
+    assert _strip_folder_from_path(
+        internal_response, to_strip=internal_tools_unarchived_tools
+    ) == _strip_folder_from_path(external_response, to_strip=external_unarchived_tools)