From 97fd9111441a0564a8cf8111d5f9146546f7e972 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 12 Dec 2024 16:17:26 +0100 Subject: [PATCH 01/12] move archive to port instead of failing --- .../modules/nodeports.py | 88 ++++++++++++------- 1 file changed, 58 insertions(+), 30 deletions(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 0ad00f2c18d..fc7e56dfb09 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -20,7 +20,12 @@ from models_library.projects_nodes_io import NodeIDStr from models_library.services_types import ServicePortKey from pydantic import ByteSize -from servicelib.archiving_utils import PrunableFolder, archive_dir, unarchive_dir +from servicelib.archiving_utils import ( + ArchiveError, + PrunableFolder, + archive_dir, + unarchive_dir, +) from servicelib.async_utils import run_sequentially_in_context from servicelib.file_utils import remove_directory from servicelib.logging_utils import log_context @@ -46,7 +51,7 @@ class PortTypeName(str, Enum): _FILE_TYPE_PREFIX = "data:" _KEY_VALUE_FILE_NAME = "key_values.json" -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) # OUTPUTS section @@ -95,7 +100,7 @@ async def upload_outputs( # pylint:disable=too-many-statements # noqa: PLR0915 port_notifier: PortNotifier, ) -> None: # pylint: disable=too-many-branches - logger.debug("uploading data to simcore...") + _logger.debug("uploading data to simcore...") start_time = time.perf_counter() settings: ApplicationSettings = get_settings() @@ -138,7 +143,7 @@ async def upload_outputs( # pylint:disable=too-many-statements # noqa: PLR0915 if is_file_type(port.property_type): src_folder = outputs_path / port.key files_and_folders_list = list(src_folder.rglob("*")) - logger.debug("Discovered files to upload %s", files_and_folders_list) + _logger.debug("Discovered files to upload %s", files_and_folders_list) if not files_and_folders_list: ports_values[port.key] = (None, None) @@ -213,9 +218,9 @@ async def _archive_dir_notified( if port.key in data and data[port.key] is not None: ports_values[port.key] = (data[port.key], None) else: - logger.debug("Port %s not found in %s", port.key, data) + _logger.debug("Port %s not found in %s", port.key, data) else: - logger.debug("No file %s to fetch port values from", data_file) + _logger.debug("No file %s to fetch port values from", data_file) if archiving_tasks: await limited_gather(*archiving_tasks, limit=4) @@ -228,8 +233,8 @@ async def _archive_dir_notified( elapsed_time = time.perf_counter() - start_time total_bytes = sum(_get_size_of_value(x) for x in ports_values.values()) - logger.info("Uploaded %s bytes in %s seconds", total_bytes, elapsed_time) - logger.debug(_CONTROL_TESTMARK_DY_SIDECAR_NODEPORT_UPLOADED_MESSAGE) + _logger.info("Uploaded %s bytes in %s seconds", total_bytes, elapsed_time) + _logger.debug(_CONTROL_TESTMARK_DY_SIDECAR_NODEPORT_UPLOADED_MESSAGE) # INPUTS section @@ -243,6 +248,24 @@ def _is_zip_file(file_path: Path) -> bool: _shutil_move = aiofiles.os.wrap(shutil.move) +async def _move_file_to_input_port( + final_path: Path, downloaded_file: Path, dest_folder: PrunableFolder +) -> None: + _logger.debug("moving %s", downloaded_file) + final_path = final_path / downloaded_file.name + + # ensure parent exists + final_path.parent.mkdir(exist_ok=True, parents=True) + + await _shutil_move(downloaded_file, final_path) + + # NOTE: after the download the current value of the port + # makes sure previously downloaded files are removed + dest_folder.prune(exclude={final_path}) + + _logger.debug("file moved to %s", final_path) + + async def _get_data_from_port( port: Port, *, target_dir: Path, progress_bar: ProgressBarData ) -> tuple[Port, ItemConcreteValue | None, ByteSize]: @@ -250,7 +273,7 @@ async def _get_data_from_port( steps=2 if is_file_type(port.property_type) else 1, description=IDStr("getting data"), ) as sub_progress: - with log_context(logger, logging.DEBUG, f"getting {port.key=}"): + with log_context(_logger, logging.DEBUG, f"getting {port.key=}"): port_data = await port.get(sub_progress) if is_file_type(port.property_type): @@ -261,7 +284,7 @@ async def _get_data_from_port( if not downloaded_file or not downloaded_file.exists(): # the link may be empty # remove files all files from disk when disconnecting port - logger.debug("removing contents of dir %s", final_path) + _logger.debug("removing contents of dir %s", final_path) await remove_directory( final_path, only_children=True, ignore_errors=True ) @@ -270,33 +293,38 @@ async def _get_data_from_port( transferred_bytes = downloaded_file.stat().st_size # in case of valid file, it is either uncompressed and/or moved to the final directory - with log_context(logger, logging.DEBUG, "creating directory"): + with log_context(_logger, logging.DEBUG, "creating directory"): final_path.mkdir(exist_ok=True, parents=True) port_data = f"{final_path}" dest_folder = PrunableFolder(final_path) if _is_zip_file(downloaded_file): # unzip updated data to dest_path - logger.debug("unzipping %s", downloaded_file) - unarchived: set[Path] = await unarchive_dir( - archive_to_extract=downloaded_file, - destination_folder=final_path, - progress_bar=sub_progress, - ) - - dest_folder.prune(exclude=unarchived) + _logger.debug("unzipping %s", downloaded_file) + try: + unarchived: set[Path] = await unarchive_dir( + archive_to_extract=downloaded_file, + destination_folder=final_path, + progress_bar=sub_progress, + ) + dest_folder.prune(exclude=unarchived) + + _logger.debug("all unzipped in %s", final_path) + except ArchiveError: + _logger.warning( + "Could not extract archive '%s' to '%s' moving it to: '%s'", + downloaded_file, + final_path, + final_path / downloaded_file.name, + ) + await _move_file_to_input_port( + final_path, downloaded_file, dest_folder + ) - logger.debug("all unzipped in %s", final_path) else: - logger.debug("moving %s", downloaded_file) - final_path = final_path / Path(downloaded_file).name - await _shutil_move(str(downloaded_file), final_path) - - # NOTE: after the download the current value of the port - # makes sure previously downloaded files are removed - dest_folder.prune(exclude={final_path}) + await _move_file_to_input_port(final_path, downloaded_file, dest_folder) - logger.debug("all moved to %s", final_path) + _logger.debug("all moved to %s", final_path) else: transferred_bytes = sys.getsizeof(port_data) @@ -312,7 +340,7 @@ async def download_target_ports( progress_bar: ProgressBarData, port_notifier: PortNotifier | None, ) -> ByteSize: - logger.debug("retrieving data from simcore...") + _logger.debug("retrieving data from simcore...") start_time = time.perf_counter() settings: ApplicationSettings = get_settings() @@ -386,7 +414,7 @@ async def _get_date_from_port_notified( data_file.write_text(json.dumps(data)) elapsed_time = time.perf_counter() - start_time - logger.info( + _logger.info( "Downloaded %s in %s seconds", total_transfered_bytes.human_readable(decimal=True), elapsed_time, From ca87d0b8e287478930664c3866ec47de431e3382 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 13 Dec 2024 10:45:57 +0100 Subject: [PATCH 02/12] using log context and rename --- .../modules/nodeports.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index fc7e56dfb09..efa04e8de44 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -251,19 +251,16 @@ def _is_zip_file(file_path: Path) -> bool: async def _move_file_to_input_port( final_path: Path, downloaded_file: Path, dest_folder: PrunableFolder ) -> None: - _logger.debug("moving %s", downloaded_file) - final_path = final_path / downloaded_file.name + with log_context(_logger, logging.DEBUG, f"moving {downloaded_file}"): + final_path = final_path / downloaded_file.name + # ensure parent exists + final_path.parent.mkdir(exist_ok=True, parents=True) - # ensure parent exists - final_path.parent.mkdir(exist_ok=True, parents=True) + await _shutil_move(downloaded_file, final_path) - await _shutil_move(downloaded_file, final_path) - - # NOTE: after the download the current value of the port - # makes sure previously downloaded files are removed - dest_folder.prune(exclude={final_path}) - - _logger.debug("file moved to %s", final_path) + # NOTE: after the port content changes, make sure old files + # which are no longer part of the port, are removed + dest_folder.prune(exclude={final_path}) async def _get_data_from_port( From be58b0c95fc6a91cd6aebb9864e9a29f0beeb214 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 13 Dec 2024 10:53:31 +0100 Subject: [PATCH 03/12] refactor to use less generic error UnsupportedArchiveFormat --- .../service-library/src/servicelib/archiving_utils.py | 8 ++++++++ .../simcore_service_dynamic_sidecar/modules/nodeports.py | 4 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/service-library/src/servicelib/archiving_utils.py b/packages/service-library/src/servicelib/archiving_utils.py index a46665b2c23..f6c78ffe277 100644 --- a/packages/service-library/src/servicelib/archiving_utils.py +++ b/packages/service-library/src/servicelib/archiving_utils.py @@ -35,6 +35,10 @@ class ArchiveError(Exception): """ +class UnsupportedArchiveFormat(Exception): + pass + + def _human_readable_size(size, decimal_places=3): human_readable_file_size = float(size) unit = "B" @@ -260,6 +264,10 @@ async def unarchive_dir( f"Failed unarchiving {archive_to_extract} -> {destination_folder} due to {type(err)}." f"Details: {err}" ) + # maybe write unsupported error format here instead of all this to be able to still raise in case of error + if isinstance(err, NotImplementedError): + raise UnsupportedArchiveFormat(msg) from err + raise ArchiveError(msg) from err # NOTE: extracted_paths includes all tree leafs, which might include files and empty folders diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index efa04e8de44..39076ddb7e8 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -21,8 +21,8 @@ from models_library.services_types import ServicePortKey from pydantic import ByteSize from servicelib.archiving_utils import ( - ArchiveError, PrunableFolder, + UnsupportedArchiveFormat, archive_dir, unarchive_dir, ) @@ -307,7 +307,7 @@ async def _get_data_from_port( dest_folder.prune(exclude=unarchived) _logger.debug("all unzipped in %s", final_path) - except ArchiveError: + except UnsupportedArchiveFormat: _logger.warning( "Could not extract archive '%s' to '%s' moving it to: '%s'", downloaded_file, From 53e0728037a8f937229f143b9a1bfeff12eb6225 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 13 Dec 2024 11:55:37 +0100 Subject: [PATCH 04/12] renaming error --- .../src/servicelib/archiving_utils.py | 4 ++-- .../modules/nodeports.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/service-library/src/servicelib/archiving_utils.py b/packages/service-library/src/servicelib/archiving_utils.py index f6c78ffe277..1b042323c86 100644 --- a/packages/service-library/src/servicelib/archiving_utils.py +++ b/packages/service-library/src/servicelib/archiving_utils.py @@ -35,7 +35,7 @@ class ArchiveError(Exception): """ -class UnsupportedArchiveFormat(Exception): +class UnsupportedArchiveFormatError(Exception): pass @@ -266,7 +266,7 @@ async def unarchive_dir( ) # maybe write unsupported error format here instead of all this to be able to still raise in case of error if isinstance(err, NotImplementedError): - raise UnsupportedArchiveFormat(msg) from err + raise UnsupportedArchiveFormatError(msg) from err raise ArchiveError(msg) from err diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 39076ddb7e8..74d898963c2 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -22,7 +22,7 @@ from pydantic import ByteSize from servicelib.archiving_utils import ( PrunableFolder, - UnsupportedArchiveFormat, + UnsupportedArchiveFormatError, archive_dir, unarchive_dir, ) @@ -281,10 +281,12 @@ async def _get_data_from_port( if not downloaded_file or not downloaded_file.exists(): # the link may be empty # remove files all files from disk when disconnecting port - _logger.debug("removing contents of dir %s", final_path) - await remove_directory( - final_path, only_children=True, ignore_errors=True - ) + with log_context( + _logger, logging.DEBUG, f"removing contents of dir '{final_path}'" + ): + await remove_directory( + final_path, only_children=True, ignore_errors=True + ) return port, None, ByteSize(0) transferred_bytes = downloaded_file.stat().st_size @@ -307,7 +309,7 @@ async def _get_data_from_port( dest_folder.prune(exclude=unarchived) _logger.debug("all unzipped in %s", final_path) - except UnsupportedArchiveFormat: + except UnsupportedArchiveFormatError: _logger.warning( "Could not extract archive '%s' to '%s' moving it to: '%s'", downloaded_file, From 65b529176c57f9766c6e7433aac3defa03265806 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 13:47:13 +0100 Subject: [PATCH 05/12] replace unarchiver --- .github/workflows/ci-testing-deploy.yml | 2 + ci/github/helpers/install_7zip.bash | 12 ++ scripts/install_7zip.bash | 28 ++++ services/dynamic-sidecar/Dockerfile | 5 + .../core/errors.py | 4 + .../core/utils.py | 2 +- .../modules/nodeports.py | 35 ++--- .../modules/seven_zip_wrapper.py | 59 +++++++ .../unit/test_modules_seven_zip_wrapper.py | 145 ++++++++++++++++++ 9 files changed, 265 insertions(+), 27 deletions(-) create mode 100755 ci/github/helpers/install_7zip.bash create mode 100755 scripts/install_7zip.bash create mode 100644 services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py create mode 100644 services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index aa1efbee7a9..08924e611ef 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -1304,6 +1304,8 @@ jobs: cache-dependency-glob: "**/dynamic-sidecar/requirements/ci.txt" - name: show system version run: ./ci/helpers/show_system_versions.bash + - name: install 7zip + run: ./ci/github/helpers/install_7zip.bash - name: install run: ./ci/github/unit-testing/dynamic-sidecar.bash install - name: typecheck diff --git a/ci/github/helpers/install_7zip.bash b/ci/github/helpers/install_7zip.bash new file mode 100755 index 00000000000..f30532a8ec8 --- /dev/null +++ b/ci/github/helpers/install_7zip.bash @@ -0,0 +1,12 @@ +#!/bin/bash +# +# Installs the latest version of 7zip plugin +# + +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes +IFS=$'\n\t' + +exec "$( dirname -- "$0"; )"/../../../scripts/install_7zip.bash diff --git a/scripts/install_7zip.bash b/scripts/install_7zip.bash new file mode 100755 index 00000000000..1276839f8ab --- /dev/null +++ b/scripts/install_7zip.bash @@ -0,0 +1,28 @@ +#!/bin/bash +# +# Installs 7zip +# + +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes +IFS=$'\n\t' + + +SEVEN_ZIP_VERSION="2409" +## 7z compression +echo "create install dir" +rm -rf /tmp/7zip +mkdir -p /tmp/7zip +cd /tmp/7zip + +curl -LO https://www.7-zip.org/a/7z${SEVEN_ZIP_VERSION}-linux-x64.tar.xz +tar -xvf 7z${SEVEN_ZIP_VERSION}-linux-x64.tar.xz +cp 7zz /usr/bin/7z + +echo "remove install dir" +rm -rf /tmp/7zip + +echo "test installation" +7z --help diff --git a/services/dynamic-sidecar/Dockerfile b/services/dynamic-sidecar/Dockerfile index a5173e7f19a..ab59599184f 100644 --- a/services/dynamic-sidecar/Dockerfile +++ b/services/dynamic-sidecar/Dockerfile @@ -30,6 +30,7 @@ RUN \ apt-get update && \ apt-get install -y --no-install-recommends\ curl \ + xz-utils \ gnupg \ lsb-release \ && mkdir -p /etc/apt/keyrings \ @@ -56,6 +57,10 @@ RUN \ RUN \ --mount=type=bind,source=scripts/install_rclone.bash,target=install_rclone.bash \ ./install_rclone.bash +# install 7zip +RUN \ + --mount=type=bind,source=scripts/install_7zip.bash,target=install_7zip.bash \ + ./install_7zip.bash RUN AWS_CLI_VERSION="2.11.11" \ && curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-${AWS_CLI_VERSION}.zip" -o "awscliv2.zip" \ diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py index b9a449ecb36..722b5c2ce14 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/errors.py @@ -29,3 +29,7 @@ class ContainerExecCommandFailedError(BaseDynamicSidecarError): "Command '{command}' exited with code '{exit_code}'" "and output: '{command_result}'" ) + + +class SevenZipError(BaseDynamicSidecarError): + msg_template = "Could not finish command: '{command}'\nReason: {command_result}" diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py index 4e6f9ee0df5..3993ce37a56 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/utils.py @@ -49,7 +49,7 @@ def _close_transport(proc: Process): async def async_command( command: str, - timeout: float | None = None, + timeout: float | None = None, # noqa: ASYNC109 pipe_as_input: str | None = None, env_vars: dict[str, str] | None = None, ) -> CommandResult: diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 74d898963c2..596876c9244 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -20,12 +20,7 @@ from models_library.projects_nodes_io import NodeIDStr from models_library.services_types import ServicePortKey from pydantic import ByteSize -from servicelib.archiving_utils import ( - PrunableFolder, - UnsupportedArchiveFormatError, - archive_dir, - unarchive_dir, -) +from servicelib.archiving_utils import PrunableFolder, archive_dir from servicelib.async_utils import run_sequentially_in_context from servicelib.file_utils import remove_directory from servicelib.logging_utils import log_context @@ -41,6 +36,7 @@ from ..core.settings import ApplicationSettings, get_settings from ..modules.notifications import PortNotifier +from .seven_zip_wrapper import unarchive_zip_to class PortTypeName(str, Enum): @@ -298,28 +294,15 @@ async def _get_data_from_port( dest_folder = PrunableFolder(final_path) if _is_zip_file(downloaded_file): - # unzip updated data to dest_path - _logger.debug("unzipping %s", downloaded_file) - try: - unarchived: set[Path] = await unarchive_dir( - archive_to_extract=downloaded_file, - destination_folder=final_path, - progress_bar=sub_progress, + with log_context( + _logger, + logging.DEBUG, + f"unzipping '{downloaded_file}' to {final_path}", + ): + unarchived: set[Path] = await unarchive_zip_to( + downloaded_file, final_path, sub_progress ) dest_folder.prune(exclude=unarchived) - - _logger.debug("all unzipped in %s", final_path) - except UnsupportedArchiveFormatError: - _logger.warning( - "Could not extract archive '%s' to '%s' moving it to: '%s'", - downloaded_file, - final_path, - final_path / downloaded_file.name, - ) - await _move_file_to_input_port( - final_path, downloaded_file, dest_folder - ) - else: await _move_file_to_input_port(final_path, downloaded_file, dest_folder) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py new file mode 100644 index 00000000000..5e5896c226e --- /dev/null +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py @@ -0,0 +1,59 @@ +import asyncio +import logging +import re +from pathlib import Path + +from models_library.basic_types import IDStr +from servicelib.progress_bar import ProgressBarData + +from ..core.errors import SevenZipError +from ..core.utils import async_command + +_logger = logging.getLogger(__name__) + + +async def _get_file_count(zip_path: Path) -> int: + result = await async_command(f"7z l {zip_path}") + if not result.success: + raise SevenZipError(command=result.command, command_result=result.message) + + match = re.search(r"\s*(\d+)\s*files", result.message) + return int(match.group().replace("files", "").strip()) + + +async def unarchive_zip_to( + zip_path: Path, + output_dir: Path, + progress_bar: ProgressBarData | None = None, +) -> set[Path]: + if not progress_bar: + progress_bar = ProgressBarData( + num_steps=1, description=IDStr(f"extracting {zip_path.name}") + ) + + file_count = await _get_file_count(zip_path) + + command = f"7z x {zip_path} -o{output_dir} -bb1" + process = await asyncio.create_subprocess_shell( + command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + + async with progress_bar.sub_progress( + steps=file_count, description=IDStr("...") + ) as sub_prog: + + while True: + line = await process.stdout.readline() + if not line: + break + + line_decoded = line.decode().strip() + if line_decoded.startswith("- "): # check file entry + await sub_prog.update(1) + + await process.wait() + if process.returncode != 0: + stderr = await process.stderr.read() + raise SevenZipError(command=command, command_result=stderr.decode().strip()) + + return {x for x in output_dir.rglob("*") if x.is_file()} diff --git a/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py new file mode 100644 index 00000000000..402200b0bef --- /dev/null +++ b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py @@ -0,0 +1,145 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument + +import subprocess +from pathlib import Path + +import pytest +from _pytest._py.path import LocalPath +from faker import Faker +from models_library.basic_types import IDStr +from models_library.progress_bar import ProgressReport +from servicelib.archiving_utils import archive_dir, unarchive_dir +from servicelib.progress_bar import ProgressBarData +from simcore_service_dynamic_sidecar.modules.seven_zip_wrapper import ( + SevenZipError, + unarchive_zip_to, +) + + +def _ensure_path(dir_path: Path) -> Path: + dir_path.mkdir(parents=True, exist_ok=True) + return dir_path + + +def _assert_same_directory_content(path1: Path, path2: Path) -> None: + assert path1.is_dir() + assert path2.is_dir() + + contents1 = {p.relative_to(path1) for p in path1.rglob("*")} + contents2 = {p.relative_to(path2) for p in path2.rglob("*")} + + assert contents1 == contents2 + + +@pytest.fixture +def to_archive_dir(tmpdir: LocalPath) -> Path: + return _ensure_path(Path(tmpdir) / "to_archive") + + +@pytest.fixture +def internal_tools_unarchived_tools(tmpdir: LocalPath) -> Path: + return _ensure_path(Path(tmpdir) / "internal_unarchived") + + +@pytest.fixture +def external_unarchived_tools(tmpdir: LocalPath) -> Path: + return _ensure_path(Path(tmpdir) / "external_unarchived") + + +@pytest.fixture +def archive_path(tmpdir: LocalPath) -> Path: + return Path(tmpdir) / "archive.zip" + + +@pytest.fixture +def generate_content( + to_archive_dir: Path, sub_dirs: int, files_in_subdirs: int +) -> None: + for i in range(sub_dirs): + (to_archive_dir / f"s{i}").mkdir(parents=True, exist_ok=True) + for k in range(files_in_subdirs): + (to_archive_dir / f"s{i}" / f"{k}.txt").write_text("a" * k) + + +@pytest.fixture +def skip_if_seven_zip_is_missing() -> None: + try: + subprocess.check_output(["7z", "--help"]) # noqa: S607 + except Exception: # pylint: disable=broad-except + pytest.skip("7z is not installed") + + +async def test_missing_path_raises_error( + skip_if_seven_zip_is_missing: None, + faker: Faker, + external_unarchived_tools: Path, +): + missing_path = Path("/tmp") / f"this_path_is_missing_{faker.uuid4()}" # noqa: S108 + with pytest.raises(SevenZipError): + await unarchive_zip_to(missing_path, external_unarchived_tools) + + +def _print_sorted(unarchived_dir: set[Path]) -> None: + print(f"List '{unarchived_dir}'") + for entry in sorted(unarchived_dir): + print(f"{entry}") + + +def _strip_folder_from_path(paths: set[Path], *, to_strip: Path) -> set[Path]: + return {x.relative_to(to_strip) for x in paths} + + +@pytest.mark.parametrize( + "sub_dirs, files_in_subdirs", + [ + pytest.param(50, 40, id="few_items"), + ], +) +async def test_ensure_same_interface_as_unarchive_dir( + skip_if_seven_zip_is_missing: None, + generate_content: Path, + archive_path: Path, + to_archive_dir: Path, + internal_tools_unarchived_tools: Path, + external_unarchived_tools: Path, + sub_dirs: int, + files_in_subdirs: int, +): + + await archive_dir( + to_archive_dir, archive_path, compress=False, store_relative_path=True + ) + + intenal_response = await unarchive_dir( + archive_path, internal_tools_unarchived_tools + ) + + last_actual_progress_value = 0 + + async def _report_progress(progress_report: ProgressReport) -> None: + nonlocal last_actual_progress_value + last_actual_progress_value = progress_report.actual_value + + progress_bar = ProgressBarData( + num_steps=1, + description=IDStr("test progress bar"), + progress_report_cb=_report_progress, + ) + async with progress_bar: + external_response = await unarchive_zip_to( + archive_path, external_unarchived_tools, progress_bar + ) + assert last_actual_progress_value == 1 # ensure progress was reported + assert len(external_response) == sub_dirs * files_in_subdirs + + _assert_same_directory_content( + internal_tools_unarchived_tools, external_unarchived_tools + ) + + _print_sorted(intenal_response) + _print_sorted(external_response) + + assert _strip_folder_from_path( + intenal_response, to_strip=internal_tools_unarchived_tools + ) == _strip_folder_from_path(external_response, to_strip=external_unarchived_tools) From e6d118d542ea54fde2fdf313ddd6fc521c1b43c5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 13:48:43 +0100 Subject: [PATCH 06/12] remove unused --- .../service-library/src/servicelib/archiving_utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/packages/service-library/src/servicelib/archiving_utils.py b/packages/service-library/src/servicelib/archiving_utils.py index 1b042323c86..a46665b2c23 100644 --- a/packages/service-library/src/servicelib/archiving_utils.py +++ b/packages/service-library/src/servicelib/archiving_utils.py @@ -35,10 +35,6 @@ class ArchiveError(Exception): """ -class UnsupportedArchiveFormatError(Exception): - pass - - def _human_readable_size(size, decimal_places=3): human_readable_file_size = float(size) unit = "B" @@ -264,10 +260,6 @@ async def unarchive_dir( f"Failed unarchiving {archive_to_extract} -> {destination_folder} due to {type(err)}." f"Details: {err}" ) - # maybe write unsupported error format here instead of all this to be able to still raise in case of error - if isinstance(err, NotImplementedError): - raise UnsupportedArchiveFormatError(msg) from err - raise ArchiveError(msg) from err # NOTE: extracted_paths includes all tree leafs, which might include files and empty folders From 2a2f917f2a52d1b2cea66857cd5f10ef90219d39 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 13:49:46 +0100 Subject: [PATCH 07/12] remove comment --- .../src/simcore_service_dynamic_sidecar/modules/nodeports.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 596876c9244..da716d11328 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -249,7 +249,6 @@ async def _move_file_to_input_port( ) -> None: with log_context(_logger, logging.DEBUG, f"moving {downloaded_file}"): final_path = final_path / downloaded_file.name - # ensure parent exists final_path.parent.mkdir(exist_ok=True, parents=True) await _shutil_move(downloaded_file, final_path) From 56d7fe1ceadeea39967832410bf7cdec4f582965 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 14:10:39 +0100 Subject: [PATCH 08/12] refactor 7zip --- ci/github/helpers/install_rclone.bash | 2 +- .../tests/unit/test_modules_seven_zip_wrapper.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/github/helpers/install_rclone.bash b/ci/github/helpers/install_rclone.bash index 3aa827f80c8..f490ba59dc2 100755 --- a/ci/github/helpers/install_rclone.bash +++ b/ci/github/helpers/install_rclone.bash @@ -9,4 +9,4 @@ set -o nounset # abort on unbound variable set -o pipefail # don't hide errors within pipes IFS=$'\n\t' -exec "$( dirname -- "$0"; )"/../../../scripts/install_rclone.bash +apt-get install p7zip-full diff --git a/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py index 402200b0bef..d1440b6f9ae 100644 --- a/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py +++ b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py @@ -65,7 +65,7 @@ def generate_content( @pytest.fixture def skip_if_seven_zip_is_missing() -> None: try: - subprocess.check_output(["7z", "--help"]) # noqa: S607 + subprocess.check_output(["7z", "--help"]) # noqa: S607, S603 except Exception: # pylint: disable=broad-except pytest.skip("7z is not installed") From 9c82bc54916d2ceed20b1d21c4c8db4e7fb3ae70 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 14:12:40 +0100 Subject: [PATCH 09/12] fixed broken install --- ci/github/helpers/install_7zip.bash | 2 +- ci/github/helpers/install_rclone.bash | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/github/helpers/install_7zip.bash b/ci/github/helpers/install_7zip.bash index f30532a8ec8..6c0a1bf6882 100755 --- a/ci/github/helpers/install_7zip.bash +++ b/ci/github/helpers/install_7zip.bash @@ -9,4 +9,4 @@ set -o nounset # abort on unbound variable set -o pipefail # don't hide errors within pipes IFS=$'\n\t' -exec "$( dirname -- "$0"; )"/../../../scripts/install_7zip.bash +apt-get install p7zip-full diff --git a/ci/github/helpers/install_rclone.bash b/ci/github/helpers/install_rclone.bash index f490ba59dc2..3aa827f80c8 100755 --- a/ci/github/helpers/install_rclone.bash +++ b/ci/github/helpers/install_rclone.bash @@ -9,4 +9,4 @@ set -o nounset # abort on unbound variable set -o pipefail # don't hide errors within pipes IFS=$'\n\t' -apt-get install p7zip-full +exec "$( dirname -- "$0"; )"/../../../scripts/install_rclone.bash From 0f315e0449a56faa6965273e9a1c30572466c23c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 14:16:46 +0100 Subject: [PATCH 10/12] refactor --- .github/workflows/ci-testing-deploy.yml | 2 +- ci/github/helpers/install_7zip.bash | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index 5e5b9b72155..0585de760ae 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -1305,7 +1305,7 @@ jobs: - name: show system version run: ./ci/helpers/show_system_versions.bash - name: install 7zip - run: ./ci/github/helpers/install_7zip.bash + run: sudo ./ci/github/helpers/install_7zip.bash - name: install run: ./ci/github/unit-testing/dynamic-sidecar.bash install - name: typecheck diff --git a/ci/github/helpers/install_7zip.bash b/ci/github/helpers/install_7zip.bash index 6c0a1bf6882..f30532a8ec8 100755 --- a/ci/github/helpers/install_7zip.bash +++ b/ci/github/helpers/install_7zip.bash @@ -9,4 +9,4 @@ set -o nounset # abort on unbound variable set -o pipefail # don't hide errors within pipes IFS=$'\n\t' -apt-get install p7zip-full +exec "$( dirname -- "$0"; )"/../../../scripts/install_7zip.bash From 7ede41afdbb5c7261fcd1bd0c54b96a83eaffdd9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 14:33:36 +0100 Subject: [PATCH 11/12] mypy --- .../modules/seven_zip_wrapper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py index 5e5896c226e..200c0667be1 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/seven_zip_wrapper.py @@ -18,7 +18,7 @@ async def _get_file_count(zip_path: Path) -> int: raise SevenZipError(command=result.command, command_result=result.message) match = re.search(r"\s*(\d+)\s*files", result.message) - return int(match.group().replace("files", "").strip()) + return int(match.group().replace("files", "").strip() if match else "0") async def unarchive_zip_to( @@ -37,6 +37,8 @@ async def unarchive_zip_to( process = await asyncio.create_subprocess_shell( command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) + assert process.stdout # nosec + assert process.stderr # nosec async with progress_bar.sub_progress( steps=file_count, description=IDStr("...") From 9d67dd11a9cab5afee3f33bca29be0f4f34b7f4c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 16 Dec 2024 14:33:44 +0100 Subject: [PATCH 12/12] added extra test --- .../tests/unit/test_modules_seven_zip_wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py index d1440b6f9ae..b2656e4a42f 100644 --- a/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py +++ b/services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py @@ -93,7 +93,8 @@ def _strip_folder_from_path(paths: set[Path], *, to_strip: Path) -> set[Path]: @pytest.mark.parametrize( "sub_dirs, files_in_subdirs", [ - pytest.param(50, 40, id="few_items"), + pytest.param(0, 0, id="no_items"), + pytest.param(5, 4, id="few_items"), ], ) async def test_ensure_same_interface_as_unarchive_dir(