Skip to content

Commit

Permalink
replace unarchiver
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrei Neagu committed Dec 16, 2024
1 parent 53e0728 commit 65b5291
Show file tree
Hide file tree
Showing 9 changed files with 265 additions and 27 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci-testing-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1304,6 +1304,8 @@ jobs:
cache-dependency-glob: "**/dynamic-sidecar/requirements/ci.txt"
- name: show system version
run: ./ci/helpers/show_system_versions.bash
- name: install 7zip
run: ./ci/github/helpers/install_7zip.bash
- name: install
run: ./ci/github/unit-testing/dynamic-sidecar.bash install
- name: typecheck
Expand Down
12 changes: 12 additions & 0 deletions ci/github/helpers/install_7zip.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
#
# CI helper: installs 7zip by handing over to the repository-level
# scripts/install_7zip.bash
#

# http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -euo pipefail # abort on errors, on unbound variables and on failures inside pipes
IFS=$'\n\t'

# folder containing this helper; the shared script is resolved relative to it
HELPERS_DIR="$(dirname -- "$0")"

# replace this process with the shared installer
exec "${HELPERS_DIR}/../../../scripts/install_7zip.bash"
28 changes: 28 additions & 0 deletions scripts/install_7zip.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/bash
#
# Installs 7zip
#
# Downloads the pinned linux-x64 release from www.7-zip.org, copies the
# `7zz` binary to /usr/bin/7z and checks that it can be run.
#

# http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -o errexit  # abort on nonzero exitstatus
set -o nounset  # abort on unbound variable
set -o pipefail # don't hide errors within pipes
IFS=$'\n\t'


SEVEN_ZIP_VERSION="2409"
INSTALL_DIR="/tmp/7zip"
ARCHIVE_NAME="7z${SEVEN_ZIP_VERSION}-linux-x64.tar.xz"

# never leave the scratch directory behind, even when a step below fails
trap 'rm -rf "${INSTALL_DIR}"' EXIT

## 7z compression
echo "create install dir"
rm -rf "${INSTALL_DIR}"
mkdir -p "${INSTALL_DIR}"
cd "${INSTALL_DIR}"

# --fail: exit non-zero on HTTP errors instead of saving the error page,
# which would otherwise only surface as a confusing tar failure
curl --fail -LO "https://www.7-zip.org/a/${ARCHIVE_NAME}"
tar -xvf "${ARCHIVE_NAME}"
cp 7zz /usr/bin/7z

echo "remove install dir"
rm -rf "${INSTALL_DIR}"

echo "test installation"
7z --help
5 changes: 5 additions & 0 deletions services/dynamic-sidecar/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ RUN \
apt-get update && \
apt-get install -y --no-install-recommends\
curl \
xz-utils \
gnupg \
lsb-release \
&& mkdir -p /etc/apt/keyrings \
Expand All @@ -56,6 +57,10 @@ RUN \
RUN \
--mount=type=bind,source=scripts/install_rclone.bash,target=install_rclone.bash \
./install_rclone.bash
# install 7zip
RUN \
--mount=type=bind,source=scripts/install_7zip.bash,target=install_7zip.bash \
./install_7zip.bash

RUN AWS_CLI_VERSION="2.11.11" \
&& curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-${AWS_CLI_VERSION}.zip" -o "awscliv2.zip" \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@ class ContainerExecCommandFailedError(BaseDynamicSidecarError):
"Command '{command}' exited with code '{exit_code}'"
"and output: '{command_result}'"
)


class SevenZipError(BaseDynamicSidecarError):
    """Error reporting a ``7z`` command that could not be completed.

    The message is rendered from ``command`` (the command that was run)
    and ``command_result`` (the reason it did not finish).
    """

    msg_template = (
        "Could not finish command: '{command}'\n"
        "Reason: {command_result}"
    )
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def _close_transport(proc: Process):

async def async_command(
command: str,
timeout: float | None = None,
timeout: float | None = None, # noqa: ASYNC109
pipe_as_input: str | None = None,
env_vars: dict[str, str] | None = None,
) -> CommandResult:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,7 @@
from models_library.projects_nodes_io import NodeIDStr
from models_library.services_types import ServicePortKey
from pydantic import ByteSize
from servicelib.archiving_utils import (
PrunableFolder,
UnsupportedArchiveFormatError,
archive_dir,
unarchive_dir,
)
from servicelib.archiving_utils import PrunableFolder, archive_dir
from servicelib.async_utils import run_sequentially_in_context
from servicelib.file_utils import remove_directory
from servicelib.logging_utils import log_context
Expand All @@ -41,6 +36,7 @@

from ..core.settings import ApplicationSettings, get_settings
from ..modules.notifications import PortNotifier
from .seven_zip_wrapper import unarchive_zip_to


class PortTypeName(str, Enum):
Expand Down Expand Up @@ -298,28 +294,15 @@ async def _get_data_from_port(
dest_folder = PrunableFolder(final_path)

if _is_zip_file(downloaded_file):
# unzip updated data to dest_path
_logger.debug("unzipping %s", downloaded_file)
try:
unarchived: set[Path] = await unarchive_dir(
archive_to_extract=downloaded_file,
destination_folder=final_path,
progress_bar=sub_progress,
with log_context(
_logger,
logging.DEBUG,
f"unzipping '{downloaded_file}' to {final_path}",
):
unarchived: set[Path] = await unarchive_zip_to(
downloaded_file, final_path, sub_progress
)
dest_folder.prune(exclude=unarchived)

_logger.debug("all unzipped in %s", final_path)
except UnsupportedArchiveFormatError:
_logger.warning(
"Could not extract archive '%s' to '%s' moving it to: '%s'",
downloaded_file,
final_path,
final_path / downloaded_file.name,
)
await _move_file_to_input_port(
final_path, downloaded_file, dest_folder
)

else:
await _move_file_to_input_port(final_path, downloaded_file, dest_folder)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import asyncio
import logging
import re
import shlex
from pathlib import Path

from models_library.basic_types import IDStr
from servicelib.progress_bar import ProgressBarData

from ..core.errors import SevenZipError
from ..core.utils import async_command

_logger = logging.getLogger(__name__)


async def _get_file_count(zip_path: Path) -> int:
    """Return the number of files stored in the archive at ``zip_path``.

    Runs ``7z l`` on the archive and reads the count from the
    ``<N> files`` summary found in the command output.

    Raises:
        SevenZipError: if the command fails or its output contains no
            ``<N> files`` summary.
    """
    # the path is interpolated into a command string: quote it so that spaces
    # or shell metacharacters cannot split or alter the command
    result = await async_command(f"7z l {shlex.quote(str(zip_path))}")
    if not result.success:
        raise SevenZipError(command=result.command, command_result=result.message)

    # NOTE(review): 7z prints the totals after the entries, so the LAST match is
    # used; the first one could be an entry whose name contains "<digits> files"
    counts = re.findall(r"\s*(\d+)\s*files", result.message)
    if not counts:
        raise SevenZipError(
            command=result.command,
            command_result=f"could not find the file count in: {result.message}",
        )
    return int(counts[-1])


async def unarchive_zip_to(
    zip_path: Path,
    output_dir: Path,
    progress_bar: ProgressBarData | None = None,
) -> set[Path]:
    """Extract the archive at ``zip_path`` into ``output_dir`` using ``7z``.

    Progress is reported through a sub progress bar sized with the number
    of files in the archive; it advances by one step for every ``7z``
    output line starting with ``"- "``.

    Arguments:
        zip_path: archive to extract
        output_dir: folder receiving the extracted content
        progress_bar: parent progress bar; a single-step one is created
            when omitted

    Returns:
        all the files found under ``output_dir`` once the extraction is done

    Raises:
        SevenZipError: if the archive cannot be listed or if ``7z`` exits
            with a non-zero code
    """
    if progress_bar is None:
        progress_bar = ProgressBarData(
            num_steps=1, description=IDStr(f"extracting {zip_path.name}")
        )

    file_count = await _get_file_count(zip_path)

    # arguments are handed to 7z directly (no shell involved), so paths with
    # spaces or shell metacharacters are passed through untouched
    args = ["7z", "x", f"{zip_path}", f"-o{output_dir}", "-bb1"]
    command = shlex.join(args)  # only used for error reporting
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )

    # drain stderr while stdout is consumed: an unread pipe that fills up
    # would block 7z and this coroutine would wait forever
    stderr_reader = asyncio.create_task(process.stderr.read())
    try:
        async with progress_bar.sub_progress(
            steps=file_count, description=IDStr("...")
        ) as sub_prog:
            while line := await process.stdout.readline():
                # entry names are not guaranteed to be valid UTF-8
                if line.decode(errors="replace").strip().startswith("- "):
                    await sub_prog.update(1)  # one more file entry extracted

        await process.wait()
        stderr = await stderr_reader
    finally:
        # interrupted (error or cancellation): do not leave 7z running
        if process.returncode is None:
            process.kill()
            await process.wait()
        if not stderr_reader.done():
            stderr_reader.cancel()

    if process.returncode != 0:
        raise SevenZipError(
            command=command,
            command_result=stderr.decode(errors="replace").strip(),
        )

    return {x for x in output_dir.rglob("*") if x.is_file()}
145 changes: 145 additions & 0 deletions services/dynamic-sidecar/tests/unit/test_modules_seven_zip_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
# pylint: disable=redefined-outer-name
# pylint: disable=unused-argument

import subprocess
from pathlib import Path

import pytest
from _pytest._py.path import LocalPath
from faker import Faker
from models_library.basic_types import IDStr
from models_library.progress_bar import ProgressReport
from servicelib.archiving_utils import archive_dir, unarchive_dir
from servicelib.progress_bar import ProgressBarData
from simcore_service_dynamic_sidecar.modules.seven_zip_wrapper import (
SevenZipError,
unarchive_zip_to,
)


def _ensure_path(dir_path: Path) -> Path:
    """Make sure ``dir_path`` exists as a directory and hand it back."""
    if not dir_path.is_dir():
        dir_path.mkdir(parents=True, exist_ok=True)
    return dir_path


def _assert_same_directory_content(path1: Path, path2: Path) -> None:
    """Assert both folders exist and hold the same relative entries.

    Only the relative paths (files and folders, recursively) are
    compared, not the content of the files.
    """
    for folder in (path1, path2):
        assert folder.is_dir()

    def _relative_entries(root: Path) -> set[Path]:
        return {entry.relative_to(root) for entry in root.rglob("*")}

    assert _relative_entries(path1) == _relative_entries(path2)


@pytest.fixture
def to_archive_dir(tmpdir: LocalPath) -> Path:
    """Folder ``to_archive`` inside ``tmpdir``, created on demand."""
    source_dir = Path(tmpdir) / "to_archive"
    return _ensure_path(source_dir)


@pytest.fixture
def internal_tools_unarchived_tools(tmpdir: LocalPath) -> Path:
    """Folder ``internal_unarchived`` inside ``tmpdir``, created on demand."""
    destination = Path(tmpdir) / "internal_unarchived"
    return _ensure_path(destination)


@pytest.fixture
def external_unarchived_tools(tmpdir: LocalPath) -> Path:
    """Folder ``external_unarchived`` inside ``tmpdir``, created on demand."""
    destination = Path(tmpdir) / "external_unarchived"
    return _ensure_path(destination)


@pytest.fixture
def archive_path(tmpdir: LocalPath) -> Path:
    """Path ``archive.zip`` inside ``tmpdir``; the file is not created here."""
    return Path(tmpdir).joinpath("archive.zip")


@pytest.fixture
def generate_content(
    to_archive_dir: Path, sub_dirs: int, files_in_subdirs: int
) -> None:
    """Fill ``to_archive_dir`` with ``sub_dirs`` folders of text files.

    Folder ``s<i>`` receives ``files_in_subdirs`` files named ``<k>.txt``,
    each containing ``k`` times the character ``a``.
    """
    for dir_index in range(sub_dirs):
        sub_dir = to_archive_dir / f"s{dir_index}"
        sub_dir.mkdir(parents=True, exist_ok=True)
        for size in range(files_in_subdirs):
            (sub_dir / f"{size}.txt").write_text("a" * size)


@pytest.fixture
def skip_if_seven_zip_is_missing() -> None:
    """Skip the requesting test when ``7z --help`` cannot be run."""
    help_command = ["7z", "--help"]
    try:
        subprocess.check_output(help_command)  # noqa: S607
    except Exception:  # pylint: disable=broad-except
        pytest.skip("7z is not installed")


async def test_missing_path_raises_error(
    skip_if_seven_zip_is_missing: None,
    faker: Faker,
    external_unarchived_tools: Path,
):
    """Extracting an archive that does not exist raises ``SevenZipError``."""
    missing_name = f"this_path_is_missing_{faker.uuid4()}"
    missing_path = Path("/tmp").joinpath(missing_name)  # noqa: S108
    with pytest.raises(SevenZipError):
        await unarchive_zip_to(missing_path, external_unarchived_tools)


def _print_sorted(unarchived_dir: set[Path]) -> None:
    """Print a header showing the raw set, then one sorted entry per line."""
    print(f"List '{unarchived_dir}'")
    for line in map(str, sorted(unarchived_dir)):
        print(line)


def _strip_folder_from_path(paths: set[Path], *, to_strip: Path) -> set[Path]:
    """Return ``paths`` with each entry made relative to ``to_strip``."""
    relative_paths: set[Path] = set()
    for full_path in paths:
        relative_paths.add(full_path.relative_to(to_strip))
    return relative_paths


@pytest.mark.parametrize(
    "sub_dirs, files_in_subdirs",
    [
        pytest.param(50, 40, id="few_items"),
    ],
)
async def test_ensure_same_interface_as_unarchive_dir(
    skip_if_seven_zip_is_missing: None,
    generate_content: Path,
    archive_path: Path,
    to_archive_dir: Path,
    internal_tools_unarchived_tools: Path,
    external_unarchived_tools: Path,
    sub_dirs: int,
    files_in_subdirs: int,
):
    """``unarchive_zip_to`` must yield the same result as ``unarchive_dir``.

    The same archive is extracted with both tools; the extracted folder
    content and the returned sets of paths (relative to their output
    folder) have to match, and the progress must be reported up to 1.
    """
    await archive_dir(
        to_archive_dir, archive_path, compress=False, store_relative_path=True
    )

    internal_response = await unarchive_dir(
        archive_path, internal_tools_unarchived_tools
    )

    # every reported value is recorded, only the most recent one is checked
    reported_values = [0]

    async def _report_progress(progress_report: ProgressReport) -> None:
        reported_values.append(progress_report.actual_value)

    progress_bar = ProgressBarData(
        num_steps=1,
        description=IDStr("test progress bar"),
        progress_report_cb=_report_progress,
    )
    async with progress_bar:
        external_response = await unarchive_zip_to(
            archive_path, external_unarchived_tools, progress_bar
        )
    assert reported_values[-1] == 1  # ensure progress was reported
    assert len(external_response) == sub_dirs * files_in_subdirs

    _assert_same_directory_content(
        internal_tools_unarchived_tools, external_unarchived_tools
    )

    _print_sorted(internal_response)
    _print_sorted(external_response)

    internal_relative = _strip_folder_from_path(
        internal_response, to_strip=internal_tools_unarchived_tools
    )
    external_relative = _strip_folder_from_path(
        external_response, to_strip=external_unarchived_tools
    )
    assert internal_relative == external_relative

0 comments on commit 65b5291

Please sign in to comment.