-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🐛 Replace unarchiving function #6959
base: master
Are you sure you want to change the base?
Changes from all commits
97fd911
29533e9
ca87d0b
7f3e41e
be58b0c
53e0728
65b5291
e6d118d
2a2f917
56d7fe1
9c82bc5
5a4cfb5
0f315e0
7ede41a
9d67dd1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash
#
# Installs 7zip by handing over to the shared installer script
# (scripts/install_7zip.bash, three directories above this file).
#

# http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -o errexit  # abort on nonzero exitstatus
set -o nounset  # abort on unbound variable
set -o pipefail # don't hide errors within pipes
IFS=$'\n\t'

# directory holding this script, as it was invoked
script_dir="$(dirname -- "$0")"

# replace the current shell with the shared installer
exec "${script_dir}/../../../scripts/install_7zip.bash"
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#!/bin/bash
#
# Installs 7zip
#
# Downloads the pinned 7-Zip linux-x64 release from 7-zip.org, copies the
# extracted `7zz` binary to /usr/bin/7z and runs it once as a smoke test.
#

# http://redsymbol.net/articles/unofficial-bash-strict-mode/
set -o errexit  # abort on nonzero exitstatus
set -o nounset  # abort on unbound variable
set -o pipefail # don't hide errors within pipes
IFS=$'\n\t'

SEVEN_ZIP_VERSION="2409"
SEVEN_ZIP_ARCHIVE="7z${SEVEN_ZIP_VERSION}-linux-x64.tar.xz"
INSTALL_DIR="/tmp/7zip"

# the scratch directory is removed on every exit path, also when a step fails
trap 'rm -rf "${INSTALL_DIR}"' EXIT

## 7z compression
echo "create install dir"
rm -rf "${INSTALL_DIR}"
mkdir -p "${INSTALL_DIR}"
cd "${INSTALL_DIR}"

# NOTE(review): a reviewer asked why not to have our own image with this
# already in place instead of downloading and installing it every time
# --fail makes curl exit non-zero on HTTP errors instead of saving the error
# page under the archive name
curl --fail --location --remote-name "https://www.7-zip.org/a/${SEVEN_ZIP_ARCHIVE}"

tar -xvf "${SEVEN_ZIP_ARCHIVE}"
cp 7zz /usr/bin/7z

echo "remove install dir"
rm -rf "${INSTALL_DIR}"

echo "test installation"
7z --help
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -29,3 +29,7 @@ class ContainerExecCommandFailedError(BaseDynamicSidecarError): | |||||
"Command '{command}' exited with code '{exit_code}'" | ||||||
"and output: '{command_result}'" | ||||||
) | ||||||
|
||||||
|
||||||
class SevenZipError(BaseDynamicSidecarError):
    """Error for a ``7z`` command that could not be completed.

    The template placeholders ``command`` and ``command_result`` match the
    keyword arguments given where this error is raised.
    """

    msg_template = (
        "Could not finish command: '{command}'\n"
        "Reason: {command_result}"
    )
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,7 @@ | |
from models_library.projects_nodes_io import NodeIDStr | ||
from models_library.services_types import ServicePortKey | ||
from pydantic import ByteSize | ||
from servicelib.archiving_utils import PrunableFolder, archive_dir, unarchive_dir | ||
from servicelib.archiving_utils import PrunableFolder, archive_dir | ||
from servicelib.async_utils import run_sequentially_in_context | ||
from servicelib.file_utils import remove_directory | ||
from servicelib.logging_utils import log_context | ||
|
@@ -36,6 +36,7 @@ | |
|
||
from ..core.settings import ApplicationSettings, get_settings | ||
from ..modules.notifications import PortNotifier | ||
from .seven_zip_wrapper import unarchive_zip_to | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it not make sense to put that one in servicelib as well? |
||
|
||
|
||
class PortTypeName(str, Enum): | ||
|
@@ -46,7 +47,7 @@ class PortTypeName(str, Enum): | |
_FILE_TYPE_PREFIX = "data:" | ||
_KEY_VALUE_FILE_NAME = "key_values.json" | ||
|
||
logger = logging.getLogger(__name__) | ||
_logger = logging.getLogger(__name__) | ||
|
||
# OUTPUTS section | ||
|
||
|
@@ -95,7 +96,7 @@ async def upload_outputs( # pylint:disable=too-many-statements # noqa: PLR0915 | |
port_notifier: PortNotifier, | ||
) -> None: | ||
# pylint: disable=too-many-branches | ||
logger.debug("uploading data to simcore...") | ||
_logger.debug("uploading data to simcore...") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For this kind of log, I guess you could use the log decorator and save one line of code. |
||
start_time = time.perf_counter() | ||
|
||
settings: ApplicationSettings = get_settings() | ||
|
@@ -138,7 +139,7 @@ async def upload_outputs( # pylint:disable=too-many-statements # noqa: PLR0915 | |
if is_file_type(port.property_type): | ||
src_folder = outputs_path / port.key | ||
files_and_folders_list = list(src_folder.rglob("*")) | ||
logger.debug("Discovered files to upload %s", files_and_folders_list) | ||
_logger.debug("Discovered files to upload %s", files_and_folders_list) | ||
|
||
if not files_and_folders_list: | ||
ports_values[port.key] = (None, None) | ||
|
@@ -213,9 +214,9 @@ async def _archive_dir_notified( | |
if port.key in data and data[port.key] is not None: | ||
ports_values[port.key] = (data[port.key], None) | ||
else: | ||
logger.debug("Port %s not found in %s", port.key, data) | ||
_logger.debug("Port %s not found in %s", port.key, data) | ||
else: | ||
logger.debug("No file %s to fetch port values from", data_file) | ||
_logger.debug("No file %s to fetch port values from", data_file) | ||
|
||
if archiving_tasks: | ||
await limited_gather(*archiving_tasks, limit=4) | ||
|
@@ -228,8 +229,8 @@ async def _archive_dir_notified( | |
|
||
elapsed_time = time.perf_counter() - start_time | ||
total_bytes = sum(_get_size_of_value(x) for x in ports_values.values()) | ||
logger.info("Uploaded %s bytes in %s seconds", total_bytes, elapsed_time) | ||
logger.debug(_CONTROL_TESTMARK_DY_SIDECAR_NODEPORT_UPLOADED_MESSAGE) | ||
_logger.info("Uploaded %s bytes in %s seconds", total_bytes, elapsed_time) | ||
_logger.debug(_CONTROL_TESTMARK_DY_SIDECAR_NODEPORT_UPLOADED_MESSAGE) | ||
|
||
|
||
# INPUTS section | ||
|
@@ -243,14 +244,28 @@ def _is_zip_file(file_path: Path) -> bool: | |
_shutil_move = aiofiles.os.wrap(shutil.move) | ||
|
||
|
||
async def _move_file_to_input_port(
    final_path: Path, downloaded_file: Path, dest_folder: PrunableFolder
) -> None:
    """Move a downloaded file below ``final_path`` and prune everything else.

    Args:
        final_path: directory that receives the file; the file ends up at
            ``final_path / downloaded_file.name``.
        downloaded_file: file to be moved.
        dest_folder: folder object whose ``prune`` is called with the moved
            file as the only excluded path.
    """
    with log_context(_logger, logging.DEBUG, f"moving {downloaded_file}"):
        destination = final_path / downloaded_file.name
        destination.parent.mkdir(parents=True, exist_ok=True)

        await _shutil_move(downloaded_file, destination)

        # NOTE: after the port content changes, make sure old files
        # which are no longer part of the port, are removed
        dest_folder.prune(exclude={destination})
|
||
|
||
async def _get_data_from_port( | ||
port: Port, *, target_dir: Path, progress_bar: ProgressBarData | ||
) -> tuple[Port, ItemConcreteValue | None, ByteSize]: | ||
async with progress_bar.sub_progress( | ||
steps=2 if is_file_type(port.property_type) else 1, | ||
description=IDStr("getting data"), | ||
) as sub_progress: | ||
with log_context(logger, logging.DEBUG, f"getting {port.key=}"): | ||
with log_context(_logger, logging.DEBUG, f"getting {port.key=}"): | ||
port_data = await port.get(sub_progress) | ||
|
||
if is_file_type(port.property_type): | ||
|
@@ -261,42 +276,36 @@ async def _get_data_from_port( | |
if not downloaded_file or not downloaded_file.exists(): | ||
# the link may be empty | ||
# remove files all files from disk when disconnecting port | ||
logger.debug("removing contents of dir %s", final_path) | ||
await remove_directory( | ||
final_path, only_children=True, ignore_errors=True | ||
) | ||
with log_context( | ||
_logger, logging.DEBUG, f"removing contents of dir '{final_path}'" | ||
): | ||
await remove_directory( | ||
final_path, only_children=True, ignore_errors=True | ||
) | ||
return port, None, ByteSize(0) | ||
|
||
transferred_bytes = downloaded_file.stat().st_size | ||
|
||
# in case of valid file, it is either uncompressed and/or moved to the final directory | ||
with log_context(logger, logging.DEBUG, "creating directory"): | ||
with log_context(_logger, logging.DEBUG, "creating directory"): | ||
final_path.mkdir(exist_ok=True, parents=True) | ||
port_data = f"{final_path}" | ||
dest_folder = PrunableFolder(final_path) | ||
|
||
if _is_zip_file(downloaded_file): | ||
# unzip updated data to dest_path | ||
logger.debug("unzipping %s", downloaded_file) | ||
unarchived: set[Path] = await unarchive_dir( | ||
archive_to_extract=downloaded_file, | ||
destination_folder=final_path, | ||
progress_bar=sub_progress, | ||
) | ||
|
||
dest_folder.prune(exclude=unarchived) | ||
|
||
logger.debug("all unzipped in %s", final_path) | ||
with log_context( | ||
_logger, | ||
logging.DEBUG, | ||
f"unzipping '{downloaded_file}' to {final_path}", | ||
): | ||
unarchived: set[Path] = await unarchive_zip_to( | ||
downloaded_file, final_path, sub_progress | ||
) | ||
dest_folder.prune(exclude=unarchived) | ||
else: | ||
logger.debug("moving %s", downloaded_file) | ||
final_path = final_path / Path(downloaded_file).name | ||
await _shutil_move(str(downloaded_file), final_path) | ||
|
||
# NOTE: after the download the current value of the port | ||
# makes sure previously downloaded files are removed | ||
dest_folder.prune(exclude={final_path}) | ||
await _move_file_to_input_port(final_path, downloaded_file, dest_folder) | ||
|
||
logger.debug("all moved to %s", final_path) | ||
_logger.debug("all moved to %s", final_path) | ||
else: | ||
transferred_bytes = sys.getsizeof(port_data) | ||
|
||
|
@@ -312,7 +321,7 @@ async def download_target_ports( | |
progress_bar: ProgressBarData, | ||
port_notifier: PortNotifier | None, | ||
) -> ByteSize: | ||
logger.debug("retrieving data from simcore...") | ||
_logger.debug("retrieving data from simcore...") | ||
start_time = time.perf_counter() | ||
|
||
settings: ApplicationSettings = get_settings() | ||
|
@@ -386,7 +395,7 @@ async def _get_date_from_port_notified( | |
data_file.write_text(json.dumps(data)) | ||
|
||
elapsed_time = time.perf_counter() - start_time | ||
logger.info( | ||
_logger.info( | ||
"Downloaded %s in %s seconds", | ||
total_transfered_bytes.human_readable(decimal=True), | ||
elapsed_time, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import asyncio
import logging
import re
import shlex
from pathlib import Path

from models_library.basic_types import IDStr
from servicelib.progress_bar import ProgressBarData

from ..core.errors import SevenZipError
from ..core.utils import async_command
|
||
_logger = logging.getLogger(__name__) | ||
|
||
|
||
# matches e.g. "12 files" in the output of `7z l`
_FILE_COUNT_PATTERN = re.compile(r"(?P<file_count>\d+)\s*files")


async def _get_file_count(zip_path: Path) -> int:
    """Return the number of files ``7z l`` reports for an archive.

    Args:
        zip_path: archive to inspect.

    Returns:
        The count taken from the last ``<N> files`` occurrence in the command
        output, or ``0`` when the output contains none.

    Raises:
        SevenZipError: if the listing command does not succeed.
    """
    # the path is shell-quoted so that spaces or shell metacharacters in it
    # cannot split or alter the command (`l` is the 7z list command)
    quoted_zip_path = shlex.quote(str(zip_path))
    result = await async_command(f"7z l {quoted_zip_path}")

    if not result.success:
        raise SevenZipError(command=result.command, command_result=result.message)

    # the archive path and the entry names are echoed in the listing and may
    # themselves contain "<N> files"; the summary comes last, so use the
    # last occurrence
    counts = _FILE_COUNT_PATTERN.findall(result.message)
    return int(counts[-1]) if counts else 0
|
||
|
||
async def unarchive_zip_to(
    zip_path: Path,
    output_dir: Path,
    progress_bar: ProgressBarData | None = None,
) -> set[Path]:
    """Extract ``zip_path`` into ``output_dir`` with ``7z`` and report progress.

    Args:
        zip_path: archive to extract.
        output_dir: directory handed to ``7z`` through its ``-o`` switch.
        progress_bar: parent progress bar; a one-step bar is created when
            omitted. A sub progress with one step per file in the archive is
            advanced while ``7z`` prints the processed entries.

    Returns:
        Every regular file found below ``output_dir`` after the extraction.

    Raises:
        SevenZipError: if listing the archive fails or ``7z`` exits with a
            non-zero code.
    """
    if not progress_bar:
        progress_bar = ProgressBarData(
            num_steps=1, description=IDStr(f"extracting {zip_path.name}")
        )

    file_count = await _get_file_count(zip_path)

    # arguments are passed as a list (no shell involved), so paths containing
    # spaces or shell metacharacters reach 7z unchanged; the directory has to
    # be glued to `-o`, 7z takes no separator there
    arguments = ["7z", "x", f"{zip_path}", f"-o{output_dir}", "-bb1"]
    # only used for error reporting
    command = shlex.join(arguments)

    process = await asyncio.create_subprocess_exec(
        *arguments, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    assert process.stdout  # nosec
    assert process.stderr  # nosec

    # stderr is drained while stdout is consumed: reading it only after the
    # process exited can block forever once the stderr pipe buffer is full
    stderr_reader = asyncio.create_task(process.stderr.read())
    try:
        async with progress_bar.sub_progress(
            steps=file_count, description=IDStr("...")
        ) as sub_prog:
            while line := await process.stdout.readline():
                # with -bb1 every processed entry is printed as "- <name>"
                if line.decode().strip().startswith("- "):
                    await sub_prog.update(1)

        await process.wait()
        stderr = await stderr_reader
    finally:
        # on errors or cancellation do not leave 7z running in the background
        if process.returncode is None:
            process.kill()
            await process.wait()
        # no-op when the reader already finished
        stderr_reader.cancel()

    if process.returncode != 0:
        raise SevenZipError(command=command, command_result=stderr.decode().strip())

    # NOTE(review): this also includes files that were in output_dir before
    # the extraction; callers that prune with this set as exclusion will keep
    # such stale files - confirm this is intended
    return {x for x in output_dir.rglob("*") if x.is_file()}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this done here and not in the dynamic-sidecar.bash script?